[{"data":1,"prerenderedAt":-1},["ShallowReactive",2],{"portal-settings:en":3,"public-menus:all":38,"post:ollama-is-not-the-product-building-production-ready-open-llm-applications:en":229,"related:post:ollama-is-not-the-product-building-production-ready-open-llm-applications:en:1":867},{"statusCode":4,"data":5,"message":37},200,{"tenantId":6,"lang":7,"defaultLang":8,"siteUrl":9,"contactEmail":10,"brandName":11,"logoUrl":12,"siteName":11,"siteDescription":13,"ogImage":10,"robotsIndex":14,"socialLinks":10,"reservedSlugs":10,"seoPolicy":15},"stajic","en","de","https:\u002F\u002Fstajic.de",null,"Stajic Platform","\u002FLogo_Planet.svg","Stajic Portal",true,{"branding":16,"relatedContent":17,"crossDomainLinks":18},{"logoUrl":12},{"enabled":14},[19,22,25,28,31,34],{"url":20,"label":21,"isActive":14,"showInFooter":14,"includeInSameAs":14},"https:\u002F\u002Ffigure.rocks","figure.rocks",{"url":23,"label":24,"isActive":14,"showInFooter":14,"includeInSameAs":14},"https:\u002F\u002Floving.rocks","loving.rocks",{"url":26,"label":27,"isActive":14,"showInFooter":14,"includeInSameAs":14},"https:\u002F\u002Fbazify.com","bazify.com",{"url":29,"label":30,"isActive":14,"showInFooter":14,"includeInSameAs":14},"https:\u002F\u002Fbazify.de","bazify.de",{"url":32,"label":33,"isActive":14,"showInFooter":14,"includeInSameAs":14},"https:\u002F\u002Fbazify.at","bazify.at",{"url":35,"label":36,"isActive":14,"showInFooter":14,"includeInSameAs":14},"https:\u002F\u002Fbazify.ba","bazify.ba","Portal settings resolved",[39,45],{"id":40,"name":41,"location":42,"isActive":14,"isDefault":43,"items":44},1,"main-navigation","header",false,[],{"id":46,"name":47,"location":48,"isActive":14,"isDefault":14,"items":49},4,"main-menu","sidebar",[50,66,79,93,103,118,133],{"id":51,"title":52,"url":60,"route":-1,"target":61,"cssClass":-1,"icon":62,"isActive":14,"type":63,"productId":10,"categoryId":10,"shopCategoryId":10,"articleId":10,"pageId":64,"portfolioId":10,"children":65},"item-18",{"de":53,"en":54,"es":55,"fr":56,"it":54,"ru":57,"sr":58,"zh":59},"Startseite","Home","Inicio","Accueil","Главная","Почетна","首页","\u002Ffull-stack-web-developer-munich-performance-seo-and-maintainable-builds","_self","i-lucide-home","page",111,[],{"id":67,"title":68,"url":75,"route":-1,"target":61,"cssClass":-1,"icon":76,"isActive":14,"type":63,"productId":10,"categoryId":10,"shopCategoryId":10,"articleId":10,"pageId":77,"portfolioId":10,"children":78},"item-22",{"de":69,"en":69,"es":70,"fr":69,"it":71,"ru":72,"sr":73,"zh":74},"Vision","Visión","Visione","Видение","Визија","想象","\u002Fueber-uns-webdesign-muenchen-webaplikation","i-lucide-eye",113,[],{"id":80,"title":81,"url":89,"route":-1,"target":61,"cssClass":-1,"icon":90,"isActive":14,"type":63,"productId":10,"categoryId":10,"shopCategoryId":10,"articleId":10,"pageId":91,"portfolioId":10,"children":92},"item-19",{"de":82,"en":83,"es":84,"fr":83,"it":85,"ru":86,"sr":87,"zh":88},"Leistungen","Services","Servicios","Servizi","Услуги","Услуге","服务","\u002Fservices-dienstleistungen-muenchen","i-lucide-wrench",116,[],{"id":94,"title":95,"url":99,"route":-1,"target":61,"cssClass":-1,"icon":100,"isActive":14,"type":63,"productId":10,"categoryId":10,"shopCategoryId":10,"articleId":10,"pageId":101,"portfolioId":10,"children":102},"item-23",{"de":96,"en":96,"es":96,"fr":96,"it":96,"ru":97,"sr":97,"zh":98},"Blog","Блог","博客","\u002Fblog","i-lucide-book-open",112,[],{"id":104,"title":105,"url":114,"route":-1,"target":61,"cssClass":-1,"icon":115,"isActive":14,"type":63,"productId":10,"categoryId":10,"shopCategoryId":10,"articleId":10,"pageId":116,"portfolioId":10,"children":117},"item-32",{"de":106,"en":107,"es":108,"fr":109,"it":110,"ru":111,"sr":112,"zh":113},"Neue Technologien","New Technologies","Nuevas tecnologías","Nouvelles technologies","Nuove tecnologie","Новые технологии","Нове технологије","新技术！","\u002Fneue-webtechnologien","i-lucide-sparkles",122,[],{"id":119,"title":120,"url":129,"route":-1,"target":61,"cssClass":-1,"icon":130,"isActive":14,"type":63,"productId":10,"categoryId":10,"shopCategoryId":10,"articleId":10,"pageId":131,"portfolioId":10,"children":132},"item-20",{"de":121,"en":122,"es":123,"fr":124,"it":125,"ru":126,"sr":127,"zh":128},"Kontakt","Contact us!","Contacto","Contact","Contatto","Контакт","Контактирајте нас","联系我们！","\u002Fcontact","i-lucide-mail",115,[],{"id":134,"title":135,"url":144,"route":-1,"target":61,"cssClass":-1,"icon":145,"isActive":14,"type":63,"productId":10,"categoryId":10,"shopCategoryId":10,"articleId":10,"pageId":146,"portfolioId":10,"children":147},"item-21",{"de":136,"en":137,"es":138,"fr":139,"it":140,"ru":141,"sr":142,"zh":143},"Unsere Arbeit","Our Work","Nuestro trabajo","Nos réalisations","I nostri lavori","Наши работы","Наши радови","文件夹","\u002Fportfolio","i-lucide-briefcase",114,[148,161,175,187,193,205,217],{"id":149,"title":150,"url":144,"route":-1,"target":61,"cssClass":-1,"icon":159,"isActive":14,"type":63,"productId":10,"categoryId":10,"shopCategoryId":10,"articleId":10,"pageId":146,"portfolioId":10,"children":160},"item-24",{"de":151,"en":152,"es":153,"fr":154,"it":155,"ru":156,"sr":157,"zh":158},"Alle Projekte","All Projects","Todos los proyectos","Tous les projets","Tutti i progetti","Все проекты","Сви пројекти","所有项目","i-lucide-grid-3x3",[],{"id":162,"title":163,"url":171,"route":-1,"target":61,"cssClass":-1,"icon":172,"isActive":14,"type":173,"productId":10,"categoryId":10,"shopCategoryId":10,"articleId":10,"pageId":10,"portfolioId":10,"children":174},"item-29",{"de":164,"en":165,"es":166,"fr":167,"it":168,"ru":169,"sr":170,"zh":143},"Local Roots, Global Reach","Local Roots - Global Reach","Empresa local ","Entreprise locale","Azienda locale","Местная компания","Локално предузеће глобално тржиште","\u002Fportfolio\u002Flocal-roots-global-reach-communication-media-systems-for-modern-business","i-lucide-folder","custom",[],{"id":176,"title":177,"url":185,"route":-1,"target":61,"cssClass":-1,"icon":172,"isActive":14,"type":-1,"productId":10,"categoryId":10,"shopCategoryId":10,"articleId":10,"pageId":10,"portfolioId":10,"children":186},"item-30",{"de":178,"en":179,"es":180,"fr":181,"it":182,"ru":183,"sr":184,"zh":179},"Webpräsenz mit Ansage – Automobile Bauer Joomla","High-Impact Website – Automobile Bauer (Joomla)","Presencia web impactante – Automobile Bauer (Joomla)","Présence web percutante – Automobile Bauer (Joomla)","Presenza web di forte impatto – Automobile Bauer (Joomla)","Выразительный сайт – Automobile Bauer (Joomla)","Упечатљива веб-презентација – Automobile Bauer (Joomla)","\u002Fportfolio\u002Fautomobile-bauer-joomla",[],{"id":188,"title":189,"url":191,"route":-1,"target":61,"cssClass":-1,"icon":172,"isActive":14,"type":173,"productId":10,"categoryId":10,"shopCategoryId":10,"articleId":10,"pageId":10,"portfolioId":10,"children":192},"item-28",{"de":190,"en":190,"es":190,"fr":190,"it":190,"ru":190,"sr":190,"zh":190},"Solr Suggester","\u002Fportfolio\u002Fsolr-fuzzy-suggester-und-solr-infix-suggester-abfrage-ueber-ajax-und-filterung",[],{"id":194,"title":195,"url":203,"route":-1,"target":61,"cssClass":-1,"icon":172,"isActive":14,"type":173,"productId":10,"categoryId":10,"shopCategoryId":10,"articleId":10,"pageId":10,"portfolioId":10,"children":204},"item-27",{"de":196,"en":197,"es":198,"fr":199,"it":200,"ru":201,"sr":202,"zh":197},"Firmenwebseite SEO","Company Website SEO","Sitio web corporativo SEO","Site web d’entreprise SEO","Sito web aziendale SEO","Корпоративный сайт SEO","Пословна веб-страница SEO","\u002Fportfolio\u002Fseo-sem-branding-mobile-webseite-muenchen",[],{"id":206,"title":207,"url":215,"route":-1,"target":61,"cssClass":-1,"icon":172,"isActive":14,"type":173,"productId":10,"categoryId":10,"shopCategoryId":10,"articleId":10,"pageId":10,"portfolioId":10,"children":216},"item-31",{"de":208,"en":209,"es":210,"fr":211,"it":212,"ru":213,"sr":214,"zh":209},"Digitalisierungsportal","Digitalization Portal","Portal de digitalización","Portail de numérisation","Portale di digitalizzazione","Портал цифровизации","Портал за дигитализацију","\u002Fportfolio\u002Fdigitalisierungsportal-archiv-museum-bibliothek-ead-lido-mets-mods",[],{"id":218,"title":219,"url":227,"route":-1,"target":61,"cssClass":-1,"icon":172,"isActive":14,"type":-1,"productId":10,"categoryId":10,"shopCategoryId":10,"articleId":10,"pageId":10,"portfolioId":10,"children":228},"item-33",{"de":220,"en":221,"es":222,"fr":223,"it":224,"ru":225,"sr":226,"zh":221},"Integration von GPT-4 mit LibreOffice auf Ubuntu","GPT-4 Integration with LibreOffice on Ubuntu","Integración de GPT-4 con LibreOffice en Ubuntu","Intégration de GPT-4 avec LibreOffice sur Ubuntu","Integrazione di GPT-4 con LibreOffice su Ubuntu","Интеграция GPT-4 с LibreOffice на Ubuntu","Интеграција GPT-4 са LibreOffice-ом на Ubuntu-у","\u002Fportfolio\u002Fgpt-4-libreoffice-integration",[],{"statusCode":4,"data":230,"message":866},{"id":231,"title":232,"slug":233,"content":234,"contentJson":235,"excerpt":633,"featuredImage":634,"featuredImageAlt":635,"featuredImageCaption":10,"featuredImageTitle":10,"featuredImageCopyright":10,"featuredImageAuthor":10,"featuredImageSourceUrl":10,"featuredImageLicense":10,"featuredImageIsAiGenerated":14,"status":636,"publishedAt":637,"createdAt":638,"updatedAt":639,"categories":640,"author":661,"translations":666},"459","Ollama Is Not the Product: Building Production-Ready Open-LLM Applications","ollama-is-not-the-product-building-production-ready-open-llm-applications","{\"time\":1782679301267,\"blocks\":[{\"id\":\"HsB_-_Sdzh\",\"type\":\"header\",\"data\":{\"text\":\"Ollama Is Not the Product: Building Production-Ready Open-LLM Applications\",\"level\":1},\"tunes\":{}},{\"id\":\"G_FTnJiMVo\",\"type\":\"paragraph\",\"data\":{\"text\":\"Tools like \u003Cb>Ollama\u003C\u002Fb> have made local LLM experimentation almost frictionless. Install the runtime, pull a model, send a prompt, and a local assistant answers in seconds. That is useful. It is also the easiest part of the journey.\"},\"tunes\":{}},{\"id\":\"fVa1sJxmy6\",\"type\":\"paragraph\",\"data\":{\"text\":\"A terminal demo is not an application. A local model is not a product. A productive Open-LLM application needs controlled data ingestion, retrieval, permissions, evaluation, logging, provider abstraction, deployment discipline, and a user workflow that solves a real business problem.\"},\"tunes\":{}},{\"id\":\"f6EzZPNhRc\",\"type\":\"paragraph\",\"data\":{\"text\":\"The core thesis is simple: \u003Cb>Ollama is a runtime, not the product\u003C\u002Fb>. The product lives in the application layer around the model. That layer decides which documents are visible, which chunks are retrieved, which prompts are used, how answers are evaluated, how failures are logged, and whether the system can be trusted in daily work.\"},\"tunes\":{}},{\"id\":\"wjgsXAJQ-i\",\"type\":\"paragraph\",\"data\":{\"text\":\"This is where local AI becomes interesting for teams building private assistants, internal knowledge tools, LLM-enabled SaaS features, or enterprise workflow copilots. The model can run locally, but privacy, reliability, and business value only exist if the full system is engineered properly.\"},\"tunes\":{}},{\"id\":\"NnYUiRBtwJ\",\"type\":\"header\",\"data\":{\"text\":\"Ollama Is a Runtime, Not a Product\",\"level\":2},\"tunes\":{}},{\"id\":\"nVrpsWCll3\",\"type\":\"paragraph\",\"data\":{\"text\":\"Ollama is excellent for running and serving open models locally. It is especially strong for exploration, developer workflows, PoCs, and teams that want to test model behavior without immediately committing to a cloud provider. It exposes local model interaction through APIs and compatibility layers that make it easier to connect existing OpenAI-style tooling.\"},\"tunes\":{}},{\"id\":\"_aQOCMkaG8\",\"type\":\"paragraph\",\"data\":{\"text\":\"That does not mean Ollama solves the application problem. It does not automatically know which company documents a user may read. It does not create tenant isolation, document versioning, audit trails, source attribution, evaluation datasets, or business-specific UI workflows. Those responsibilities stay with the application architecture.\"},\"tunes\":{}},{\"id\":\"jeeZbnQvh7\",\"type\":\"list\",\"data\":{\"style\":\"unordered\",\"meta\":{},\"items\":[\"\u003Cb>Ollama helps with:\u003C\u002Fb> local model execution, fast experimentation, API-based model access, offline-first PoCs, and developer productivity.\",\"\u003Cb>Ollama does not solve by itself:\u003C\u002Fb> access control, RAG quality, document lifecycle, tenant boundaries, evaluation, audit logging, monitoring, fallback behavior, or enterprise workflow design.\",\"\u003Cb>The architectural mistake:\u003C\u002Fb> treating local inference as if it were already a secure AI product.\"]},\"tunes\":{}},{\"id\":\"U8K0Qat9xz\",\"type\":\"quote\",\"data\":{\"text\":\"The model is the easiest part. The application layer is where the product value lives.\",\"caption\":\"Architecture principle\",\"alignment\":\"left\"},\"tunes\":{}},{\"id\":\"NYU66FQDT8\",\"type\":\"header\",\"data\":{\"text\":\"Demo, PoC, and Production Are Different Stages\",\"level\":2},\"tunes\":{}},{\"id\":\"-EGq4eScAy\",\"type\":\"paragraph\",\"data\":{\"text\":\"Many Open-LLM initiatives fail because teams confuse a working demo with a production capability. The difference is not cosmetic. Each maturity level has a different goal, different risk profile, and different engineering requirement.\"},\"tunes\":{}},{\"id\":\"V_OydBfVSx\",\"type\":\"table\",\"data\":{\"withHeadings\":true,\"stretched\":false,\"content\":[[\"Stage\",\"What it proves\",\"Typical setup\",\"What is still missing\"],[\"Demo\",\"A model can answer a prompt locally.\",\"Ollama on a laptop, one model, one prompt, no real data control.\",\"Permissions, retrieval, logs, evaluation, deployment, monitoring, user workflow.\"],[\"PoC\",\"A small controlled system can answer questions over selected documents or workflows.\",\"Basic web UI, ingestion script, vector search, limited users, limited document scope.\",\"Scale, governance, test datasets, fallback strategy, auditability, support model.\"],[\"Production\",\"Multiple users can use the system safely and repeatedly inside real work.\",\"Authenticated app, tenant isolation, RAG pipeline, observability, provider abstraction, backups, release process.\",\"Continuous improvement, evaluation expansion, operational maturity.\"]]},\"tunes\":{}},{\"id\":\"ikp3g9j4So\",\"type\":\"paragraph\",\"data\":{\"text\":\"A PoC can be intentionally small. Production cannot be intentionally blind. Once real users, private data, business decisions, and compliance expectations enter the system, the architecture must become explicit.\"},\"tunes\":{}},{\"id\":\"f1xN0TAU53\",\"type\":\"header\",\"data\":{\"text\":\"Where RAG Fits In\",\"level\":2},\"tunes\":{}},{\"id\":\"wVTJ2-nEg8\",\"type\":\"paragraph\",\"data\":{\"text\":\"\u003Cb>Retrieval-Augmented Generation\u003C\u002Fb> is the most common bridge between a language model and private business knowledge. The model does not magically know internal documents, contracts, tickets, product specs, or runbooks. The application must retrieve relevant context before asking the model to answer.\"},\"tunes\":{}},{\"id\":\"fvxr_M2czf\",\"type\":\"paragraph\",\"data\":{\"text\":\"A practical RAG flow looks like this:\"},\"tunes\":{}},{\"id\":\"tVZHbBjBJe\",\"type\":\"list\",\"data\":{\"style\":\"ordered\",\"meta\":{\"counterType\":\"numeric\"},\"items\":[\"Documents are uploaded or synchronized from controlled sources.\",\"Text is extracted, cleaned, and split into chunks.\",\"Chunks receive metadata such as tenant, owner, document version, source URL, access scope, and timestamp.\",\"Embeddings are generated for each chunk.\",\"Vectors are stored in pgvector, Qdrant, or another retrieval layer.\",\"At query time, the application checks permissions before retrieval.\",\"Relevant chunks are retrieved with similarity search and metadata filters.\",\"The prompt builder injects selected context into the model request.\",\"The answer is generated with citations, confidence boundaries, and no-answer fallback when retrieval is weak.\"]},\"tunes\":{}},{\"id\":\"5VP9OTHrad\",\"type\":\"paragraph\",\"data\":{\"text\":\"RAG reduces hallucinations, but it does not eliminate them. A poor chunking strategy, weak metadata, missing permission filters, low-quality embeddings, or overly broad retrieval can still produce convincing but wrong answers. Serious RAG systems need thresholds, citations, retrieval logs, and evaluation.\"},\"tunes\":{}},{\"id\":\"6R3G0KzeXf\",\"type\":\"header\",\"data\":{\"text\":\"A Practical Local Open-LLM Architecture\",\"level\":2},\"tunes\":{}},{\"id\":\"hsYURnHqGd\",\"type\":\"paragraph\",\"data\":{\"text\":\"A realistic production path does not require exotic infrastructure. For many teams, a strong first architecture can use a normal web stack: Nuxt for the frontend, a Nitro or Node API, PostgreSQL as the system of record, pgvector for retrieval, Ollama as a local runtime, Prisma for data access, and background workers for ingestion and embeddings.\"},\"tunes\":{}},{\"id\":\"ie58nqmk5o\",\"type\":\"code\",\"data\":{\"code\":\"User question | v\\nFrontend UI | v\\nAPI \u002F Backend | +--> Authentication +--> Tenant and role permission check | v\\nRetrieval layer | +--> PostgreSQL metadata +--> pgvector or Qdrant vector search +--> similarity threshold | v\\nPrompt builder | +--> system prompt +--> retrieved chunks +--> source references +--> no-answer rules | v\\nLLM provider abstraction | +--> Ollama \u002F local model +--> cloud model fallback +--> future self-hosted runtime | v\\nAnswer with sources | v\\nLogs, traces, metrics, evaluation dataset\"},\"tunes\":{}},{\"id\":\"HCdDDntKaG\",\"type\":\"paragraph\",\"data\":{\"text\":\"This architecture keeps the model replaceable. Ollama can be the first runtime, but the system should not be locked to one inference engine. A clean architecture separates user workflow, retrieval, prompt construction, model provider, and observability.\"},\"tunes\":{}},{\"id\":\"jx3zLyUEmA\",\"type\":\"list\",\"data\":{\"style\":\"unordered\",\"meta\":{},\"items\":[\"\u003Cb>Frontend:\u003C\u002Fb> Nuxt or another web UI for authenticated user workflows.\",\"\u003Cb>Backend\u002FAPI:\u003C\u002Fb> Nitro, Node.js, or FastAPI for orchestration, permissions, and provider routing.\",\"\u003Cb>Database:\u003C\u002Fb> PostgreSQL for documents, users, tenants, roles, prompts, logs, and metadata.\",\"\u003Cb>Vector search:\u003C\u002Fb> pgvector for simple integrated retrieval or Qdrant when vector search becomes a dedicated service.\",\"\u003Cb>Model runtime:\u003C\u002Fb> Ollama for local execution, llama.cpp server for lightweight serving, or vLLM for higher-throughput GPU serving.\",\"\u003Cb>Storage:\u003C\u002Fb> local filesystem or S3-compatible object storage for uploaded files and extracted artifacts.\",\"\u003Cb>Workers:\u003C\u002Fb> background ingestion, chunking, embedding, re-indexing, and document version processing.\",\"\u003Cb>Observability:\u003C\u002Fb> logs, metrics, prompt traces, retrieval traces, latency tracking, and evaluation results.\"]},\"tunes\":{}},{\"id\":\"CG1PJLbPuV\",\"type\":\"header\",\"data\":{\"text\":\"The Production Readiness Checklist\",\"level\":2},\"tunes\":{}},{\"id\":\"aOHxNXO4Mk\",\"type\":\"paragraph\",\"data\":{\"text\":\"The following checklist is the difference between a local model demo and a usable Open-LLM application:\"},\"tunes\":{}},{\"id\":\"TBkg7dLMT1\",\"type\":\"list\",\"data\":{\"style\":\"unordered\",\"meta\":{},\"items\":[\"Every document and chunk has a tenant_id.\",\"Retrieval is blocked until user and role permissions are checked.\",\"Documents include metadata, source, owner, version, lifecycle state, and retention policy.\",\"Chunking strategy is documented and tested against real documents.\",\"Embedding model selection is explicit and versioned.\",\"Vector index configuration is reproducible.\",\"Similarity threshold prevents weak context from being used blindly.\",\"Answers include source citations or source references where possible.\",\"No-answer fallback is used when retrieval confidence is too low.\",\"Prompt templates are versioned and release-controlled.\",\"LLM provider abstraction is built from the start.\",\"Structured output is validated before downstream use.\",\"Evaluation dataset contains real questions, expected sources, and unacceptable answers.\",\"Retrieval logs show which chunks were used for each answer.\",\"Latency, token usage, GPU\u002FCPU load, and error rates are monitored.\",\"Privacy and retention rules cover uploads, extracted text, chunks, embeddings, prompts, responses, and logs.\",\"Deployment, backup, restore, rollback, and incident paths are documented.\"]},\"tunes\":{}},{\"id\":\"tdTcmD4DEO\",\"type\":\"paragraph\",\"data\":{\"text\":\"This connects directly with \u003Ca href=\\\"\u002Fenterprise\\\">Enterprise Delivery OS\u003C\u002Fa>: useful AI is not only a model decision. It is delivery discipline, evidence, controls, metrics, and operational ownership.\"},\"tunes\":{}},{\"id\":\"zOuV7s1ByX\",\"type\":\"header\",\"data\":{\"text\":\"Provider Abstraction: Do Not Marry One Model\",\"level\":2},\"tunes\":{}},{\"id\":\"gPuC35eEAc\",\"type\":\"paragraph\",\"data\":{\"text\":\"Local models are valuable for privacy-sensitive use cases, offline scenarios, cost control, and internal experimentation. Cloud models may still be stronger for difficult reasoning, coding, multilingual accuracy, or multimodal work. A production application should not treat any one model as permanent infrastructure.\"},\"tunes\":{}},{\"id\":\"ssy1KKIyUv\",\"type\":\"paragraph\",\"data\":{\"text\":\"A provider abstraction allows the same application workflow to call Ollama, OpenAI, Gemini, Anthropic, Mistral, vLLM, or another self-hosted endpoint without rewriting the product. The application decides the provider based on use case, data sensitivity, latency, cost, and quality requirements.\"},\"tunes\":{}},{\"id\":\"droVs9NwWG\",\"type\":\"code\",\"data\":{\"code\":\"type LlmProvider = \\\"ollama\\\" | \\\"openai\\\" | \\\"gemini\\\" | \\\"anthropic\\\" | \\\"vllm\\\"; type ChatInput = { provider: LlmProvider; model: string; tenantId: string; userId: string; question: string; context: Array\u003C{ chunkId: string; sourceTitle: string; text: string; }>;\\n}; async function chat(input: ChatInput) { await assertUserCanAccessContext(input.userId, input.context); const messages = buildRagMessages({ question: input.question, context: input.context, rules: [ \\\"Answer only from provided context when possible.\\\", \\\"Cite source titles.\\\", \\\"Say when the context is insufficient.\\\" ] }); return llm.chat({ provider: input.provider, model: input.model, messages, trace: { tenantId: input.tenantId, userId: input.userId, promptVersion: \\\"rag-v3.2\\\" } });\\n}\"},\"tunes\":{}},{\"id\":\"TqpI4xqkq1\",\"type\":\"paragraph\",\"data\":{\"text\":\"The key idea is not the TypeScript syntax. The key idea is the boundary. The application owns permissions, retrieval, prompt rules, tracing, and evaluation. The provider only produces the model output.\"},\"tunes\":{}},{\"id\":\"RuZt2H8Qtd\",\"type\":\"header\",\"data\":{\"text\":\"pgvector vs Qdrant\",\"level\":2},\"tunes\":{}},{\"id\":\"-nl0_dtw9s\",\"type\":\"paragraph\",\"data\":{\"text\":\"For many Open-LLM applications, the vector store decision is simple at the beginning: if PostgreSQL is already your system of record, \u003Cb>pgvector\u003C\u002Fb> is a strong starting point. It keeps metadata, permissions, document records, and vectors close together. That reduces operational complexity.\"},\"tunes\":{}},{\"id\":\"mJJZ9Vhiy4\",\"type\":\"paragraph\",\"data\":{\"text\":\"\u003Cb>Qdrant\u003C\u002Fb> becomes more attractive when vector search needs to become a dedicated service: larger retrieval scale, advanced filtering, specialized indexing, hybrid search patterns, or independent scaling of retrieval infrastructure.\"},\"tunes\":{}},{\"id\":\"ZNBNQmWTqi\",\"type\":\"table\",\"data\":{\"withHeadings\":true,\"stretched\":false,\"content\":[[\"Option\",\"Best fit\",\"Strengths\",\"Trade-offs\"],[\"pgvector\",\"PoCs, early production, systems already built on PostgreSQL.\",\"Single database, SQL joins, ACID behavior, simpler operations, metadata close to vectors.\",\"Less specialized than a dedicated vector database at large retrieval scale.\"],[\"Qdrant\",\"Dedicated semantic search services, larger vector workloads, advanced filtering and retrieval patterns.\",\"Purpose-built vector search, strong filtering model, independent scaling, retrieval-focused APIs.\",\"Adds another infrastructure component and operational surface.\"],[\"Start simple\",\"Most enterprise teams beginning with private RAG applications.\",\"Lower architecture risk, faster delivery, easier audit path.\",\"May need migration later if retrieval becomes central and high-scale.\"]]},\"tunes\":{}},{\"id\":\"EaNwvz0Ala\",\"type\":\"paragraph\",\"data\":{\"text\":\"A practical rule: start with pgvector when PostgreSQL already owns your business data and the retrieval scale is moderate. Move to Qdrant when vector search becomes a product capability of its own.\"},\"tunes\":{}},{\"id\":\"Xm3eJKCflO\",\"type\":\"header\",\"data\":{\"text\":\"Security and Privacy Reality Check\",\"level\":2},\"tunes\":{}},{\"id\":\"rdIR5uvVuv\",\"type\":\"paragraph\",\"data\":{\"text\":\"Local inference does not automatically mean secure AI. It only means the model execution can happen locally. The full data path still matters: uploaded files, extracted text, chunks, embeddings, logs, prompts, responses, backups, developer access, admin tools, and analytics exports.\"},\"tunes\":{}},{\"id\":\"IhqG3kxPSc\",\"type\":\"paragraph\",\"data\":{\"text\":\"Embeddings also deserve care. They are not the original document, but they still represent information derived from sensitive content. Treat them as part of the protected data path, especially when they are tied to metadata, source records, user queries, or tenant identifiers.\"},\"tunes\":{}},{\"id\":\"QvcBrg8G6B\",\"type\":\"list\",\"data\":{\"style\":\"unordered\",\"meta\":{},\"items\":[\"Do not index documents before access rules are known.\",\"Do not retrieve chunks without tenant and role filters.\",\"Do not log full prompts and responses without retention rules.\",\"Do not send sensitive context to a cloud model unless the policy allows it.\",\"Do not assume that local equals GDPR-compliant; compliance depends on the complete processing design.\",\"Do not let evaluation and debugging copies become uncontrolled shadow datasets.\"]},\"tunes\":{}},{\"id\":\"_YLdd4iHrS\",\"type\":\"paragraph\",\"data\":{\"text\":\"This is where \u003Ca href=\\\"\u002Fblog\u002Fenterprise-grade-multi-tenant-architecture-for-an-international-platform\\\">multi-instance SaaS architecture\u003C\u002Fa> matters. If tenants, roles, data ownership, and operational boundaries are weak, adding a local LLM can amplify risk instead of reducing it.\"},\"tunes\":{}},{\"id\":\"I6V0KMIQjb\",\"type\":\"header\",\"data\":{\"text\":\"Evaluation Is Not Optional\",\"level\":2},\"tunes\":{}},{\"id\":\"yaKCxuptHO\",\"type\":\"paragraph\",\"data\":{\"text\":\"A production Open-LLM application needs an evaluation loop. Without it, teams only have anecdotes. A few good demo answers do not prove that retrieval works, that the prompt is stable, or that the model behaves safely across real usage.\"},\"tunes\":{}},{\"id\":\"Kq_qe2AL-v\",\"type\":\"paragraph\",\"data\":{\"text\":\"A minimal evaluation dataset should include real user questions, expected source documents, unacceptable answers, no-answer cases, and regression tests for previous failures. Every change to chunking, embeddings, prompts, model provider, or retrieval threshold should be tested against that dataset.\"},\"tunes\":{}},{\"id\":\"oBlROQd5Li\",\"type\":\"list\",\"data\":{\"style\":\"unordered\",\"meta\":{},\"items\":[\"\u003Cb>Retrieval evaluation:\u003C\u002Fb> Did the system fetch the right source chunks?\",\"\u003Cb>Answer evaluation:\u003C\u002Fb> Did the answer stay grounded in the retrieved context?\",\"\u003Cb>Safety evaluation:\u003C\u002Fb> Did the system avoid forbidden disclosures or unauthorized data?\",\"\u003Cb>Operational evaluation:\u003C\u002Fb> Did latency, failure rate, and cost stay inside acceptable limits?\",\"\u003Cb>Regression evaluation:\u003C\u002Fb> Did a model or prompt upgrade break previously correct behavior?\"]},\"tunes\":{}},{\"id\":\"9gDXPLZ6Rp\",\"type\":\"paragraph\",\"data\":{\"text\":\"RAG reduces hallucinations, but it does not remove the need for evaluation. Evaluation is the control loop that turns a clever prototype into an improving product.\"},\"tunes\":{}},{\"id\":\"Hi1K8JLPUo\",\"type\":\"header\",\"data\":{\"text\":\"Deployment Discipline: The Model Is a Release Event\",\"level\":2},\"tunes\":{}},{\"id\":\"sEpigdnlkD\",\"type\":\"paragraph\",\"data\":{\"text\":\"Changing a model is not a harmless configuration tweak. It can change answer style, reasoning behavior, language quality, latency, token usage, citation discipline, and failure modes. In production, model upgrades should be handled like release events.\"},\"tunes\":{}},{\"id\":\"sLXBCBh3E0\",\"type\":\"paragraph\",\"data\":{\"text\":\"That means versioning prompts, embedding models, retrieval settings, model identifiers, provider routes, and evaluation results. It also means having rollback logic. If a new model causes worse retrieval-grounded answers or breaks structured output, the system needs a controlled path back to the known-good configuration.\"},\"tunes\":{}},{\"id\":\"pcbe8eGMbe\",\"type\":\"paragraph\",\"data\":{\"text\":\"This fits the same operating logic as \u003Ca href=\\\"\u002Fenterprise\u002Freference-models\u002Fdelivery-and-change\\\">Delivery &amp; Change\u003C\u002Fa> and \u003Ca href=\\\"\u002Fenterprise\u002Freference-models\u002Fdigital-platform\\\">AI-ready platform architecture\u003C\u002Fa>: stable delivery requires repeatable controls, not heroic manual fixes.\"},\"tunes\":{}},{\"id\":\"mOhI-jF1zx\",\"type\":\"header\",\"data\":{\"text\":\"Conclusion: Production Begins Where the Demo Ends\",\"level\":2},\"tunes\":{}},{\"id\":\"3mXK980w23\",\"type\":\"paragraph\",\"data\":{\"text\":\"Ollama can start the journey. It makes local model execution accessible and lowers the barrier to experimentation. That is valuable. But the production application begins where the demo ends.\"},\"tunes\":{}},{\"id\":\"9O27eQ_Svm\",\"type\":\"paragraph\",\"data\":{\"text\":\"The model is only one replaceable component. The real product is the controlled system around it: ingestion, permissions, retrieval, prompts, provider routing, evaluation, logs, monitoring, deployment, backup, and user workflow. That is where privacy becomes real, quality becomes measurable, and business value becomes repeatable.\"},\"tunes\":{}},{\"id\":\"4Lyy3E4ZsF\",\"type\":\"paragraph\",\"data\":{\"text\":\"A serious Open-LLM application is built through engineering discipline, not by running a local model once. Ollama can start the journey, but the production application begins where the demo ends.\"},\"tunes\":{}},{\"id\":\"eKPRtinG5L\",\"type\":\"paragraph\",\"data\":{\"text\":\"Theme:\"},\"tunes\":{}},{\"id\":\"GMa6lE1NNl\",\"type\":\"paragraph\",\"data\":{\"text\":\"Ollama, Open LLM, RAG, pgvector, Qdrant, Local AI, LLMOps, Provider Abstraction, Enterprise AI Architecture\"},\"tunes\":{}}],\"version\":\"2.31.5\"}",{"time":236,"blocks":237,"version":632},1782679301267,[238,242,248,253,258,263,269,274,279,290,298,303,308,334,339,344,349,354,371,376,381,386,392,397,411,416,421,444,449,454,459,464,469,474,479,484,489,514,519,524,529,534,546,551,556,561,566,577,582,587,592,597,602,607,612,617,622,627],{"id":239,"data":240,"type":42,"tunes":241},"HsB_-_Sdzh",{"text":232,"level":40},{},{"id":243,"data":244,"type":246,"tunes":247},"G_FTnJiMVo",{"text":245},"Tools like \u003Cb>Ollama\u003C\u002Fb> have made local LLM experimentation almost frictionless. Install the runtime, pull a model, send a prompt, and a local assistant answers in seconds. That is useful. It is also the easiest part of the journey.","paragraph",{},{"id":249,"data":250,"type":246,"tunes":252},"fVa1sJxmy6",{"text":251},"A terminal demo is not an application. A local model is not a product. A productive Open-LLM application needs controlled data ingestion, retrieval, permissions, evaluation, logging, provider abstraction, deployment discipline, and a user workflow that solves a real business problem.",{},{"id":254,"data":255,"type":246,"tunes":257},"f6EzZPNhRc",{"text":256},"The core thesis is simple: \u003Cb>Ollama is a runtime, not the product\u003C\u002Fb>. The product lives in the application layer around the model. That layer decides which documents are visible, which chunks are retrieved, which prompts are used, how answers are evaluated, how failures are logged, and whether the system can be trusted in daily work.",{},{"id":259,"data":260,"type":246,"tunes":262},"wjgsXAJQ-i",{"text":261},"This is where local AI becomes interesting for teams building private assistants, internal knowledge tools, LLM-enabled SaaS features, or enterprise workflow copilots. The model can run locally, but privacy, reliability, and business value only exist if the full system is engineered properly.",{},{"id":264,"data":265,"type":42,"tunes":268},"NnYUiRBtwJ",{"text":266,"level":267},"Ollama Is a Runtime, Not a Product",2,{},{"id":270,"data":271,"type":246,"tunes":273},"nVrpsWCll3",{"text":272},"Ollama is excellent for running and serving open models locally. It is especially strong for exploration, developer workflows, PoCs, and teams that want to test model behavior without immediately committing to a cloud provider. It exposes local model interaction through APIs and compatibility layers that make it easier to connect existing OpenAI-style tooling.",{},{"id":275,"data":276,"type":246,"tunes":278},"_aQOCMkaG8",{"text":277},"That does not mean Ollama solves the application problem. It does not automatically know which company documents a user may read. It does not create tenant isolation, document versioning, audit trails, source attribution, evaluation datasets, or business-specific UI workflows. Those responsibilities stay with the application architecture.",{},{"id":280,"data":281,"type":288,"tunes":289},"jeeZbnQvh7",{"meta":282,"items":283,"style":287},{},[284,285,286],"\u003Cb>Ollama helps with:\u003C\u002Fb> local model execution, fast experimentation, API-based model access, offline-first PoCs, and developer productivity.","\u003Cb>Ollama does not solve by itself:\u003C\u002Fb> access control, RAG quality, document lifecycle, tenant boundaries, evaluation, audit logging, monitoring, fallback behavior, or enterprise workflow design.","\u003Cb>The architectural mistake:\u003C\u002Fb> treating local inference as if it were already a secure AI product.","unordered","list",{},{"id":291,"data":292,"type":296,"tunes":297},"U8K0Qat9xz",{"text":293,"caption":294,"alignment":295},"The model is the easiest part. The application layer is where the product value lives.","Architecture principle","left","quote",{},{"id":299,"data":300,"type":42,"tunes":302},"NYU66FQDT8",{"text":301,"level":267},"Demo, PoC, and Production Are Different Stages",{},{"id":304,"data":305,"type":246,"tunes":307},"-EGq4eScAy",{"text":306},"Many Open-LLM initiatives fail because teams confuse a working demo with a production capability. The difference is not cosmetic. Each maturity level has a different goal, different risk profile, and different engineering requirement.",{},{"id":309,"data":310,"type":332,"tunes":333},"V_OydBfVSx",{"content":311,"stretched":43,"withHeadings":14},[312,317,322,327],[313,314,315,316],"Stage","What it proves","Typical setup","What is still missing",[318,319,320,321],"Demo","A model can answer a prompt locally.","Ollama on a laptop, one model, one prompt, no real data control.","Permissions, retrieval, logs, evaluation, deployment, monitoring, user workflow.",[323,324,325,326],"PoC","A small controlled system can answer questions over selected documents or workflows.","Basic web UI, ingestion script, vector search, limited users, limited document scope.","Scale, governance, test datasets, fallback strategy, auditability, support model.",[328,329,330,331],"Production","Multiple users can use the system safely and repeatedly inside real work.","Authenticated app, tenant isolation, RAG pipeline, observability, provider abstraction, backups, release process.","Continuous improvement, evaluation expansion, operational maturity.","table",{},{"id":335,"data":336,"type":246,"tunes":338},"ikp3g9j4So",{"text":337},"A PoC can be intentionally small. Production cannot be intentionally blind. Once real users, private data, business decisions, and compliance expectations enter the system, the architecture must become explicit.",{},{"id":340,"data":341,"type":42,"tunes":343},"f1xN0TAU53",{"text":342,"level":267},"Where RAG Fits In",{},{"id":345,"data":346,"type":246,"tunes":348},"wVTJ2-nEg8",{"text":347},"\u003Cb>Retrieval-Augmented Generation\u003C\u002Fb> is the most common bridge between a language model and private business knowledge. The model does not magically know internal documents, contracts, tickets, product specs, or runbooks. The application must retrieve relevant context before asking the model to answer.",{},{"id":350,"data":351,"type":246,"tunes":353},"fvxr_M2czf",{"text":352},"A practical RAG flow looks like this:",{},{"id":355,"data":356,"type":288,"tunes":370},"tVZHbBjBJe",{"meta":357,"items":359,"style":369},{"counterType":358},"numeric",[360,361,362,363,364,365,366,367,368],"Documents are uploaded or synchronized from controlled sources.","Text is extracted, cleaned, and split into chunks.","Chunks receive metadata such as tenant, owner, document version, source URL, access scope, and timestamp.","Embeddings are generated for each chunk.","Vectors are stored in pgvector, Qdrant, or another retrieval layer.","At query time, the application checks permissions before retrieval.","Relevant chunks are retrieved with similarity search and metadata filters.","The prompt builder injects selected context into the model request.","The answer is generated with citations, confidence boundaries, and no-answer fallback when retrieval is weak.","ordered",{},{"id":372,"data":373,"type":246,"tunes":375},"5VP9OTHrad",{"text":374},"RAG reduces hallucinations, but it does not eliminate them. A poor chunking strategy, weak metadata, missing permission filters, low-quality embeddings, or overly broad retrieval can still produce convincing but wrong answers. Serious RAG systems need thresholds, citations, retrieval logs, and evaluation.",{},{"id":377,"data":378,"type":42,"tunes":380},"6R3G0KzeXf",{"text":379,"level":267},"A Practical Local Open-LLM Architecture",{},{"id":382,"data":383,"type":246,"tunes":385},"hsYURnHqGd",{"text":384},"A realistic production path does not require exotic infrastructure. For many teams, a strong first architecture can use a normal web stack: Nuxt for the frontend, a Nitro or Node API, PostgreSQL as the system of record, pgvector for retrieval, Ollama as a local runtime, Prisma for data access, and background workers for ingestion and embeddings.",{},{"id":387,"data":388,"type":390,"tunes":391},"ie58nqmk5o",{"code":389},"User question | v\nFrontend UI | v\nAPI \u002F Backend | +--> Authentication +--> Tenant and role permission check | v\nRetrieval layer | +--> PostgreSQL metadata +--> pgvector or Qdrant vector search +--> similarity threshold | v\nPrompt builder | +--> system prompt +--> retrieved chunks +--> source references +--> no-answer rules | v\nLLM provider abstraction | +--> Ollama \u002F local model +--> cloud model fallback +--> future self-hosted runtime | v\nAnswer with sources | v\nLogs, traces, metrics, evaluation dataset","code",{},{"id":393,"data":394,"type":246,"tunes":396},"HCdDDntKaG",{"text":395},"This architecture keeps the model replaceable. Ollama can be the first runtime, but the system should not be locked to one inference engine. A clean architecture separates user workflow, retrieval, prompt construction, model provider, and observability.",{},{"id":398,"data":399,"type":288,"tunes":410},"jx3zLyUEmA",{"meta":400,"items":401,"style":287},{},[402,403,404,405,406,407,408,409],"\u003Cb>Frontend:\u003C\u002Fb> Nuxt or another web UI for authenticated user workflows.","\u003Cb>Backend\u002FAPI:\u003C\u002Fb> Nitro, Node.js, or FastAPI for orchestration, permissions, and provider routing.","\u003Cb>Database:\u003C\u002Fb> PostgreSQL for documents, users, tenants, roles, prompts, logs, and metadata.","\u003Cb>Vector search:\u003C\u002Fb> pgvector for simple integrated retrieval or Qdrant when vector search becomes a dedicated service.","\u003Cb>Model runtime:\u003C\u002Fb> Ollama for local execution, llama.cpp server for lightweight serving, or vLLM for higher-throughput GPU serving.","\u003Cb>Storage:\u003C\u002Fb> local filesystem or S3-compatible object storage for uploaded files and extracted artifacts.","\u003Cb>Workers:\u003C\u002Fb> background ingestion, chunking, embedding, re-indexing, and document version processing.","\u003Cb>Observability:\u003C\u002Fb> logs, metrics, prompt traces, retrieval traces, latency tracking, and evaluation results.",{},{"id":412,"data":413,"type":42,"tunes":415},"CG1PJLbPuV",{"text":414,"level":267},"The Production Readiness Checklist",{},{"id":417,"data":418,"type":246,"tunes":420},"aOHxNXO4Mk",{"text":419},"The following checklist is the difference between a local model demo and a usable Open-LLM application:",{},{"id":422,"data":423,"type":288,"tunes":443},"TBkg7dLMT1",{"meta":424,"items":425,"style":287},{},[426,427,428,429,430,431,432,433,434,435,436,437,438,439,440,441,442],"Every document and chunk has a tenant_id.","Retrieval is blocked until user and role permissions are checked.","Documents include metadata, source, owner, version, lifecycle state, and retention policy.","Chunking strategy is documented and tested against real documents.","Embedding model selection is explicit and versioned.","Vector index configuration is reproducible.","Similarity threshold prevents weak context from being used blindly.","Answers include source citations or source references where possible.","No-answer fallback is used when retrieval confidence is too low.","Prompt templates are versioned and release-controlled.","LLM provider abstraction is built from the start.","Structured output is validated before downstream use.","Evaluation dataset contains real questions, expected sources, and unacceptable answers.","Retrieval logs show which chunks were used for each answer.","Latency, token usage, GPU\u002FCPU load, and error rates are monitored.","Privacy and retention rules cover uploads, extracted text, chunks, embeddings, prompts, responses, and logs.","Deployment, backup, restore, rollback, and incident paths are documented.",{},{"id":445,"data":446,"type":246,"tunes":448},"tdTcmD4DEO",{"text":447},"This connects directly with \u003Ca href=\"\u002Fenterprise\">Enterprise Delivery OS\u003C\u002Fa>: useful AI is not only a model decision. It is delivery discipline, evidence, controls, metrics, and operational ownership.",{},{"id":450,"data":451,"type":42,"tunes":453},"zOuV7s1ByX",{"text":452,"level":267},"Provider Abstraction: Do Not Marry One Model",{},{"id":455,"data":456,"type":246,"tunes":458},"gPuC35eEAc",{"text":457},"Local models are valuable for privacy-sensitive use cases, offline scenarios, cost control, and internal experimentation. Cloud models may still be stronger for difficult reasoning, coding, multilingual accuracy, or multimodal work. A production application should not treat any one model as permanent infrastructure.",{},{"id":460,"data":461,"type":246,"tunes":463},"ssy1KKIyUv",{"text":462},"A provider abstraction allows the same application workflow to call Ollama, OpenAI, Gemini, Anthropic, Mistral, vLLM, or another self-hosted endpoint without rewriting the product. The application decides the provider based on use case, data sensitivity, latency, cost, and quality requirements.",{},{"id":465,"data":466,"type":390,"tunes":468},"droVs9NwWG",{"code":467},"type LlmProvider = \"ollama\" | \"openai\" | \"gemini\" | \"anthropic\" | \"vllm\"; type ChatInput = { provider: LlmProvider; model: string; tenantId: string; userId: string; question: string; context: Array\u003C{ chunkId: string; sourceTitle: string; text: string; }>;\n}; async function chat(input: ChatInput) { await assertUserCanAccessContext(input.userId, input.context); const messages = buildRagMessages({ question: input.question, context: input.context, rules: [ \"Answer only from provided context when possible.\", \"Cite source titles.\", \"Say when the context is insufficient.\" ] }); return llm.chat({ provider: input.provider, model: input.model, messages, trace: { tenantId: input.tenantId, userId: input.userId, promptVersion: \"rag-v3.2\" } });\n}",{},{"id":470,"data":471,"type":246,"tunes":473},"TqpI4xqkq1",{"text":472},"The key idea is not the TypeScript syntax. The key idea is the boundary. The application owns permissions, retrieval, prompt rules, tracing, and evaluation. The provider only produces the model output.",{},{"id":475,"data":476,"type":42,"tunes":478},"RuZt2H8Qtd",{"text":477,"level":267},"pgvector vs Qdrant",{},{"id":480,"data":481,"type":246,"tunes":483},"-nl0_dtw9s",{"text":482},"For many Open-LLM applications, the vector store decision is simple at the beginning: if PostgreSQL is already your system of record, \u003Cb>pgvector\u003C\u002Fb> is a strong starting point. It keeps metadata, permissions, document records, and vectors close together. That reduces operational complexity.",{},{"id":485,"data":486,"type":246,"tunes":488},"mJJZ9Vhiy4",{"text":487},"\u003Cb>Qdrant\u003C\u002Fb> becomes more attractive when vector search needs to become a dedicated service: larger retrieval scale, advanced filtering, specialized indexing, hybrid search patterns, or independent scaling of retrieval infrastructure.",{},{"id":490,"data":491,"type":332,"tunes":513},"ZNBNQmWTqi",{"content":492,"stretched":43,"withHeadings":14},[493,498,503,508],[494,495,496,497],"Option","Best fit","Strengths","Trade-offs",[499,500,501,502],"pgvector","PoCs, early production, systems already built on PostgreSQL.","Single database, SQL joins, ACID behavior, simpler operations, metadata close to vectors.","Less specialized than a dedicated vector database at large retrieval scale.",[504,505,506,507],"Qdrant","Dedicated semantic search services, larger vector workloads, advanced filtering and retrieval patterns.","Purpose-built vector search, strong filtering model, independent scaling, retrieval-focused APIs.","Adds another infrastructure component and operational surface.",[509,510,511,512],"Start simple","Most enterprise teams beginning with private RAG applications.","Lower architecture risk, faster delivery, easier audit path.","May need migration later if retrieval becomes central and high-scale.",{},{"id":515,"data":516,"type":246,"tunes":518},"EaNwvz0Ala",{"text":517},"A practical rule: start with pgvector when PostgreSQL already owns your business data and the retrieval scale is moderate. Move to Qdrant when vector search becomes a product capability of its own.",{},{"id":520,"data":521,"type":42,"tunes":523},"Xm3eJKCflO",{"text":522,"level":267},"Security and Privacy Reality Check",{},{"id":525,"data":526,"type":246,"tunes":528},"rdIR5uvVuv",{"text":527},"Local inference does not automatically mean secure AI. It only means the model execution can happen locally. The full data path still matters: uploaded files, extracted text, chunks, embeddings, logs, prompts, responses, backups, developer access, admin tools, and analytics exports.",{},{"id":530,"data":531,"type":246,"tunes":533},"IhqG3kxPSc",{"text":532},"Embeddings also deserve care. They are not the original document, but they still represent information derived from sensitive content. Treat them as part of the protected data path, especially when they are tied to metadata, source records, user queries, or tenant identifiers.",{},{"id":535,"data":536,"type":288,"tunes":545},"QvcBrg8G6B",{"meta":537,"items":538,"style":287},{},[539,540,541,542,543,544],"Do not index documents before access rules are known.","Do not retrieve chunks without tenant and role filters.","Do not log full prompts and responses without retention rules.","Do not send sensitive context to a cloud model unless the policy allows it.","Do not assume that local equals GDPR-compliant; compliance depends on the complete processing design.","Do not let evaluation and debugging copies become uncontrolled shadow datasets.",{},{"id":547,"data":548,"type":246,"tunes":550},"_YLdd4iHrS",{"text":549},"This is where \u003Ca href=\"\u002Fblog\u002Fenterprise-grade-multi-tenant-architecture-for-an-international-platform\">multi-instance SaaS architecture\u003C\u002Fa> matters. If tenants, roles, data ownership, and operational boundaries are weak, adding a local LLM can amplify risk instead of reducing it.",{},{"id":552,"data":553,"type":42,"tunes":555},"I6V0KMIQjb",{"text":554,"level":267},"Evaluation Is Not Optional",{},{"id":557,"data":558,"type":246,"tunes":560},"yaKCxuptHO",{"text":559},"A production Open-LLM application needs an evaluation loop. Without it, teams only have anecdotes. A few good demo answers do not prove that retrieval works, that the prompt is stable, or that the model behaves safely across real usage.",{},{"id":562,"data":563,"type":246,"tunes":565},"Kq_qe2AL-v",{"text":564},"A minimal evaluation dataset should include real user questions, expected source documents, unacceptable answers, no-answer cases, and regression tests for previous failures. Every change to chunking, embeddings, prompts, model provider, or retrieval threshold should be tested against that dataset.",{},{"id":567,"data":568,"type":288,"tunes":576},"oBlROQd5Li",{"meta":569,"items":570,"style":287},{},[571,572,573,574,575],"\u003Cb>Retrieval evaluation:\u003C\u002Fb> Did the system fetch the right source chunks?","\u003Cb>Answer evaluation:\u003C\u002Fb> Did the answer stay grounded in the retrieved context?","\u003Cb>Safety evaluation:\u003C\u002Fb> Did the system avoid forbidden disclosures or unauthorized data?","\u003Cb>Operational evaluation:\u003C\u002Fb> Did latency, failure rate, and cost stay inside acceptable limits?","\u003Cb>Regression evaluation:\u003C\u002Fb> Did a model or prompt upgrade break previously correct behavior?",{},{"id":578,"data":579,"type":246,"tunes":581},"9gDXPLZ6Rp",{"text":580},"RAG reduces hallucinations, but it does not remove the need for evaluation. Evaluation is the control loop that turns a clever prototype into an improving product.",{},{"id":583,"data":584,"type":42,"tunes":586},"Hi1K8JLPUo",{"text":585,"level":267},"Deployment Discipline: The Model Is a Release Event",{},{"id":588,"data":589,"type":246,"tunes":591},"sEpigdnlkD",{"text":590},"Changing a model is not a harmless configuration tweak. It can change answer style, reasoning behavior, language quality, latency, token usage, citation discipline, and failure modes. In production, model upgrades should be handled like release events.",{},{"id":593,"data":594,"type":246,"tunes":596},"sLXBCBh3E0",{"text":595},"That means versioning prompts, embedding models, retrieval settings, model identifiers, provider routes, and evaluation results. It also means having rollback logic. If a new model causes worse retrieval-grounded answers or breaks structured output, the system needs a controlled path back to the known-good configuration.",{},{"id":598,"data":599,"type":246,"tunes":601},"pcbe8eGMbe",{"text":600},"This fits the same operating logic as \u003Ca href=\"\u002Fenterprise\u002Freference-models\u002Fdelivery-and-change\">Delivery &amp; Change\u003C\u002Fa> and \u003Ca href=\"\u002Fenterprise\u002Freference-models\u002Fdigital-platform\">AI-ready platform architecture\u003C\u002Fa>: stable delivery requires repeatable controls, not heroic manual fixes.",{},{"id":603,"data":604,"type":42,"tunes":606},"mOhI-jF1zx",{"text":605,"level":267},"Conclusion: Production Begins Where the Demo Ends",{},{"id":608,"data":609,"type":246,"tunes":611},"3mXK980w23",{"text":610},"Ollama can start the journey. It makes local model execution accessible and lowers the barrier to experimentation. That is valuable. But the production application begins where the demo ends.",{},{"id":613,"data":614,"type":246,"tunes":616},"9O27eQ_Svm",{"text":615},"The model is only one replaceable component. The real product is the controlled system around it: ingestion, permissions, retrieval, prompts, provider routing, evaluation, logs, monitoring, deployment, backup, and user workflow. That is where privacy becomes real, quality becomes measurable, and business value becomes repeatable.",{},{"id":618,"data":619,"type":246,"tunes":621},"4Lyy3E4ZsF",{"text":620},"A serious Open-LLM application is built through engineering discipline, not by running a local model once. Ollama can start the journey, but the production application begins where the demo ends.",{},{"id":623,"data":624,"type":246,"tunes":626},"eKPRtinG5L",{"text":625},"Theme:",{},{"id":628,"data":629,"type":246,"tunes":631},"GMa6lE1NNl",{"text":630},"Ollama, Open LLM, RAG, pgvector, Qdrant, Local AI, LLMOps, Provider Abstraction, Enterprise AI Architecture",{},"2.31.5","Running a local model with Ollama is easy. Building a production-ready Open-LLM application is harder: it requires RAG, access control, provider abstraction, evaluation, logging, deployment discipline and a controlled application layer around the model.\n","\u002Fuploads\u002F2026\u002F06\u002Follama-is-not-the-product-building-production-ready-open-llm-applications-1782679361640-h0usqf.webp","ollama-is-not-the-product-building-production-ready-open-llm-applications-1782679361640-h0usqf","PUBLISHED","2026-06-28T16:39:00.000Z","2026-06-28T20:39:17.972Z","2026-06-28T20:56:51.719Z",[641,645,649,653,657],{"id":642,"name":643,"slug":644},39,"Enterprise Delivery OS","enterprise",{"id":646,"name":647,"slug":648},87,"LLMOps Playbook","llmops",{"id":650,"name":651,"slug":652},58,"Evaluation & Quality Gates","evaluation",{"id":654,"name":655,"slug":656},89,"Evaluation Harness","evaluation-harness",{"id":658,"name":659,"slug":660},85,"Quality Gates","quality-gates",{"id":662,"login":663,"email":664,"displayName":665},"20","rooth8233","aleksandar@stajic.de","Aleksandar Stajić",[667],{"lang":7,"title":232,"content":234,"contentJson":668,"excerpt":633},{"time":236,"blocks":669,"version":632},[670,673,676,679,682,685,688,691,694,699,702,705,708,716,719,722,725,728,733,736,739,742,745,748,753,756,759,764,767,770,773,776,779,782,785,788,791,799,802,805,808,811,816,819,822,825,828,833,836,839,842,845,848,851,854,857,860,863],{"id":239,"data":671,"type":42,"tunes":672},{"text":232,"level":40},{},{"id":243,"data":674,"type":246,"tunes":675},{"text":245},{},{"id":249,"data":677,"type":246,"tunes":678},{"text":251},{},{"id":254,"data":680,"type":246,"tunes":681},{"text":256},{},{"id":259,"data":683,"type":246,"tunes":684},{"text":261},{},{"id":264,"data":686,"type":42,"tunes":687},{"text":266,"level":267},{},{"id":270,"data":689,"type":246,"tunes":690},{"text":272},{},{"id":275,"data":692,"type":246,"tunes":693},{"text":277},{},{"id":280,"data":695,"type":288,"tunes":698},{"meta":696,"items":697,"style":287},{},[284,285,286],{},{"id":291,"data":700,"type":296,"tunes":701},{"text":293,"caption":294,"alignment":295},{},{"id":299,"data":703,"type":42,"tunes":704},{"text":301,"level":267},{},{"id":304,"data":706,"type":246,"tunes":707},{"text":306},{},{"id":309,"data":709,"type":332,"tunes":715},{"content":710,"stretched":43,"withHeadings":14},[711,712,713,714],[313,314,315,316],[318,319,320,321],[323,324,325,326],[328,329,330,331],{},{"id":335,"data":717,"type":246,"tunes":718},{"text":337},{},{"id":340,"data":720,"type":42,"tunes":721},{"text":342,"level":267},{},{"id":345,"data":723,"type":246,"tunes":724},{"text":347},{},{"id":350,"data":726,"type":246,"tunes":727},{"text":352},{},{"id":355,"data":729,"type":288,"tunes":732},{"meta":730,"items":731,"style":369},{"counterType":358},[360,361,362,363,364,365,366,367,368],{},{"id":372,"data":734,"type":246,"tunes":735},{"text":374},{},{"id":377,"data":737,"type":42,"tunes":738},{"text":379,"level":267},{},{"id":382,"data":740,"type":246,"tunes":741},{"text":384},{},{"id":387,"data":743,"type":390,"tunes":744},{"code":389},{},{"id":393,"data":746,"type":246,"tunes":747},{"text":395},{},{"id":398,"data":749,"type":288,"tunes":752},{"meta":750,"items":751,"style":287},{},[402,403,404,405,406,407,408,409],{},{"id":412,"data":754,"type":42,"tunes":755},{"text":414,"level":267},{},{"id":417,"data":757,"type":246,"tunes":758},{"text":419},{},{"id":422,"data":760,"type":288,"tunes":763},{"meta":761,"items":762,"style":287},{},[426,427,428,429,430,431,432,433,434,435,436,437,438,439,440,441,442],{},{"id":445,"data":765,"type":246,"tunes":766},{"text":447},{},{"id":450,"data":768,"type":42,"tunes":769},{"text":452,"level":267},{},{"id":455,"data":771,"type":246,"tunes":772},{"text":457},{},{"id":460,"data":774,"type":246,"tunes":775},{"text":462},{},{"id":465,"data":777,"type":390,"tunes":778},{"code":467},{},{"id":470,"data":780,"type":246,"tunes":781},{"text":472},{},{"id":475,"data":783,"type":42,"tunes":784},{"text":477,"level":267},{},{"id":480,"data":786,"type":246,"tunes":787},{"text":482},{},{"id":485,"data":789,"type":246,"tunes":790},{"text":487},{},{"id":490,"data":792,"type":332,"tunes":798},{"content":793,"stretched":43,"withHeadings":14},[794,795,796,797],[494,495,496,497],[499,500,501,502],[504,505,506,507],[509,510,511,512],{},{"id":515,"data":800,"type":246,"tunes":801},{"text":517},{},{"id":520,"data":803,"type":42,"tunes":804},{"text":522,"level":267},{},{"id":525,"data":806,"type":246,"tunes":807},{"text":527},{},{"id":530,"data":809,"type":246,"tunes":810},{"text":532},{},{"id":535,"data":812,"type":288,"tunes":815},{"meta":813,"items":814,"style":287},{},[539,540,541,542,543,544],{},{"id":547,"data":817,"type":246,"tunes":818},{"text":549},{},{"id":552,"data":820,"type":42,"tunes":821},{"text":554,"level":267},{},{"id":557,"data":823,"type":246,"tunes":824},{"text":559},{},{"id":562,"data":826,"type":246,"tunes":827},{"text":564},{},{"id":567,"data":829,"type":288,"tunes":832},{"meta":830,"items":831,"style":287},{},[571,572,573,574,575],{},{"id":578,"data":834,"type":246,"tunes":835},{"text":580},{},{"id":583,"data":837,"type":42,"tunes":838},{"text":585,"level":267},{},{"id":588,"data":840,"type":246,"tunes":841},{"text":590},{},{"id":593,"data":843,"type":246,"tunes":844},{"text":595},{},{"id":598,"data":846,"type":246,"tunes":847},{"text":600},{},{"id":603,"data":849,"type":42,"tunes":850},{"text":605,"level":267},{},{"id":608,"data":852,"type":246,"tunes":853},{"text":610},{},{"id":613,"data":855,"type":246,"tunes":856},{"text":615},{},{"id":618,"data":858,"type":246,"tunes":859},{"text":620},{},{"id":623,"data":861,"type":246,"tunes":862},{"text":625},{},{"id":628,"data":864,"type":246,"tunes":865},{"text":630},{},"Post erfolgreich abgerufen",{"items":868,"source":909,"manualIds":910,"manualMatchedIds":911},[869,876,883,890,897,903],{"id":870,"slug":871,"title":872,"excerpt":873,"featuredImage":874,"publishedAt":875},"361","model-view-controller-mvc","Model-View-Controller (MVC): The Structural Backbone of Modern Web Applications","Model-View-Controller, usually shortened to MVC, remains one of the most durable architectural patterns in software development. It gives teams a practical way to separate business logic, presentation, and user interaction so applications stay easier to build, extend, test, and maintain. This article explains what MVC is, why it still matters, where it fits in today’s web stacks, and how it connects to broader platform architecture, delivery quality, migration strategy, and operational maturity.","\u002Fuploads\u002F2026\u002F03\u002Fmodel-view-controller-mvc-1774872805793-0bjubu.webp","2023-04-12T12:57:00.000Z",{"id":877,"slug":878,"title":879,"excerpt":880,"featuredImage":881,"publishedAt":882},"381","enterprise-grade-multi-tenant-architecture-for-an-international-platform","Enterprise-Grade Multi-Tenant Architecture for an International Platform","Loving Rocks is an enterprise-grade wedding platform designed with a true multi-tenant architecture, isolated databases per tenant, and built-in internationalization for global scalability, security, and long-term operational stability.","\u002Fuploads\u002F2026\u002F01\u002Fenterprise-grade-multi-tenant-architecture-for-an-international-platform-1769789121298-b6v7ak.webp","2026-01-30T12:04:00.000Z",{"id":884,"slug":885,"title":886,"excerpt":887,"featuredImage":888,"publishedAt":889},"445","qwen-3-6-in-production-release-runbook-ai-rollback-and-llmops-versioning","Qwen 3.6 in Production: Release Runbook, AI Rollback, and LLMOps Versioning","Qwen 3.6 is not just another model upgrade. It is a release event, a rollback scenario, and a versioning problem at the same time. This article explains how Qwen 3.6 should be handled in production through LLMOps discipline, prompt and model traceability, controlled rollout, and evidence-based rollback readiness.","\u002Fuploads\u002F2026\u002F02\u002Fnew-qwen-3-5-plus-1771515512741-dcbi9p.webp","2026-05-04T02:49:00.000Z",{"id":891,"slug":892,"title":893,"excerpt":894,"featuredImage":895,"publishedAt":896},"364","tipps-fuer-die-verbesserung-der-seo-suchmaschinenoptimierung","Mastering the SEO Workflow: Essential Optimization Strategies for Organic Growth","A structured SEO workflow is crucial for sustainable organic growth. Learn the ten foundational strategies, from keyword research and technical optimization to content quality and performance analysis.","\u002Fuploads\u002F2026\u002F03\u002Ftipps-fuer-die-verbesserung-der-seo-suchmaschinenoptimierung-1774866098131-hwkzrg.webp","2024-01-26T06:35:00.000Z",{"id":898,"slug":656,"title":899,"excerpt":900,"featuredImage":901,"publishedAt":902},"434","Comprehensive Guide to Evaluation Harness: Mastering LLM Performance Evaluation","This guide provides a detailed walkthrough of Evaluation Harness, an essential framework for rigorously assessing large language model (LLM) capabilities in enterprise LLMOps pipelines. Learn setup, best practices, and advanced techniques to ensure reliable model benchmarking and optimization.","\u002Fuploads\u002F2026\u002F04\u002Fevaluation-harness-1775466944495-4s0xv2.webp","2026-03-01T17:50:00.000Z",{"id":904,"slug":905,"title":906,"excerpt":907,"featuredImage":888,"publishedAt":908},"384","new-qwen-3-5-plus","New Qwen 3.5-Plus: Open-source AI is getting serious now","Discover the groundbreaking features and benefits of Alibaba's Qwen 3.5-Plus, a revolutionary open-source AI for developers.","2026-02-19T10:23:00.000Z","fallback",[],[]]