#!/usr/bin/env python3
"""
RAG query: search indexed documents and generate answer via Ollama.

Usage:
    python rag/query.py "ваш вопрос"
"""
import os
import sys
import pickle
from pathlib import Path

import requests
import numpy as np
import faiss

# Ollama endpoint and model selection — all overridable via environment.
OLLAMA_HOST = os.getenv("OLLAMA_HOST", "http://192.168.0.47:11434")
EMBED_MODEL = os.getenv("EMBED_MODEL", "bge-m3")
GENERATE_MODEL = os.getenv("GENERATE_MODEL", "qwen3.5:9b")
TOP_K = 5  # number of chunks retrieved per query

# On-disk artifacts produced by the indexing step (rag/index.py).
STORE_DIR = Path(__file__).resolve().parent / "store"
INDEX_PATH = STORE_DIR / "faiss.index"
CHUNKS_PATH = STORE_DIR / "chunks.pkl"
META_PATH = STORE_DIR / "meta.pkl"

SYSTEM_PROMPT = """Ты — эксперт по теории автоматического управления и электротехнике.
Отвечай на вопросы, опираясь ТОЛЬКО на предоставленный контекст.
Если в контексте нет информации для ответа, скажи об этом.
Отвечай на русском языке, точно и по существу.
Указывай источники, из которых взята информация."""


def load_index():
    """Load the FAISS index plus the pickled chunk texts and metadata.

    Exits with status 1 if the index file is missing (index step not run).
    Returns a ``(index, chunks, meta)`` triple; ``chunks[i]`` / ``meta[i]``
    correspond to vector ``i`` in the index.
    """
    if not INDEX_PATH.exists():
        print("Index not found! Run: python rag/index.py")
        sys.exit(1)

    index = faiss.read_index(str(INDEX_PATH))
    # NOTE: pickle files are trusted local artifacts written by rag/index.py;
    # never point these paths at untrusted data.
    with open(CHUNKS_PATH, "rb") as fh:
        chunks = pickle.load(fh)
    with open(META_PATH, "rb") as fh:
        meta = pickle.load(fh)
    return index, chunks, meta


def get_embedding(text: str) -> np.ndarray:
    """Embed *text* via Ollama's /api/embed and return a (1, dim) float32 array."""
    payload = {"model": EMBED_MODEL, "input": [text]}
    resp = requests.post(
        f"{OLLAMA_HOST}/api/embed",
        json=payload,
        timeout=60,
    )
    resp.raise_for_status()
    return np.array(resp.json()["embeddings"], dtype=np.float32)


def search(query: str, index, chunks, meta, k: int = TOP_K):
    """Return the top-*k* chunks for *query* as dicts with chunk/meta/score.

    The query vector is L2-normalized in place before searching, matching
    the normalization applied at index time (cosine similarity via inner
    product). FAISS pads short result sets with id -1, which we drop.
    """
    query_vec = get_embedding(query)
    faiss.normalize_L2(query_vec)
    distances, ids = index.search(query_vec, k)
    return [
        {
            "chunk": chunks[hit_id],
            "meta": meta[hit_id],
            "score": float(hit_score),
        }
        for hit_score, hit_id in zip(distances[0], ids[0])
        if hit_id >= 0
    ]


def generate(query: str, context_chunks: list[dict]) -> str:
    """Ask the generation model to answer *query* using *context_chunks*.

    Each retrieved chunk is labeled with a numbered source marker and its
    URL, then joined into one context blob for the prompt. Returns the
    model's full (non-streamed) response text.
    """
    labeled = [
        f"[Источник {n}] ({hit['meta']['url']})\n{hit['chunk']}"
        for n, hit in enumerate(context_chunks, 1)
    ]
    context = "\n\n---\n\n".join(labeled)

    prompt = f"""Контекст из документов:

{context}

---

Вопрос: {query}

Ответ:"""

    resp = requests.post(
        f"{OLLAMA_HOST}/api/generate",
        json={
            "model": GENERATE_MODEL,
            "system": SYSTEM_PROMPT,
            "prompt": prompt,
            "stream": False,
            "think": False,  # disable the model's "thinking" mode
            "options": {"temperature": 0.3, "num_predict": 2048},
        },
        timeout=300,  # generation can be slow on large contexts
    )
    resp.raise_for_status()
    return resp.json()["response"]


def main():
    """CLI entry point: embed the question, retrieve, generate, report."""
    if len(sys.argv) < 2:
        print("Usage: python rag/query.py \"ваш вопрос\"")
        sys.exit(1)

    query = " ".join(sys.argv[1:])
    print(f"Query: {query}\n")

    index, chunks, meta = load_index()
    print(f"Index: {index.ntotal} vectors")

    results = search(query, index, chunks, meta)
    print(f"Top-{len(results)} results:\n")
    for n, hit in enumerate(results, 1):
        print(f" [{n}] score={hit['score']:.4f} {hit['meta']['url']}")
        print(f" {hit['chunk'][:120]}...\n")

    print("Generating answer...\n")
    answer = generate(query, results)
    print("=" * 60)
    print(answer)
    print("=" * 60)

    # Deduplicate source URLs while keeping first-seen order.
    print("\nSources:")
    unique_urls = dict.fromkeys(hit["meta"]["url"] for hit in results)
    for url in unique_urls:
        print(f" - {url}")


if __name__ == "__main__":
    main()