Files
ai-control-systems/rag/query.py
root f105ab6277 Initial commit: RAG system for control theory Q&A
Ollama + FAISS based retrieval-augmented generation system that indexes
Wikipedia articles on automatic control theory and answers questions in Russian.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-04-24 00:57:53 -04:00

138 lines
3.8 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/usr/bin/env python3
"""
RAG query: search indexed documents and generate answer via Ollama.
Usage: python rag/query.py "ваш вопрос"
"""
import os
import sys
import pickle
from pathlib import Path
import requests
import numpy as np
import faiss
# --- Ollama endpoint and model names; each can be overridden via environment ---
OLLAMA_HOST = os.environ.get("OLLAMA_HOST", "http://192.168.0.47:11434")
EMBED_MODEL = os.environ.get("EMBED_MODEL", "bge-m3")
GENERATE_MODEL = os.environ.get("GENERATE_MODEL", "qwen3.5:9b")

# Default number of nearest neighbours requested from the index per query.
TOP_K = 5

# --- On-disk artifacts, located in a "store" directory next to this script ---
STORE_DIR = Path(__file__).resolve().parent.joinpath("store")
INDEX_PATH = STORE_DIR.joinpath("faiss.index")
CHUNKS_PATH = STORE_DIR.joinpath("chunks.pkl")
META_PATH = STORE_DIR.joinpath("meta.pkl")

# System prompt sent with every generation request. It is written in Russian and
# tells the model to answer only from the supplied context, to say so when the
# context lacks the answer, to reply in Russian, and to cite its sources.
SYSTEM_PROMPT = """Ты — эксперт по теории автоматического управления и электротехнике.
Отвечай на вопросы, опираясь ТОЛЬКО на предоставленный контекст.
Если в контексте нет информации для ответа, скажи об этом.
Отвечай на русском языке, точно и по существу.
Указывай источники, из которых взята информация."""
def load_index():
    """Load the FAISS index together with its pickled chunks and metadata.

    Returns:
        A tuple ``(index, chunks, meta)``: the object returned by
        ``faiss.read_index(INDEX_PATH)`` and the two objects unpickled from
        ``CHUNKS_PATH`` and ``META_PATH``.

    Exits the process with status 1, after printing a hint to stderr, when any
    of the three store files is missing.
    """
    # All three artifacts are required. Checking only the index file would let
    # a partially written store surface later as a raw FileNotFoundError.
    missing = [path for path in (INDEX_PATH, CHUNKS_PATH, META_PATH) if not path.exists()]
    if missing:
        print("Index not found! Run: python rag/index.py", file=sys.stderr)
        for path in missing:
            print(f"  missing: {path}", file=sys.stderr)
        sys.exit(1)

    index = faiss.read_index(str(INDEX_PATH))

    # SECURITY: pickle.load can execute arbitrary code embedded in the file.
    # Only load store files that were produced locally by a trusted indexer.
    with open(CHUNKS_PATH, "rb") as f:
        chunks = pickle.load(f)
    with open(META_PATH, "rb") as f:
        meta = pickle.load(f)

    return index, chunks, meta
def get_embedding(text: str) -> np.ndarray:
    """Embed *text* through the Ollama ``/api/embed`` endpoint.

    The text is sent as a single-element ``input`` list using ``EMBED_MODEL``.
    The ``"embeddings"`` field of the JSON response is returned as a float32
    NumPy array (presumably of shape ``(1, dim)`` since one input is sent;
    confirm against the Ollama API).

    Raises:
        requests.HTTPError: raised by ``raise_for_status`` on an error response.
    """
    endpoint = f"{OLLAMA_HOST}/api/embed"
    payload = {"model": EMBED_MODEL, "input": [text]}

    response = requests.post(endpoint, json=payload, timeout=60)
    response.raise_for_status()

    vectors = response.json()["embeddings"]
    return np.array(vectors, dtype=np.float32)
def search(query: str, index, chunks, meta, k: int = TOP_K):
    """Return the stored chunks that best match *query*.

    Args:
        query: Text to embed and look up.
        index: Object exposing ``search(vectors, k)`` (the loaded FAISS index).
        chunks: Container indexed by the positions the index returns.
        meta: Container of per-chunk metadata, indexed the same way as *chunks*.
        k: Number of neighbours to request.

    Returns:
        A list of dicts with keys ``"chunk"``, ``"meta"`` and ``"score"``, in
        the order the index returned them.
    """
    query_vec = get_embedding(query)
    # normalize_L2 works in place on the query matrix.
    faiss.normalize_L2(query_vec)
    scores, indices = index.search(query_vec, k)

    # Negative positions are skipped (FAISS appears to use -1 as the
    # "no neighbour" filler when fewer than k hits exist).
    return [
        {"chunk": chunks[pos], "meta": meta[pos], "score": float(similarity)}
        for similarity, pos in zip(scores[0], indices[0])
        if pos >= 0
    ]
def generate(query: str, context_chunks: list[dict]) -> str:
    """Ask the Ollama generation model to answer *query* from the given chunks.

    Args:
        query: The user's question.
        context_chunks: Search hits; each must provide ``["meta"]["url"]`` and
            ``["chunk"]``.

    Returns:
        The ``"response"`` field of the ``/api/generate`` JSON reply.

    Raises:
        requests.HTTPError: raised by ``raise_for_status`` on an error response.
    """
    # Each hit becomes a numbered source section headed by its URL.
    numbered_sources = [
        f"[Источник {number}] ({hit['meta']['url']})\n{hit['chunk']}"
        for number, hit in enumerate(context_chunks, 1)
    ]
    context = "\n\n---\n\n".join(numbered_sources)

    prompt = (
        "Контекст из документов:\n"
        f"{context}\n"
        "---\n"
        f"Вопрос: {query}\n"
        "Ответ:"
    )

    request_body = {
        "model": GENERATE_MODEL,
        "system": SYSTEM_PROMPT,
        "prompt": prompt,
        "stream": False,  # request a single JSON reply instead of a stream
        "think": False,
        "options": {"temperature": 0.3, "num_predict": 2048},
    }
    response = requests.post(
        f"{OLLAMA_HOST}/api/generate",
        json=request_body,
        timeout=300,
    )
    response.raise_for_status()
    return response.json()["response"]
def main():
    """Command-line entry point: retrieve context, generate and print an answer.

    All command-line arguments are joined with spaces to form the query. The
    function prints the retrieved hits, the generated answer, and the distinct
    source URLs in first-seen order. With no arguments it prints a usage line
    and exits with status 1.
    """
    words = sys.argv[1:]
    if not words:
        print('Usage: python rag/query.py "ваш вопрос"')
        sys.exit(1)

    query = " ".join(words)
    print(f"Query: {query}\n")

    index, chunks, meta = load_index()
    print(f"Index: {index.ntotal} vectors")

    results = search(query, index, chunks, meta)
    print(f"Top-{len(results)} results:\n")
    for rank, hit in enumerate(results, 1):
        source_url = hit["meta"]["url"]
        preview = hit["chunk"][:120]
        print(f" [{rank}] score={hit['score']:.4f} {source_url}")
        print(f" {preview}...\n")

    print("Generating answer...\n")
    answer = generate(query, results)

    separator = "=" * 60
    print(separator)
    print(answer)
    print(separator)

    print("\nSources:")
    # dict.fromkeys drops duplicate URLs while keeping first-seen order.
    for source_url in dict.fromkeys(hit["meta"]["url"] for hit in results):
        print(f" - {source_url}")


if __name__ == "__main__":
    main()