Ollama + FAISS based retrieval-augmented generation system that indexes Wikipedia articles on automatic control theory and answers questions in Russian.

#!/usr/bin/env python3
"""
RAG query: search the indexed documents and generate an answer via Ollama.

Usage: python rag/query.py "your question"
"""

import os
import pickle
import sys
from pathlib import Path

import faiss
import numpy as np
import requests

# Connection and model settings; each can be overridden via an environment variable.
OLLAMA_HOST = os.getenv("OLLAMA_HOST", "http://192.168.0.47:11434")
EMBED_MODEL = os.getenv("EMBED_MODEL", "bge-m3")
GENERATE_MODEL = os.getenv("GENERATE_MODEL", "qwen3.5:9b")
TOP_K = 5  # number of chunks retrieved per query

# Artifacts produced by the indexing step (rag/index.py).
STORE_DIR = Path(__file__).resolve().parent / "store"
INDEX_PATH = STORE_DIR / "faiss.index"
CHUNKS_PATH = STORE_DIR / "chunks.pkl"
META_PATH = STORE_DIR / "meta.pkl"
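
# Example (assumed shell usage): point the script at a different Ollama host.
# Note that EMBED_MODEL must match the model the index was built with, since
# query vectors and indexed vectors have to share one embedding space:
#
#   OLLAMA_HOST=http://localhost:11434 python rag/query.py "..."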

# System prompt (in Russian): "You are an expert in automatic control theory
# and electrical engineering. Answer questions using ONLY the provided context.
# If the context lacks the information needed to answer, say so. Answer in
# Russian, precisely and to the point. Cite the sources the information was
# taken from."
SYSTEM_PROMPT = """Ты — эксперт по теории автоматического управления и электротехнике.
Отвечай на вопросы, опираясь ТОЛЬКО на предоставленный контекст.
Если в контексте нет информации для ответа, скажи об этом.
Отвечай на русском языке, точно и по существу.
Указывай источники, из которых взята информация."""


def load_index():
    """Load the FAISS index and the pickled chunks/metadata written by rag/index.py."""
    if not INDEX_PATH.exists():
        print("Index not found! Run: python rag/index.py")
        sys.exit(1)
    index = faiss.read_index(str(INDEX_PATH))
    with open(CHUNKS_PATH, "rb") as f:
        chunks = pickle.load(f)
    with open(META_PATH, "rb") as f:
        meta = pickle.load(f)
    return index, chunks, meta
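
# Expected store layout after running rag/index.py (paths from the constants
# above); chunks.pkl and meta.pkl are assumed to be parallel lists, since
# search() indexes both with the same FAISS row id:
#
#   rag/store/faiss.index  - FAISS vector index
#   rag/store/chunks.pkl   - list[str] of chunk texts
#   rag/store/meta.pkl     - list[dict] of per-chunk metadata (incl. "url")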


def get_embedding(text: str) -> np.ndarray:
    """Embed one text via Ollama's /api/embed; returns a (1, dim) float32 array."""
    resp = requests.post(
        f"{OLLAMA_HOST}/api/embed",
        json={"model": EMBED_MODEL, "input": [text]},
        timeout=60,
    )
    resp.raise_for_status()
    # /api/embed returns {"embeddings": [[...]]}; the 2-D shape is what FAISS expects.
    return np.array(resp.json()["embeddings"], dtype=np.float32)
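
# Illustrative shape check (not executed by the script): bge-m3 dense
# embeddings are 1024-dimensional, so one would expect something like
#
#   >>> get_embedding("передаточная функция").shape
#   (1, 1024)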


def search(query: str, index, chunks, meta, k: int = TOP_K):
    """Return the top-k chunks for the query as dicts with text, metadata and score."""
    q_emb = get_embedding(query)
    # The index is assumed to hold L2-normalized vectors (built that way by
    # rag/index.py), so inner-product scores are cosine similarities.
    faiss.normalize_L2(q_emb)
    scores, indices = index.search(q_emb, k)
    results = []
    for score, idx in zip(scores[0], indices[0]):
        if idx < 0:  # FAISS pads with -1 when fewer than k neighbors exist
            continue
        results.append({
            "chunk": chunks[idx],
            "meta": meta[idx],
            "score": float(score),
        })
    return results
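
# Each search() result is a plain dict, e.g. (values illustrative):
#
#   {"chunk": "Передаточная функция — ...",
#    "meta": {"url": "https://ru.wikipedia.org/wiki/..."},
#    "score": 0.78}
#
# generate() and main() rely only on r["chunk"], r["meta"]["url"] and r["score"].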


def generate(query: str, context_chunks: list[dict]) -> str:
    """Build a numbered-source prompt from the retrieved chunks and query Ollama."""
    context_parts = []
    for i, r in enumerate(context_chunks, 1):
        url = r["meta"]["url"]
        # "[Источник i]" = "[Source i]"; the URL lets the model cite its sources.
        context_parts.append(f"[Источник {i}] ({url})\n{r['chunk']}")
    context = "\n\n---\n\n".join(context_parts)

    # Prompt layout (in Russian): "Context from documents: ... Question: ... Answer:"
    prompt = f"""Контекст из документов:

{context}

---

Вопрос: {query}

Ответ:"""

    resp = requests.post(
        f"{OLLAMA_HOST}/api/generate",
        json={
            "model": GENERATE_MODEL,
            "system": SYSTEM_PROMPT,
            "prompt": prompt,
            "stream": False,
            "think": False,  # suppress thinking output on reasoning-capable models
            "options": {"temperature": 0.3, "num_predict": 2048},
        },
        timeout=300,
    )
    resp.raise_for_status()
    return resp.json()["response"]
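
# Roughly the same request as a curl call against Ollama's native API
# (illustrative; the field names mirror the JSON body built above):
#
#   curl -s http://192.168.0.47:11434/api/generate -d '{
#     "model": "qwen3.5:9b", "system": "...", "prompt": "...", "stream": false
#   }'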


def main():
    if len(sys.argv) < 2:
        print('Usage: python rag/query.py "your question"')
        sys.exit(1)

    query = " ".join(sys.argv[1:])
    print(f"Query: {query}\n")

    index, chunks, meta = load_index()
    print(f"Index: {index.ntotal} vectors")

    results = search(query, index, chunks, meta)
    print(f"Top-{len(results)} results:\n")
    for i, r in enumerate(results, 1):
        print(f"  [{i}] score={r['score']:.4f}  {r['meta']['url']}")
        print(f"      {r['chunk'][:120]}...\n")

    print("Generating answer...\n")
    answer = generate(query, results)
    print("=" * 60)
    print(answer)
    print("=" * 60)

    # Deduplicate URLs: several retrieved chunks may come from the same article.
    print("\nSources:")
    seen = set()
    for r in results:
        url = r["meta"]["url"]
        if url not in seen:
            seen.add(url)
            print(f"  - {url}")


if __name__ == "__main__":
    main()
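
# Example session (output illustrative):
#
#   $ python rag/query.py "Что такое передаточная функция?"  # "What is a transfer function?"
#   Query: Что такое передаточная функция?
#   Index: 1234 vectors
#   Top-5 results: ...
#   Generating answer...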