- Updated README.md to include new setup instructions for RAG and observability. - Added internal knowledge base (KB) setup for SQL optimization team, supporting various document types. - Implemented token usage logging in LLM tools to track costs and usage. - Refactored SQL analysis and optimization prompts for clarity and consistency. - Introduced filtering of external tools based on environment configuration. - Enhanced conservative analysis agent with structured prompt for performance suggestions. - Updated requirements.txt to include new dependencies for RAG functionality. - Added internal KB helpers for building and attaching knowledge to agents.
100 lines
3.3 KiB
Python
100 lines
3.3 KiB
Python
"""Internal KB (RAG) setup for the SQL optimizer team."""
|
|
|
|
from __future__ import annotations
|
|
|
|
from dataclasses import dataclass
|
|
from pathlib import Path
|
|
import os
|
|
|
|
from agno.db.sqlite import SqliteDb
|
|
from agno.knowledge.knowledge import Knowledge
|
|
from agno.knowledge.embedder.sentence_transformer import SentenceTransformerEmbedder
|
|
from agno.vectordb.chroma import ChromaDb
|
|
|
|
from sql_optimizer_team.tools.engine.config.logger import get_logger
|
|
|
|
logger = get_logger(__name__)
|
|
|
|
|
|
@dataclass(frozen=True)
class InternalKBConfig:
    """Immutable bundle of settings for the internal knowledge base.

    Field order is part of the constructor signature and must not change.
    """

    # Location of the documents to ingest into the knowledge base.
    kb_path: Path
    # Directory handed to the Chroma vector store as its storage path.
    chroma_path: Path
    # Identifier of the sentence-transformer embedding model.
    embedder_id: str
    # SQLite file handed to the contents database.
    contents_db_file: Path
    # Whether external tools are considered blocked for this deployment.
    block_external: bool
|
|
|
|
|
|
def _env_or_default(name: str, default: str) -> str:
    """Return env var *name* stripped of whitespace, or *default* if unset or blank."""
    value = os.getenv(name, "").strip()
    return value or default


def _load_config() -> InternalKBConfig:
    """Build the knowledge-base configuration from environment variables.

    Variables read (default used when the variable is unset or blank):

    * ``SQL_OPT_KB_PATH`` -- documents location (``kb``).
    * ``SQL_OPT_KB_CHROMA_PATH`` -- vector store directory (``tmp/kb_chroma``).
    * ``SQL_OPT_KB_EMBEDDER_ID`` -- embedding model id
      (``sentence-transformers/all-MiniLM-L6-v2``).
    * ``SQL_OPT_KB_DB_FILE`` -- contents database file
      (``tmp/sql_optimizer_kb.db``).
    * ``SQL_OPT_BLOCK_EXTERNAL_TOOLS`` -- boolean flag (``true``); the values
      ``1``, ``true``, ``yes`` and ``on`` (case-insensitive) mean enabled.

    Returns:
        An ``InternalKBConfig`` whose path fields are resolved to absolute paths.
    """
    # A blank value must not reach Path(): Path("").resolve() is the current
    # working directory, which would make the whole cwd the KB root. Likewise a
    # blank flag must not silently disable a setting whose default is "on".
    truthy = {"1", "true", "yes", "on"}
    block_flag = _env_or_default("SQL_OPT_BLOCK_EXTERNAL_TOOLS", "true").lower()

    return InternalKBConfig(
        kb_path=Path(_env_or_default("SQL_OPT_KB_PATH", "kb")).resolve(),
        chroma_path=Path(_env_or_default("SQL_OPT_KB_CHROMA_PATH", "tmp/kb_chroma")).resolve(),
        embedder_id=_env_or_default(
            "SQL_OPT_KB_EMBEDDER_ID",
            "sentence-transformers/all-MiniLM-L6-v2",
        ),
        contents_db_file=Path(_env_or_default("SQL_OPT_KB_DB_FILE", "tmp/sql_optimizer_kb.db")).resolve(),
        block_external=block_flag in truthy,
    )
|
|
|
|
|
|
def build_internal_knowledge() -> Knowledge:
    """Create the internal SQL knowledge base and try to ingest local documents.

    The knowledge object is backed by a persistent Chroma vector store (using a
    sentence-transformer embedder) and a SQLite contents database, all
    configured through ``_load_config``.

    Ingestion is skipped, with a warning, when the configured KB path does not
    exist, or when external tools are blocked and the path is not a directory.
    Any exception raised while ingesting is logged and swallowed.

    Returns:
        The ``Knowledge`` instance, whether or not documents were ingested.
    """
    cfg = _load_config()
    kb_location = str(cfg.kb_path)

    if cfg.block_external:
        logger.info("External tools blocked for KB", kb_path=kb_location)

    # Keyword arguments are evaluated in order: embedder, vector store, then
    # the contents database.
    kb = Knowledge(
        name="internal-sql-kb",
        description="Base de conhecimento interna para otimização de SQL",
        vector_db=ChromaDb(
            name="sql-optimizer-kb",
            path=str(cfg.chroma_path),
            persistent_client=True,
            embedder=SentenceTransformerEmbedder(id=cfg.embedder_id),
        ),
        contents_db=SqliteDb(db_file=str(cfg.contents_db_file)),
        max_results=6,
    )

    # Decide whether there is anything to ingest before touching the store.
    skip_reason = None
    if not cfg.kb_path.exists():
        skip_reason = "KB path not found; skipping ingest"
    elif cfg.block_external and not cfg.kb_path.is_dir():
        skip_reason = "KB path is not a directory; skipping ingest"

    if skip_reason is not None:
        logger.warning(skip_reason, kb_path=kb_location)
        return kb

    ingest_options = {
        "path": kb_location,
        "include": ["**/*.md", "**/*.txt", "**/*.sql", "**/*.pdf"],
        "exclude": ["**/.git/**", "**/.venv/**", "**/__pycache__/**"],
        "upsert": True,
        "skip_if_exists": True,
    }
    try:
        kb.insert(**ingest_options)
        logger.info("KB ingest complete", kb_path=kb_location)
    except Exception as err:
        # Best effort: a failed ingest must not prevent returning the KB.
        logger.error("KB ingest failed", error=str(err))

    return kb
|
|
|
|
|
|
def attach_internal_knowledge(knowledge: Knowledge, *agents: object) -> None:
    """Wire *knowledge* into every given agent by setting its attributes.

    Each agent gets ``knowledge`` assigned, with ``add_knowledge_to_context``
    and ``search_knowledge`` switched on and ``update_knowledge`` switched off.
    If setting any attribute fails, the remaining attributes for that agent
    are skipped, a warning is logged, and processing moves on to the next agent.

    Args:
        knowledge: The knowledge base to attach.
        *agents: Objects to configure; may be empty.
    """
    # Applied in this exact order for every agent.
    settings = (
        ("knowledge", knowledge),
        ("add_knowledge_to_context", True),
        ("search_knowledge", True),
        ("update_knowledge", False),
    )

    for target in agents:
        try:
            for attr_name, attr_value in settings:
                setattr(target, attr_name, attr_value)
        except Exception as err:
            logger.warning("Failed to attach knowledge", agent=str(target), error=str(err))
|