# poc-v1/src/sql_optimizer_team/agents/sql_analyst_agent.py

from agno.agent import Agent
from agno.db.sqlite import SqliteDb
from sql_optimizer_team.tools.engine.model_selector import get_model
from sql_optimizer_team.tools.prompt_tools import supported_databases
from sql_optimizer_team.tools.sql_tools import load_sql_from_file, ensure_non_empty
import os

# Model instance resolved once, at import time, by calling get_model() with no
# arguments; it is passed as the `model` of the agent defined in this module.
base_model = get_model()

# Prompt template for the "SQL -> natural language" explanation step.
#
# The text contains `$name` placeholders ($database_name, $query,
# $specific_features, $analysis_requirements). No substitution is performed in
# this module: the constant is used verbatim as one of the agent instructions
# further down.
# NOTE(review): the placeholders look like string.Template fields, presumably
# filled in elsewhere in the project — confirm.
#
# `.strip()` drops the leading/trailing newlines introduced by the triple-quoted
# literal.
SQL_TO_NATURAL_PROMPT = """
You are an expert $database_name database analyst and performance specialist. Your task is to translate the SQL query below into a detailed, precise natural-language description that another agent will later use to reconstruct and optimize the query.

$database_name SQL Query:
```sql
$query
```

Your explanation must follow these requirements:

1. **Describe the overall purpose**
- Explain clearly what the query is intended to accomplish and why (retrieve data, update rows, aggregate information, validate existence, create structures, etc.).

2. **List ALL involved database objects**
Explicitly list every:
- Table
- View
- CTE (Common Table Expression)
- Subquery or derived table
- Function
- Stored procedure, if referenced
- Temporary table
- Schema-qualified object
Use the exact names as they appear in the query.

3. **Describe all essential operations**
Explicitly state, using exact column names:
- Columns retrieved or modified
- Join types, join conditions, and which objects participate
- Filters and conditions (WHERE, boolean logic, comparisons)
- Aggregations (SUM, COUNT, etc.)
- Grouping and HAVING clauses
- Sorting (ORDER BY)
- Window functions
- DISTINCT, TOP, LIMIT, OFFSET, pagination
- Any $database_name-specific features used$specific_features

4. **Maintain strict factual accuracy**
- Do NOT infer business meaning unless directly implied.
- Do NOT rename or paraphrase column names; repeat them exactly.

5. **Use clear, structured natural language**
- Provide a step-by-step explanation that makes every operation and purpose explicit.
- The output must be complete enough that the query can be reconstructed.

6. **⚠️ CRITICAL: Identify Performance Issues**
Flag any of these CRITICAL performance problems found in the query:
- **NO WHERE CLAUSE** (BE CAREFUL - AVOID FALSE POSITIVES):
    * ONLY flag if the MAIN/OUTER SELECT has absolutely NO WHERE keyword with filtering conditions
    * If query HAS 'WHERE' followed by conditions (even old-style JOINs in WHERE), DO NOT flag
    * Subqueries/EXISTS having WHERE does NOT mean main query has no WHERE
    * CROSS APPLY/LATERAL with internal WHERE counts as filtered
    * If truly no WHERE: Flag as CRITICAL (causes FULL TABLE SCAN, no predicate pushdown)
- **Non-SARGable patterns**: Functions on indexed columns in WHERE/JOIN (e.g., YEAR(date), UPPER(col))
- **Leading wildcards**: LIKE '%value%' patterns that prevent index usage
- **Implicit conversions**: Type mismatches in comparisons
- **NOLOCK/WITH (NOLOCK) hints**: If query uses WITH (NOLOCK), WITH (nolock), WITH(NOLOCK), (NOLOCK), (nolock) or NOLOCK/nolock (any case) → DO NOT REMOVE, but FLAG as **CRITICAL RISK**: "⚠️ WITH (NOLOCK) reads uncommitted/dirty data - CRITICAL: may cause INCORRECT FINANCIAL VALUES and data inconsistencies in production"
$analysis_requirements

Explanation:
""".strip()

# Location of the SQLite file handed to SqliteDb; overridable through the
# SQL_OPT_TEAM_DB_FILE environment variable.
_db_path = os.environ.get("SQL_OPT_TEAM_DB_FILE", "tmp/sql_optimizer_team.db")

# Debug flag read from SQL_OPT_TEAM_DEBUG_MODE. The value is trimmed and
# lower-cased, and only "1", "true", "yes" or "on" turn it on; anything else
# (including an unset variable, which defaults to "false") leaves it off.
_debug_mode = (
    os.environ.get("SQL_OPT_TEAM_DEBUG_MODE", "false").strip().lower()
    in ("1", "true", "yes", "on")
)

# SQLite-backed store passed to the agent as its `db`.
db = SqliteDb(db_file=_db_path)


async def explain_query_tool(
    database_type: str,
    sql: str,
    provider: str | None = None,
    model: str | None = None,
    temperature: float | None = None,
    max_tokens: int | None = None,
    api_key: str | None = None,
) -> dict[str, str]:
    """Explain a SQL query by delegating to ``explain_query_core``.

    This is a thin async wrapper: every argument is forwarded unchanged, by
    keyword, to ``explain_query_core`` and the awaited result is returned
    as-is.

    Args:
        database_type: Target database identifier, forwarded as-is.
        sql: SQL text to explain, forwarded as-is.
        provider: Optional override forwarded as-is (``None`` when omitted).
        model: Optional override forwarded as-is (``None`` when omitted).
        temperature: Optional override forwarded as-is (``None`` when omitted).
        max_tokens: Optional override forwarded as-is (``None`` when omitted).
        api_key: Optional override forwarded as-is (``None`` when omitted).

    Returns:
        Whatever ``explain_query_core`` returns, annotated as
        ``dict[str, str]``.
    """
    # Imported at call time rather than at module import time.
    # NOTE(review): presumably to avoid an import cycle or import-time cost —
    # confirm before moving it to the top of the file.
    from sql_optimizer_team.tools.core_tools import explain_query_core

    optional_overrides = {
        "provider": provider,
        "model": model,
        "temperature": temperature,
        "max_tokens": max_tokens,
        "api_key": api_key,
    }
    explanation = await explain_query_core(
        database_type=database_type,
        sql=sql,
        **optional_overrides,
    )
    return explanation

# Agent that receives a SQL statement and its target database and produces a
# structured natural-language explanation of the query.
# The `role` and `instructions` strings are runtime text handed to the Agent
# and are kept in Portuguese, as in the original project.
sql_analyst_agent = Agent(
    name="SQL Analyst",
    role=(
        "Você recebe a SQL original e o banco alvo e produz a descrição natural detalhada. "
        "A saída deve seguir exatamente a prompt original (SQL → natural) do projeto oracle-sql-query-optimizer."
    ),
    model=base_model,
    # Callables exposed to the agent as tools.
    tools=[explain_query_tool, load_sql_from_file, ensure_non_empty, supported_databases],
    markdown=True,
    add_history_to_context=True,
    db=db,
    enable_agentic_memory=True,
    enable_user_memories=True,
    debug_mode=_debug_mode,
    instructions=[
        "- Solicite banco e SQL se não estiverem presentes. Bancos suportados: use supported_databases().",
        "- Se o usuário fornecer um caminho de arquivo, use load_sql_from_file().",
        # The instruction must name the tool actually registered in `tools`
        # (explain_query_tool). explain_query_core is only the function that
        # tool delegates to and is not exposed to the agent.
        "- Preferência: use explain_query_tool(database_type, sql) para gerar a explicação via core de negócio.",
        "- Use a template oficial abaixo para estruturar a explicação (SQL → natural).",
        # NOTE(review): the template is passed as-is, so its $placeholders are
        # not substituted here — confirm that this is intended.
        SQL_TO_NATURAL_PROMPT,
        "- Entregue apenas a explicação natural estruturada conforme a prompt; não reescreva a SQL.",
        "- Identifique problemas críticos de performance conforme a prompt.",
    ],
)