refactor: Remove AgentPromptTemplates and integrate prompt templates directly into agents for improved clarity and maintainability
This commit is contained in:
parent
ae33ee5bca
commit
c6dd91810b
6 changed files with 414 additions and 242 deletions
|
|
@ -1,9 +1,12 @@
|
||||||
"""SQL optimizer team agents."""
|
"""SQL optimizer team agents.
|
||||||
|
|
||||||
from sql_optimizer_team.agents.sql_analyst_agent import sql_analyst_agent
|
This package uses lazy imports to avoid circular dependencies with tools.
|
||||||
from sql_optimizer_team.agents.sql_optimizer_agent import sql_optimizer_agent
|
"""
|
||||||
from sql_optimizer_team.agents.sql_quality_agent import sql_quality_agent
|
|
||||||
from sql_optimizer_team.agents.conservative_analysis_agent import conservative_analysis_agent
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import importlib
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
__all__ = [
|
__all__ = [
|
||||||
"sql_analyst_agent",
|
"sql_analyst_agent",
|
||||||
|
|
@ -11,3 +14,21 @@ __all__ = [
|
||||||
"sql_quality_agent",
|
"sql_quality_agent",
|
||||||
"conservative_analysis_agent",
|
"conservative_analysis_agent",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
# Lazy-export registry: public agent name -> dotted path of the module that
# defines an attribute with that same name. Consulted by the module-level
# ``__getattr__`` so agent modules are only imported when first requested.
_AGENT_MODULES = dict(
    (
        ("sql_analyst_agent", "sql_optimizer_team.agents.sql_analyst_agent"),
        ("sql_optimizer_agent", "sql_optimizer_team.agents.sql_optimizer_agent"),
        ("sql_quality_agent", "sql_optimizer_team.agents.sql_quality_agent"),
        (
            "conservative_analysis_agent",
            "sql_optimizer_team.agents.conservative_analysis_agent",
        ),
    )
)
|
||||||
|
|
||||||
|
|
||||||
|
def __getattr__(name: str) -> Any:
    """Resolve a lazily exported agent on first attribute access (PEP 562).

    Args:
        name: Attribute requested on this package.

    Returns:
        The object called ``name`` defined in the module registered for it
        in ``_AGENT_MODULES``.

    Raises:
        AttributeError: If ``name`` is not a registered agent name.
    """
    module_path = _AGENT_MODULES.get(name)
    if module_path is None:
        raise AttributeError(f"module {__name__!r} has no attribute {name!r}")

    agent = getattr(importlib.import_module(module_path), name)

    # Each agent shares its name with the submodule that defines it. Importing
    # that submodule makes the import system bind the *module* object on this
    # package under the same name, after which this hook is no longer called
    # and later lookups would return the module instead of the agent.
    # Rebinding the name to the agent keeps every lookup consistent (and
    # matches what an eager ``from ... import <agent>`` in this package does).
    globals()[name] = agent
    return agent
|
||||||
|
|
||||||
|
|
||||||
|
def __dir__() -> list[str]:
    """Return the sorted, de-duplicated attribute names of this package.

    Combines the names currently bound in the module globals with the lazily
    exported names in ``__all__``. A set union is used because an exported
    name can also be present in globals once it has been imported, and plain
    concatenation would then list it twice.
    """
    return sorted(set(globals()) | set(__all__))
|
||||||
|
|
|
||||||
|
|
@ -1,7 +1,6 @@
|
||||||
from agno.agent import Agent
|
from agno.agent import Agent
|
||||||
from agno.db.sqlite import SqliteDb
|
from agno.db.sqlite import SqliteDb
|
||||||
from sql_optimizer_team.tools.engine.model_selector import get_model
|
from sql_optimizer_team.tools.engine.model_selector import get_model
|
||||||
from sql_optimizer_team.agents.prompts import AgentPromptTemplates
|
|
||||||
from sql_optimizer_team.tools.prompt_tools import supported_databases
|
from sql_optimizer_team.tools.prompt_tools import supported_databases
|
||||||
from sql_optimizer_team.tools.sql_tools import load_sql_from_file, ensure_non_empty
|
from sql_optimizer_team.tools.sql_tools import load_sql_from_file, ensure_non_empty
|
||||||
import os
|
import os
|
||||||
|
|
@ -31,7 +30,52 @@ conservative_analysis_agent = Agent(
|
||||||
"- Solicite banco e SQL se não estiverem presentes.",
|
"- Solicite banco e SQL se não estiverem presentes.",
|
||||||
"- Se o usuário fornecer um caminho de arquivo, use load_sql_from_file().",
|
"- Se o usuário fornecer um caminho de arquivo, use load_sql_from_file().",
|
||||||
"- Use a template oficial abaixo para a análise conservadora (sem reescrever a SQL).",
|
"- Use a template oficial abaixo para a análise conservadora (sem reescrever a SQL).",
|
||||||
AgentPromptTemplates.CONSERVATIVE_ANALYSIS.template.strip(),
|
"""
|
||||||
|
You are an expert $database_name database analyst and performance specialist.
|
||||||
|
|
||||||
|
Your task is to ANALYZE the SQL query below and provide SUGGESTIONS for improvement.
|
||||||
|
|
||||||
|
⚠️ CRITICAL: You must NOT rewrite or modify the query. Only provide analysis and suggestions.
|
||||||
|
|
||||||
|
$database_name SQL Query:
|
||||||
|
```sql
|
||||||
|
$query
|
||||||
|
```
|
||||||
|
|
||||||
|
Query Complexity Information:
|
||||||
|
- Columns: $column_count
|
||||||
|
- Tables: $table_count
|
||||||
|
- Subqueries: $subquery_count
|
||||||
|
- CASE statements: $case_count
|
||||||
|
- JOINs: $join_count
|
||||||
|
- Complexity Level: $complexity_level
|
||||||
|
|
||||||
|
Provide your analysis in the following structured format:
|
||||||
|
|
||||||
|
## PERFORMANCE ISSUES
|
||||||
|
List each performance issue found, with severity (CRITICAL/HIGH/MEDIUM/LOW):
|
||||||
|
- [SEVERITY] Issue description
|
||||||
|
- [SEVERITY] Issue description
|
||||||
|
|
||||||
|
## SUGGESTED INDEXES
|
||||||
|
List indexes that could improve this query:
|
||||||
|
- CREATE INDEX idx_name ON table(columns) -- Reason
|
||||||
|
|
||||||
|
## OPTIMIZATION SUGGESTIONS
|
||||||
|
List specific suggestions WITHOUT rewriting the query:
|
||||||
|
- Suggestion 1: Description of what could be improved and why
|
||||||
|
- Suggestion 2: Description of what could be improved and why
|
||||||
|
|
||||||
|
## RISK ASSESSMENT
|
||||||
|
- WITH (NOLOCK) usage: [Yes/No] - If yes, explain the risks
|
||||||
|
- Missing WHERE clause: [Yes/No] - If yes, explain the impact
|
||||||
|
- Implicit conversions: [Yes/No] - If yes, list them
|
||||||
|
|
||||||
|
## SUMMARY
|
||||||
|
Brief summary of the most important findings and priority order for addressing them.
|
||||||
|
|
||||||
|
Remember: DO NOT provide a rewritten query. Only analysis and suggestions.
|
||||||
|
""".strip(),
|
||||||
"- NÃO reescreva a SQL em hipótese alguma.",
|
"- NÃO reescreva a SQL em hipótese alguma.",
|
||||||
],
|
],
|
||||||
)
|
)
|
||||||
|
|
|
||||||
|
|
@ -1,227 +0,0 @@
|
||||||
"""Prompt templates used by Agno agents.
|
|
||||||
|
|
||||||
This module keeps the canonical prompts alongside the agents to ensure
|
|
||||||
all prompt content is owned and maintained by the agent layer.
|
|
||||||
"""
|
|
||||||
|
|
||||||
from string import Template
|
|
||||||
|
|
||||||
|
|
||||||
class AgentPromptTemplates:
|
|
||||||
"""Collection of prompt templates for SQL optimization agents."""
|
|
||||||
|
|
||||||
SQL_TO_NATURAL = Template("""
|
|
||||||
You are an expert $database_name database analyst and performance specialist. Your task is to translate the SQL query below into a detailed, precise natural-language description that another agent will later use to reconstruct and optimize the query.
|
|
||||||
|
|
||||||
$database_name SQL Query:
|
|
||||||
```sql
|
|
||||||
$query
|
|
||||||
```
|
|
||||||
|
|
||||||
Your explanation must follow these requirements:
|
|
||||||
|
|
||||||
1. **Describe the overall purpose**
|
|
||||||
- Explain clearly what the query is intended to accomplish and why (retrieve data, update rows, aggregate information, validate existence, create structures, etc.).
|
|
||||||
|
|
||||||
2. **List ALL involved database objects**
|
|
||||||
Explicitly list every:
|
|
||||||
- Table
|
|
||||||
- View
|
|
||||||
- CTE (Common Table Expression)
|
|
||||||
- Subquery or derived table
|
|
||||||
- Function
|
|
||||||
- Stored procedure, if referenced
|
|
||||||
- Temporary table
|
|
||||||
- Schema-qualified object
|
|
||||||
Use the exact names as they appear in the query.
|
|
||||||
|
|
||||||
3. **Describe all essential operations**
|
|
||||||
Explicitly state, using exact column names:
|
|
||||||
- Columns retrieved or modified
|
|
||||||
- Join types, join conditions, and which objects participate
|
|
||||||
- Filters and conditions (WHERE, boolean logic, comparisons)
|
|
||||||
- Aggregations (SUM, COUNT, etc.)
|
|
||||||
- Grouping and HAVING clauses
|
|
||||||
- Sorting (ORDER BY)
|
|
||||||
- Window functions
|
|
||||||
- DISTINCT, TOP, LIMIT, OFFSET, pagination
|
|
||||||
- Any $database_name-specific features used$specific_features
|
|
||||||
|
|
||||||
4. **Maintain strict factual accuracy**
|
|
||||||
- Do NOT infer business meaning unless directly implied.
|
|
||||||
- Do NOT rename or paraphrase column names; repeat them exactly.
|
|
||||||
|
|
||||||
5. **Use clear, structured natural language**
|
|
||||||
- Provide a step-by-step explanation that makes every operation and purpose explicit.
|
|
||||||
- The output must be complete enough that the query can be reconstructed.
|
|
||||||
|
|
||||||
6. **⚠️ CRITICAL: Identify Performance Issues**
|
|
||||||
Flag any of these CRITICAL performance problems found in the query:
|
|
||||||
- **NO WHERE CLAUSE** (BE CAREFUL - AVOID FALSE POSITIVES):
|
|
||||||
* ONLY flag if the MAIN/OUTER SELECT has absolutely NO WHERE keyword with filtering conditions
|
|
||||||
* If query HAS 'WHERE' followed by conditions (even old-style JOINs in WHERE), DO NOT flag
|
|
||||||
* Subqueries/EXISTS having WHERE does NOT mean main query has no WHERE
|
|
||||||
* CROSS APPLY/LATERAL with internal WHERE counts as filtered
|
|
||||||
* If truly no WHERE: Flag as CRITICAL (causes FULL TABLE SCAN, no predicate pushdown)
|
|
||||||
- **Non-SARGable patterns**: Functions on indexed columns in WHERE/JOIN (e.g., YEAR(date), UPPER(col))
|
|
||||||
- **Leading wildcards**: LIKE '%value%' patterns that prevent index usage
|
|
||||||
- **Implicit conversions**: Type mismatches in comparisons
|
|
||||||
- **NOLOCK/WITH (NOLOCK) hints**: If query uses WITH (NOLOCK), WITH (nolock), WITH(NOLOCK), (NOLOCK), (nolock) or NOLOCK/nolock (any case) → DO NOT REMOVE, but FLAG as **CRITICAL RISK**: "⚠️ WITH (NOLOCK) reads uncommitted/dirty data - CRITICAL: may cause INCORRECT FINANCIAL VALUES and data inconsistencies in production"
|
|
||||||
$analysis_requirements
|
|
||||||
|
|
||||||
Explanation:
|
|
||||||
""")
|
|
||||||
|
|
||||||
NATURAL_TO_SQL = Template("""
|
|
||||||
You are an expert $database_name SQL developer and query performance specialist.
|
|
||||||
Your task is to write an optimized SQL query based exclusively on the natural-language description provided below.
|
|
||||||
|
|
||||||
Description:
|
|
||||||
$explanation
|
|
||||||
|
|
||||||
⚠️ CRITICAL RULES - READ BEFORE GENERATING SQL:
|
|
||||||
|
|
||||||
1. **PRESERVE ALL BUSINESS LOGIC EXACTLY**
|
|
||||||
- Every CASE WHEN statement must have IDENTICAL conditions and results
|
|
||||||
- Every calculated column must use IDENTICAL formulas
|
|
||||||
- Every subquery must query the SAME tables with SAME filters
|
|
||||||
- Do NOT simplify, merge, or "improve" business logic - even if it looks redundant
|
|
||||||
- If description mentions specific conditions (cd_tp_apolice = 2, etc.), preserve them EXACTLY
|
|
||||||
|
|
||||||
2. **PRESERVE ALL TABLES AND COLUMNS**
|
|
||||||
- Include EVERY table mentioned in the description
|
|
||||||
- Include EVERY column mentioned in the description
|
|
||||||
- Use EXACT column names as described (no renaming)
|
|
||||||
- Use EXACT table aliases as described
|
|
||||||
|
|
||||||
3. **Translate the full described logic into SQL**
|
|
||||||
- Implement all actions, operations, filters, joins, and conditions exactly as stated.
|
|
||||||
- Use every object and column referenced in the description, using their exact names.
|
|
||||||
- If the description mentions specific filter values (e.g., cd_tipo_endosso = 0), use those EXACT values
|
|
||||||
|
|
||||||
4. **Write optimized SQL while preserving semantics**
|
|
||||||
- Apply $database_name best practices for performance.
|
|
||||||
- Use indexing-aware filtering, efficient join strategies, and clear expressions.
|
|
||||||
- Implement aggregations, groupings, window functions, or pagination when described.
|
|
||||||
- Prefer performant constructs commonly recommended for $database_name workloads.
|
|
||||||
- OPTIMIZATION means structure/hints/indexes - NOT changing logic
|
|
||||||
|
|
||||||
5. **Use $database_name-specific syntax and features**
|
|
||||||
- Apply native functions, operators, optimizer behaviors, or hints when appropriate.
|
|
||||||
- Incorporate $specific_requirements if provided.
|
|
||||||
|
|
||||||
6. **Ensure logical fidelity - ZERO TOLERANCE FOR CHANGES**
|
|
||||||
- The SQL must reflect PRECISELY the behavior described
|
|
||||||
- Do NOT add logic not explicitly stated
|
|
||||||
- Do NOT omit any step described
|
|
||||||
- Do NOT infer or assume details beyond what is explicitly stated
|
|
||||||
- Do NOT "simplify" complex CASE statements
|
|
||||||
- Do NOT merge or combine separate calculated columns
|
|
||||||
|
|
||||||
7. **Self-Verification Checklist** (perform before outputting):
|
|
||||||
- [ ] All tables from description are present in query
|
|
||||||
- [ ] All columns from description are present in SELECT
|
|
||||||
- [ ] All CASE conditions match description exactly
|
|
||||||
- [ ] All subquery filters match description exactly
|
|
||||||
- [ ] All JOIN conditions match description exactly
|
|
||||||
- [ ] No business logic was simplified or changed
|
|
||||||
|
|
||||||
8. **Output format**
|
|
||||||
- Provide ONLY the final, optimized SQL query.
|
|
||||||
- Do NOT include explanations, comments, or extra text.
|
|
||||||
|
|
||||||
Optimized SQL Query:
|
|
||||||
""")
|
|
||||||
|
|
||||||
CONSERVATIVE_ANALYSIS = Template("""
|
|
||||||
You are an expert $database_name database analyst and performance specialist.
|
|
||||||
|
|
||||||
Your task is to ANALYZE the SQL query below and provide SUGGESTIONS for improvement.
|
|
||||||
|
|
||||||
⚠️ CRITICAL: You must NOT rewrite or modify the query. Only provide analysis and suggestions.
|
|
||||||
|
|
||||||
$database_name SQL Query:
|
|
||||||
```sql
|
|
||||||
$query
|
|
||||||
```
|
|
||||||
|
|
||||||
Query Complexity Information:
|
|
||||||
- Columns: $column_count
|
|
||||||
- Tables: $table_count
|
|
||||||
- Subqueries: $subquery_count
|
|
||||||
- CASE statements: $case_count
|
|
||||||
- JOINs: $join_count
|
|
||||||
- Complexity Level: $complexity_level
|
|
||||||
|
|
||||||
Provide your analysis in the following structured format:
|
|
||||||
|
|
||||||
## PERFORMANCE ISSUES
|
|
||||||
List each performance issue found, with severity (CRITICAL/HIGH/MEDIUM/LOW):
|
|
||||||
- [SEVERITY] Issue description
|
|
||||||
- [SEVERITY] Issue description
|
|
||||||
|
|
||||||
## SUGGESTED INDEXES
|
|
||||||
List indexes that could improve this query:
|
|
||||||
- CREATE INDEX idx_name ON table(columns) -- Reason
|
|
||||||
|
|
||||||
## OPTIMIZATION SUGGESTIONS
|
|
||||||
List specific suggestions WITHOUT rewriting the query:
|
|
||||||
- Suggestion 1: Description of what could be improved and why
|
|
||||||
- Suggestion 2: Description of what could be improved and why
|
|
||||||
|
|
||||||
## RISK ASSESSMENT
|
|
||||||
- WITH (NOLOCK) usage: [Yes/No] - If yes, explain the risks
|
|
||||||
- Missing WHERE clause: [Yes/No] - If yes, explain the impact
|
|
||||||
- Implicit conversions: [Yes/No] - If yes, list them
|
|
||||||
|
|
||||||
## SUMMARY
|
|
||||||
Brief summary of the most important findings and priority order for addressing them.
|
|
||||||
|
|
||||||
Remember: DO NOT provide a rewritten query. Only analysis and suggestions.
|
|
||||||
""")
|
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def render_sql_to_natural(
|
|
||||||
cls, database_name: str, query: str, specific_features: str = "", analysis_requirements: str = ""
|
|
||||||
) -> str:
|
|
||||||
return cls.SQL_TO_NATURAL.substitute(
|
|
||||||
database_name=database_name,
|
|
||||||
query=query,
|
|
||||||
specific_features=f"\n{specific_features}" if specific_features else "",
|
|
||||||
analysis_requirements=f"\n{analysis_requirements}" if analysis_requirements else "",
|
|
||||||
)
|
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def render_natural_to_sql(
|
|
||||||
cls, database_name: str, explanation: str, specific_requirements: str
|
|
||||||
) -> str:
|
|
||||||
return cls.NATURAL_TO_SQL.substitute(
|
|
||||||
database_name=database_name,
|
|
||||||
explanation=explanation,
|
|
||||||
specific_requirements="\n".join(
|
|
||||||
f"- {req}" for req in specific_requirements.split("\n") if req.strip()
|
|
||||||
),
|
|
||||||
)
|
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def render_conservative_analysis(
|
|
||||||
cls,
|
|
||||||
database_name: str,
|
|
||||||
query: str,
|
|
||||||
column_count: int = 0,
|
|
||||||
table_count: int = 0,
|
|
||||||
subquery_count: int = 0,
|
|
||||||
case_count: int = 0,
|
|
||||||
join_count: int = 0,
|
|
||||||
complexity_level: str = "unknown",
|
|
||||||
) -> str:
|
|
||||||
return cls.CONSERVATIVE_ANALYSIS.substitute(
|
|
||||||
database_name=database_name,
|
|
||||||
query=query,
|
|
||||||
column_count=column_count,
|
|
||||||
table_count=table_count,
|
|
||||||
subquery_count=subquery_count,
|
|
||||||
case_count=case_count,
|
|
||||||
join_count=join_count,
|
|
||||||
complexity_level=complexity_level,
|
|
||||||
)
|
|
||||||
|
|
@ -2,7 +2,6 @@ from agno.agent import Agent
|
||||||
from agno.db.sqlite import SqliteDb
|
from agno.db.sqlite import SqliteDb
|
||||||
from sql_optimizer_team.tools.engine.model_selector import get_model
|
from sql_optimizer_team.tools.engine.model_selector import get_model
|
||||||
from sql_optimizer_team.tools.core_tools import explain_query_core
|
from sql_optimizer_team.tools.core_tools import explain_query_core
|
||||||
from sql_optimizer_team.agents.prompts import AgentPromptTemplates
|
|
||||||
from sql_optimizer_team.tools.prompt_tools import supported_databases
|
from sql_optimizer_team.tools.prompt_tools import supported_databases
|
||||||
from sql_optimizer_team.tools.sql_tools import load_sql_from_file, ensure_non_empty
|
from sql_optimizer_team.tools.sql_tools import load_sql_from_file, ensure_non_empty
|
||||||
import os
|
import os
|
||||||
|
|
@ -33,7 +32,67 @@ sql_analyst_agent = Agent(
|
||||||
"- Se o usuário fornecer um caminho de arquivo, use load_sql_from_file().",
|
"- Se o usuário fornecer um caminho de arquivo, use load_sql_from_file().",
|
||||||
"- Preferência: use explain_query_core(database_type, sql) para gerar a explicação via core de negócio.",
|
"- Preferência: use explain_query_core(database_type, sql) para gerar a explicação via core de negócio.",
|
||||||
"- Use a template oficial abaixo para estruturar a explicação (SQL → natural).",
|
"- Use a template oficial abaixo para estruturar a explicação (SQL → natural).",
|
||||||
AgentPromptTemplates.SQL_TO_NATURAL.template.strip(),
|
"""
|
||||||
|
You are an expert $database_name database analyst and performance specialist. Your task is to translate the SQL query below into a detailed, precise natural-language description that another agent will later use to reconstruct and optimize the query.
|
||||||
|
|
||||||
|
$database_name SQL Query:
|
||||||
|
```sql
|
||||||
|
$query
|
||||||
|
```
|
||||||
|
|
||||||
|
Your explanation must follow these requirements:
|
||||||
|
|
||||||
|
1. **Describe the overall purpose**
|
||||||
|
- Explain clearly what the query is intended to accomplish and why (retrieve data, update rows, aggregate information, validate existence, create structures, etc.).
|
||||||
|
|
||||||
|
2. **List ALL involved database objects**
|
||||||
|
Explicitly list every:
|
||||||
|
- Table
|
||||||
|
- View
|
||||||
|
- CTE (Common Table Expression)
|
||||||
|
- Subquery or derived table
|
||||||
|
- Function
|
||||||
|
- Stored procedure, if referenced
|
||||||
|
- Temporary table
|
||||||
|
- Schema-qualified object
|
||||||
|
Use the exact names as they appear in the query.
|
||||||
|
|
||||||
|
3. **Describe all essential operations**
|
||||||
|
Explicitly state, using exact column names:
|
||||||
|
- Columns retrieved or modified
|
||||||
|
- Join types, join conditions, and which objects participate
|
||||||
|
- Filters and conditions (WHERE, boolean logic, comparisons)
|
||||||
|
- Aggregations (SUM, COUNT, etc.)
|
||||||
|
- Grouping and HAVING clauses
|
||||||
|
- Sorting (ORDER BY)
|
||||||
|
- Window functions
|
||||||
|
- DISTINCT, TOP, LIMIT, OFFSET, pagination
|
||||||
|
- Any $database_name-specific features used$specific_features
|
||||||
|
|
||||||
|
4. **Maintain strict factual accuracy**
|
||||||
|
- Do NOT infer business meaning unless directly implied.
|
||||||
|
- Do NOT rename or paraphrase column names; repeat them exactly.
|
||||||
|
|
||||||
|
5. **Use clear, structured natural language**
|
||||||
|
- Provide a step-by-step explanation that makes every operation and purpose explicit.
|
||||||
|
- The output must be complete enough that the query can be reconstructed.
|
||||||
|
|
||||||
|
6. **⚠️ CRITICAL: Identify Performance Issues**
|
||||||
|
Flag any of these CRITICAL performance problems found in the query:
|
||||||
|
- **NO WHERE CLAUSE** (BE CAREFUL - AVOID FALSE POSITIVES):
|
||||||
|
* ONLY flag if the MAIN/OUTER SELECT has absolutely NO WHERE keyword with filtering conditions
|
||||||
|
* If query HAS 'WHERE' followed by conditions (even old-style JOINs in WHERE), DO NOT flag
|
||||||
|
* Subqueries/EXISTS having WHERE does NOT mean main query has no WHERE
|
||||||
|
* CROSS APPLY/LATERAL with internal WHERE counts as filtered
|
||||||
|
* If truly no WHERE: Flag as CRITICAL (causes FULL TABLE SCAN, no predicate pushdown)
|
||||||
|
- **Non-SARGable patterns**: Functions on indexed columns in WHERE/JOIN (e.g., YEAR(date), UPPER(col))
|
||||||
|
- **Leading wildcards**: LIKE '%value%' patterns that prevent index usage
|
||||||
|
- **Implicit conversions**: Type mismatches in comparisons
|
||||||
|
- **NOLOCK/WITH (NOLOCK) hints**: If query uses WITH (NOLOCK), WITH (nolock), WITH(NOLOCK), (NOLOCK), (nolock) or NOLOCK/nolock (any case) → DO NOT REMOVE, but FLAG as **CRITICAL RISK**: "⚠️ WITH (NOLOCK) reads uncommitted/dirty data - CRITICAL: may cause INCORRECT FINANCIAL VALUES and data inconsistencies in production"
|
||||||
|
$analysis_requirements
|
||||||
|
|
||||||
|
Explanation:
|
||||||
|
""".strip(),
|
||||||
"- Entregue apenas a explicação natural estruturada conforme a prompt; não reescreva a SQL.",
|
"- Entregue apenas a explicação natural estruturada conforme a prompt; não reescreva a SQL.",
|
||||||
"- Identifique problemas críticos de performance conforme a prompt.",
|
"- Identifique problemas críticos de performance conforme a prompt.",
|
||||||
],
|
],
|
||||||
|
|
|
||||||
|
|
@ -2,7 +2,6 @@ from agno.agent import Agent
|
||||||
from agno.db.sqlite import SqliteDb
|
from agno.db.sqlite import SqliteDb
|
||||||
from sql_optimizer_team.tools.engine.model_selector import get_model
|
from sql_optimizer_team.tools.engine.model_selector import get_model
|
||||||
from sql_optimizer_team.tools.core_tools import optimize_query_core
|
from sql_optimizer_team.tools.core_tools import optimize_query_core
|
||||||
from sql_optimizer_team.agents.prompts import AgentPromptTemplates
|
|
||||||
from sql_optimizer_team.tools.prompt_tools import supported_databases
|
from sql_optimizer_team.tools.prompt_tools import supported_databases
|
||||||
from sql_optimizer_team.tools.sql_tools import load_sql_from_file, ensure_non_empty
|
from sql_optimizer_team.tools.sql_tools import load_sql_from_file, ensure_non_empty
|
||||||
import os
|
import os
|
||||||
|
|
@ -32,7 +31,66 @@ sql_optimizer_agent = Agent(
|
||||||
"- Exija banco alvo e SQL antes de otimizar.",
|
"- Exija banco alvo e SQL antes de otimizar.",
|
||||||
"- Use optimize_query_core(database_type, sql) para executar o core de negócio.",
|
"- Use optimize_query_core(database_type, sql) para executar o core de negócio.",
|
||||||
"- Use a template oficial abaixo para reescrever (natural → SQL) mantendo 100% da lógica.",
|
"- Use a template oficial abaixo para reescrever (natural → SQL) mantendo 100% da lógica.",
|
||||||
AgentPromptTemplates.NATURAL_TO_SQL.template.strip(),
|
"""
|
||||||
|
You are an expert $database_name SQL developer and query performance specialist.
|
||||||
|
Your task is to write an optimized SQL query based exclusively on the natural-language description provided below.
|
||||||
|
|
||||||
|
Description:
|
||||||
|
$explanation
|
||||||
|
|
||||||
|
⚠️ CRITICAL RULES - READ BEFORE GENERATING SQL:
|
||||||
|
|
||||||
|
1. **PRESERVE ALL BUSINESS LOGIC EXACTLY**
|
||||||
|
- Every CASE WHEN statement must have IDENTICAL conditions and results
|
||||||
|
- Every calculated column must use IDENTICAL formulas
|
||||||
|
- Every subquery must query the SAME tables with SAME filters
|
||||||
|
- Do NOT simplify, merge, or "improve" business logic - even if it looks redundant
|
||||||
|
- If description mentions specific conditions (cd_tp_apolice = 2, etc.), preserve them EXACTLY
|
||||||
|
|
||||||
|
2. **PRESERVE ALL TABLES AND COLUMNS**
|
||||||
|
- Include EVERY table mentioned in the description
|
||||||
|
- Include EVERY column mentioned in the description
|
||||||
|
- Use EXACT column names as described (no renaming)
|
||||||
|
- Use EXACT table aliases as described
|
||||||
|
|
||||||
|
3. **Translate the full described logic into SQL**
|
||||||
|
- Implement all actions, operations, filters, joins, and conditions exactly as stated.
|
||||||
|
- Use every object and column referenced in the description, using their exact names.
|
||||||
|
- If the description mentions specific filter values (e.g., cd_tipo_endosso = 0), use those EXACT values
|
||||||
|
|
||||||
|
4. **Write optimized SQL while preserving semantics**
|
||||||
|
- Apply $database_name best practices for performance.
|
||||||
|
- Use indexing-aware filtering, efficient join strategies, and clear expressions.
|
||||||
|
- Implement aggregations, groupings, window functions, or pagination when described.
|
||||||
|
- Prefer performant constructs commonly recommended for $database_name workloads.
|
||||||
|
- OPTIMIZATION means structure/hints/indexes - NOT changing logic
|
||||||
|
|
||||||
|
5. **Use $database_name-specific syntax and features**
|
||||||
|
- Apply native functions, operators, optimizer behaviors, or hints when appropriate.
|
||||||
|
- Incorporate $specific_requirements if provided.
|
||||||
|
|
||||||
|
6. **Ensure logical fidelity - ZERO TOLERANCE FOR CHANGES**
|
||||||
|
- The SQL must reflect PRECISELY the behavior described
|
||||||
|
- Do NOT add logic not explicitly stated
|
||||||
|
- Do NOT omit any step described
|
||||||
|
- Do NOT infer or assume details beyond what is explicitly stated
|
||||||
|
- Do NOT "simplify" complex CASE statements
|
||||||
|
- Do NOT merge or combine separate calculated columns
|
||||||
|
|
||||||
|
7. **Self-Verification Checklist** (perform before outputting):
|
||||||
|
- [ ] All tables from description are present in query
|
||||||
|
- [ ] All columns from description are present in SELECT
|
||||||
|
- [ ] All CASE conditions match description exactly
|
||||||
|
- [ ] All subquery filters match description exactly
|
||||||
|
- [ ] All JOIN conditions match description exactly
|
||||||
|
- [ ] No business logic was simplified or changed
|
||||||
|
|
||||||
|
8. **Output format**
|
||||||
|
- Provide ONLY the final, optimized SQL query.
|
||||||
|
- Do NOT include explanations, comments, or extra text.
|
||||||
|
|
||||||
|
Optimized SQL Query:
|
||||||
|
""".strip(),
|
||||||
"- Extraia e devolva SOMENTE optimized_query (sem explicações, sem markdown).",
|
"- Extraia e devolva SOMENTE optimized_query (sem explicações, sem markdown).",
|
||||||
"- Preserve 100% da lógica, colunas, aliases, filtros, joins e subqueries.",
|
"- Preserve 100% da lógica, colunas, aliases, filtros, joins e subqueries.",
|
||||||
],
|
],
|
||||||
|
|
|
||||||
|
|
@ -6,8 +6,225 @@ reducing code duplication and ensuring consistency.
|
||||||
|
|
||||||
from abc import ABC, abstractmethod
|
from abc import ABC, abstractmethod
|
||||||
|
|
||||||
|
from string import Template
|
||||||
|
|
||||||
from sql_optimizer_team.tools.engine.tools_api.prompt_tool import PromptGeneratorTool
|
from sql_optimizer_team.tools.engine.tools_api.prompt_tool import PromptGeneratorTool
|
||||||
from sql_optimizer_team.agents.prompts import AgentPromptTemplates
|
|
||||||
|
|
||||||
|
# Prompt template asking the model to describe a SQL query in natural language
# and to flag the listed performance issues.
# string.Template placeholders used in the text below: $database_name, $query,
# $specific_features and $analysis_requirements (filled by
# _render_sql_to_natural).
SQL_TO_NATURAL_TEMPLATE = Template("""
|
||||||
|
You are an expert $database_name database analyst and performance specialist. Your task is to translate the SQL query below into a detailed, precise natural-language description that another agent will later use to reconstruct and optimize the query.
|
||||||
|
|
||||||
|
$database_name SQL Query:
|
||||||
|
```sql
|
||||||
|
$query
|
||||||
|
```
|
||||||
|
|
||||||
|
Your explanation must follow these requirements:
|
||||||
|
|
||||||
|
1. **Describe the overall purpose**
|
||||||
|
- Explain clearly what the query is intended to accomplish and why (retrieve data, update rows, aggregate information, validate existence, create structures, etc.).
|
||||||
|
|
||||||
|
2. **List ALL involved database objects**
|
||||||
|
Explicitly list every:
|
||||||
|
- Table
|
||||||
|
- View
|
||||||
|
- CTE (Common Table Expression)
|
||||||
|
- Subquery or derived table
|
||||||
|
- Function
|
||||||
|
- Stored procedure, if referenced
|
||||||
|
- Temporary table
|
||||||
|
- Schema-qualified object
|
||||||
|
Use the exact names as they appear in the query.
|
||||||
|
|
||||||
|
3. **Describe all essential operations**
|
||||||
|
Explicitly state, using exact column names:
|
||||||
|
- Columns retrieved or modified
|
||||||
|
- Join types, join conditions, and which objects participate
|
||||||
|
- Filters and conditions (WHERE, boolean logic, comparisons)
|
||||||
|
- Aggregations (SUM, COUNT, etc.)
|
||||||
|
- Grouping and HAVING clauses
|
||||||
|
- Sorting (ORDER BY)
|
||||||
|
- Window functions
|
||||||
|
- DISTINCT, TOP, LIMIT, OFFSET, pagination
|
||||||
|
- Any $database_name-specific features used$specific_features
|
||||||
|
|
||||||
|
4. **Maintain strict factual accuracy**
|
||||||
|
- Do NOT infer business meaning unless directly implied.
|
||||||
|
- Do NOT rename or paraphrase column names; repeat them exactly.
|
||||||
|
|
||||||
|
5. **Use clear, structured natural language**
|
||||||
|
- Provide a step-by-step explanation that makes every operation and purpose explicit.
|
||||||
|
- The output must be complete enough that the query can be reconstructed.
|
||||||
|
|
||||||
|
6. **⚠️ CRITICAL: Identify Performance Issues**
|
||||||
|
Flag any of these CRITICAL performance problems found in the query:
|
||||||
|
- **NO WHERE CLAUSE** (BE CAREFUL - AVOID FALSE POSITIVES):
|
||||||
|
* ONLY flag if the MAIN/OUTER SELECT has absolutely NO WHERE keyword with filtering conditions
|
||||||
|
* If query HAS 'WHERE' followed by conditions (even old-style JOINs in WHERE), DO NOT flag
|
||||||
|
* Subqueries/EXISTS having WHERE does NOT mean main query has no WHERE
|
||||||
|
* CROSS APPLY/LATERAL with internal WHERE counts as filtered
|
||||||
|
* If truly no WHERE: Flag as CRITICAL (causes FULL TABLE SCAN, no predicate pushdown)
|
||||||
|
- **Non-SARGable patterns**: Functions on indexed columns in WHERE/JOIN (e.g., YEAR(date), UPPER(col))
|
||||||
|
- **Leading wildcards**: LIKE '%value%' patterns that prevent index usage
|
||||||
|
- **Implicit conversions**: Type mismatches in comparisons
|
||||||
|
- **NOLOCK/WITH (NOLOCK) hints**: If query uses WITH (NOLOCK), WITH (nolock), WITH(NOLOCK), (NOLOCK), (nolock) or NOLOCK/nolock (any case) → DO NOT REMOVE, but FLAG as **CRITICAL RISK**: "⚠️ WITH (NOLOCK) reads uncommitted/dirty data - CRITICAL: may cause INCORRECT FINANCIAL VALUES and data inconsistencies in production"
|
||||||
|
$analysis_requirements
|
||||||
|
|
||||||
|
Explanation:
|
||||||
|
""")
|
||||||
|
|
||||||
|
# Prompt template asking the model to write an optimized SQL query from a
# natural-language description while preserving the described logic exactly.
# string.Template placeholders used in the text below: $database_name,
# $explanation and $specific_requirements (filled by _render_natural_to_sql).
NATURAL_TO_SQL_TEMPLATE = Template("""
|
||||||
|
You are an expert $database_name SQL developer and query performance specialist.
|
||||||
|
Your task is to write an optimized SQL query based exclusively on the natural-language description provided below.
|
||||||
|
|
||||||
|
Description:
|
||||||
|
$explanation
|
||||||
|
|
||||||
|
⚠️ CRITICAL RULES - READ BEFORE GENERATING SQL:
|
||||||
|
|
||||||
|
1. **PRESERVE ALL BUSINESS LOGIC EXACTLY**
|
||||||
|
- Every CASE WHEN statement must have IDENTICAL conditions and results
|
||||||
|
- Every calculated column must use IDENTICAL formulas
|
||||||
|
- Every subquery must query the SAME tables with SAME filters
|
||||||
|
- Do NOT simplify, merge, or "improve" business logic - even if it looks redundant
|
||||||
|
- If description mentions specific conditions (cd_tp_apolice = 2, etc.), preserve them EXACTLY
|
||||||
|
|
||||||
|
2. **PRESERVE ALL TABLES AND COLUMNS**
|
||||||
|
- Include EVERY table mentioned in the description
|
||||||
|
- Include EVERY column mentioned in the description
|
||||||
|
- Use EXACT column names as described (no renaming)
|
||||||
|
- Use EXACT table aliases as described
|
||||||
|
|
||||||
|
3. **Translate the full described logic into SQL**
|
||||||
|
- Implement all actions, operations, filters, joins, and conditions exactly as stated.
|
||||||
|
- Use every object and column referenced in the description, using their exact names.
|
||||||
|
- If the description mentions specific filter values (e.g., cd_tipo_endosso = 0), use those EXACT values
|
||||||
|
|
||||||
|
4. **Write optimized SQL while preserving semantics**
|
||||||
|
- Apply $database_name best practices for performance.
|
||||||
|
- Use indexing-aware filtering, efficient join strategies, and clear expressions.
|
||||||
|
- Implement aggregations, groupings, window functions, or pagination when described.
|
||||||
|
- Prefer performant constructs commonly recommended for $database_name workloads.
|
||||||
|
- OPTIMIZATION means structure/hints/indexes - NOT changing logic
|
||||||
|
|
||||||
|
5. **Use $database_name-specific syntax and features**
|
||||||
|
- Apply native functions, operators, optimizer behaviors, or hints when appropriate.
|
||||||
|
- Incorporate $specific_requirements if provided.
|
||||||
|
|
||||||
|
6. **Ensure logical fidelity - ZERO TOLERANCE FOR CHANGES**
|
||||||
|
- The SQL must reflect PRECISELY the behavior described
|
||||||
|
- Do NOT add logic not explicitly stated
|
||||||
|
- Do NOT omit any step described
|
||||||
|
- Do NOT infer or assume details beyond what is explicitly stated
|
||||||
|
- Do NOT "simplify" complex CASE statements
|
||||||
|
- Do NOT merge or combine separate calculated columns
|
||||||
|
|
||||||
|
7. **Self-Verification Checklist** (perform before outputting):
|
||||||
|
- [ ] All tables from description are present in query
|
||||||
|
- [ ] All columns from description are present in SELECT
|
||||||
|
- [ ] All CASE conditions match description exactly
|
||||||
|
- [ ] All subquery filters match description exactly
|
||||||
|
- [ ] All JOIN conditions match description exactly
|
||||||
|
- [ ] No business logic was simplified or changed
|
||||||
|
|
||||||
|
8. **Output format**
|
||||||
|
- Provide ONLY the final, optimized SQL query.
|
||||||
|
- Do NOT include explanations, comments, or extra text.
|
||||||
|
|
||||||
|
Optimized SQL Query:
|
||||||
|
""")
|
||||||
|
|
||||||
|
# Prompt template asking the model to analyze a SQL query and list suggestions
# without rewriting it.
# string.Template placeholders used in the text below: $database_name, $query,
# $column_count, $table_count, $subquery_count, $case_count, $join_count and
# $complexity_level (filled by _render_conservative_analysis).
CONSERVATIVE_ANALYSIS_TEMPLATE = Template("""
|
||||||
|
You are an expert $database_name database analyst and performance specialist.
|
||||||
|
|
||||||
|
Your task is to ANALYZE the SQL query below and provide SUGGESTIONS for improvement.
|
||||||
|
|
||||||
|
⚠️ CRITICAL: You must NOT rewrite or modify the query. Only provide analysis and suggestions.
|
||||||
|
|
||||||
|
$database_name SQL Query:
|
||||||
|
```sql
|
||||||
|
$query
|
||||||
|
```
|
||||||
|
|
||||||
|
Query Complexity Information:
|
||||||
|
- Columns: $column_count
|
||||||
|
- Tables: $table_count
|
||||||
|
- Subqueries: $subquery_count
|
||||||
|
- CASE statements: $case_count
|
||||||
|
- JOINs: $join_count
|
||||||
|
- Complexity Level: $complexity_level
|
||||||
|
|
||||||
|
Provide your analysis in the following structured format:
|
||||||
|
|
||||||
|
## PERFORMANCE ISSUES
|
||||||
|
List each performance issue found, with severity (CRITICAL/HIGH/MEDIUM/LOW):
|
||||||
|
- [SEVERITY] Issue description
|
||||||
|
- [SEVERITY] Issue description
|
||||||
|
|
||||||
|
## SUGGESTED INDEXES
|
||||||
|
List indexes that could improve this query:
|
||||||
|
- CREATE INDEX idx_name ON table(columns) -- Reason
|
||||||
|
|
||||||
|
## OPTIMIZATION SUGGESTIONS
|
||||||
|
List specific suggestions WITHOUT rewriting the query:
|
||||||
|
- Suggestion 1: Description of what could be improved and why
|
||||||
|
- Suggestion 2: Description of what could be improved and why
|
||||||
|
|
||||||
|
## RISK ASSESSMENT
|
||||||
|
- WITH (NOLOCK) usage: [Yes/No] - If yes, explain the risks
|
||||||
|
- Missing WHERE clause: [Yes/No] - If yes, explain the impact
|
||||||
|
- Implicit conversions: [Yes/No] - If yes, list them
|
||||||
|
|
||||||
|
## SUMMARY
|
||||||
|
Brief summary of the most important findings and priority order for addressing them.
|
||||||
|
|
||||||
|
Remember: DO NOT provide a rewritten query. Only analysis and suggestions.
|
||||||
|
""")
|
||||||
|
|
||||||
|
|
||||||
|
def _render_sql_to_natural(
    database_name: str, query: str, specific_features: str = "", analysis_requirements: str = ""
) -> str:
    """Fill SQL_TO_NATURAL_TEMPLATE and return the resulting prompt text.

    Args:
        database_name: Value substituted for ``$database_name``.
        query: Value substituted for ``$query``.
        specific_features: Optional text for ``$specific_features``; when
            non-empty it is inserted with a leading newline.
        analysis_requirements: Optional text for ``$analysis_requirements``;
            when non-empty it is inserted with a leading newline.

    Returns:
        The template text with all four placeholders substituted.
    """
    # Optional sections get a newline prefix only when they carry content;
    # otherwise the placeholder is replaced by an empty string.
    features_block = "" if not specific_features else f"\n{specific_features}"
    requirements_block = "" if not analysis_requirements else f"\n{analysis_requirements}"

    placeholders = {
        "database_name": database_name,
        "query": query,
        "specific_features": features_block,
        "analysis_requirements": requirements_block,
    }
    return SQL_TO_NATURAL_TEMPLATE.substitute(placeholders)
|
||||||
|
|
||||||
|
|
||||||
|
def _render_natural_to_sql(
    database_name: str, explanation: str, specific_requirements: str
) -> str:
    """Fill NATURAL_TO_SQL_TEMPLATE and return the resulting prompt text.

    Args:
        database_name: Value substituted for ``$database_name``.
        explanation: Value substituted for ``$explanation``.
        specific_requirements: Newline-separated requirements. Lines that are
            empty or whitespace-only are dropped; each remaining line is
            prefixed with ``"- "`` and the lines are re-joined with newlines
            before being substituted for ``$specific_requirements``.

    Returns:
        The template text with all three placeholders substituted.
    """
    bullet_lines = []
    for requirement in specific_requirements.split("\n"):
        # Skip blank lines; kept lines are not stripped, only prefixed.
        if not requirement.strip():
            continue
        bullet_lines.append(f"- {requirement}")

    placeholders = {
        "database_name": database_name,
        "explanation": explanation,
        "specific_requirements": "\n".join(bullet_lines),
    }
    return NATURAL_TO_SQL_TEMPLATE.substitute(placeholders)
|
||||||
|
|
||||||
|
|
||||||
|
def _render_conservative_analysis(
    database_name: str,
    query: str,
    column_count: int = 0,
    table_count: int = 0,
    subquery_count: int = 0,
    case_count: int = 0,
    join_count: int = 0,
    complexity_level: str = "unknown",
) -> str:
    """Fill CONSERVATIVE_ANALYSIS_TEMPLATE and return the resulting prompt text.

    Args:
        database_name: Value substituted for ``$database_name``.
        query: Value substituted for ``$query``.
        column_count: Value substituted for ``$column_count``.
        table_count: Value substituted for ``$table_count``.
        subquery_count: Value substituted for ``$subquery_count``.
        case_count: Value substituted for ``$case_count``.
        join_count: Value substituted for ``$join_count``.
        complexity_level: Value substituted for ``$complexity_level``.

    Returns:
        The template text with every placeholder substituted.
    """
    # Each parameter maps one-to-one onto the placeholder of the same name.
    placeholders = {
        "database_name": database_name,
        "query": query,
        "column_count": column_count,
        "table_count": table_count,
        "subquery_count": subquery_count,
        "case_count": case_count,
        "join_count": join_count,
        "complexity_level": complexity_level,
    }
    return CONSERVATIVE_ANALYSIS_TEMPLATE.substitute(placeholders)
|
||||||
|
|
||||||
|
|
||||||
class BasePromptGenerator(PromptGeneratorTool, ABC):
|
class BasePromptGenerator(PromptGeneratorTool, ABC):
|
||||||
|
|
@ -70,7 +287,7 @@ class BasePromptGenerator(PromptGeneratorTool, ABC):
|
||||||
Returns:
|
Returns:
|
||||||
Formatted prompt for LLM
|
Formatted prompt for LLM
|
||||||
"""
|
"""
|
||||||
return AgentPromptTemplates.render_sql_to_natural(
|
return _render_sql_to_natural(
|
||||||
database_name=self.get_database_name(),
|
database_name=self.get_database_name(),
|
||||||
query=query,
|
query=query,
|
||||||
specific_features=self.get_specific_features(),
|
specific_features=self.get_specific_features(),
|
||||||
|
|
@ -86,7 +303,7 @@ class BasePromptGenerator(PromptGeneratorTool, ABC):
|
||||||
Returns:
|
Returns:
|
||||||
Formatted prompt for LLM
|
Formatted prompt for LLM
|
||||||
"""
|
"""
|
||||||
return AgentPromptTemplates.render_natural_to_sql(
|
return _render_natural_to_sql(
|
||||||
database_name=self.get_database_name(),
|
database_name=self.get_database_name(),
|
||||||
explanation=explanation,
|
explanation=explanation,
|
||||||
specific_requirements=self.get_specific_requirements(),
|
specific_requirements=self.get_specific_requirements(),
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue