refactor: Remove AgentPromptTemplates and integrate prompt templates directly into agents for improved clarity and maintainability
This commit is contained in:
parent
ae33ee5bca
commit
c6dd91810b
6 changed files with 414 additions and 242 deletions
|
|
@ -1,9 +1,12 @@
|
|||
"""SQL optimizer team agents."""
|
||||
"""SQL optimizer team agents.
|
||||
|
||||
from sql_optimizer_team.agents.sql_analyst_agent import sql_analyst_agent
|
||||
from sql_optimizer_team.agents.sql_optimizer_agent import sql_optimizer_agent
|
||||
from sql_optimizer_team.agents.sql_quality_agent import sql_quality_agent
|
||||
from sql_optimizer_team.agents.conservative_analysis_agent import conservative_analysis_agent
|
||||
This package uses lazy imports to avoid circular dependencies with tools.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import importlib
|
||||
from typing import Any
|
||||
|
||||
__all__ = [
|
||||
"sql_analyst_agent",
|
||||
|
|
@ -11,3 +14,21 @@ __all__ = [
|
|||
"sql_quality_agent",
|
||||
"conservative_analysis_agent",
|
||||
]
|
||||
|
||||
# Maps each lazily exported agent name to the dotted path of the submodule
# that defines it. Every agent lives in a submodule of the same name, so the
# mapping is derived from the list of names instead of being spelled out.
_AGENT_MODULES = {
    agent_name: f"sql_optimizer_team.agents.{agent_name}"
    for agent_name in (
        "sql_analyst_agent",
        "sql_optimizer_agent",
        "sql_quality_agent",
        "conservative_analysis_agent",
    )
}
|
||||
|
||||
|
||||
def __getattr__(name: str) -> Any:
    """Resolve a lazily exported agent on first access (PEP 562 module hook).

    Args:
        name: Attribute requested on this package.

    Returns:
        The object called ``name`` defined in the submodule registered for
        ``name`` in ``_AGENT_MODULES``.

    Raises:
        AttributeError: If ``name`` is not one of the lazily exported agents.
    """
    module_path = _AGENT_MODULES.get(name)
    if module_path is None:
        raise AttributeError(f"module {__name__!r} has no attribute {name!r}")

    module = importlib.import_module(module_path)
    agent = getattr(module, name)
    # Each agent shares its name with the submodule that defines it, and
    # importing a submodule binds the *module object* on the parent package
    # under that name. Without this overwrite, later lookups would find the
    # submodule in the package namespace, never reach this hook, and hand
    # callers the module instead of the agent.
    globals()[name] = agent
    return agent
|
||||
|
||||
|
||||
def __dir__() -> list[str]:
    """Return the sorted attribute names advertised by this package.

    The result combines the names currently bound in the module namespace
    with the lazily exported names in ``__all__``. A set union is used so a
    name present in both (for example one already bound in the package
    namespace) is listed once instead of twice.
    """
    return sorted(set(globals()) | set(__all__))
|
||||
|
|
|
|||
|
|
@ -1,7 +1,6 @@
|
|||
from agno.agent import Agent
|
||||
from agno.db.sqlite import SqliteDb
|
||||
from sql_optimizer_team.tools.engine.model_selector import get_model
|
||||
from sql_optimizer_team.agents.prompts import AgentPromptTemplates
|
||||
from sql_optimizer_team.tools.prompt_tools import supported_databases
|
||||
from sql_optimizer_team.tools.sql_tools import load_sql_from_file, ensure_non_empty
|
||||
import os
|
||||
|
|
@ -31,7 +30,52 @@ conservative_analysis_agent = Agent(
|
|||
"- Solicite banco e SQL se não estiverem presentes.",
|
||||
"- Se o usuário fornecer um caminho de arquivo, use load_sql_from_file().",
|
||||
"- Use a template oficial abaixo para a análise conservadora (sem reescrever a SQL).",
|
||||
AgentPromptTemplates.CONSERVATIVE_ANALYSIS.template.strip(),
|
||||
"""
|
||||
You are an expert $database_name database analyst and performance specialist.
|
||||
|
||||
Your task is to ANALYZE the SQL query below and provide SUGGESTIONS for improvement.
|
||||
|
||||
⚠️ CRITICAL: You must NOT rewrite or modify the query. Only provide analysis and suggestions.
|
||||
|
||||
$database_name SQL Query:
|
||||
```sql
|
||||
$query
|
||||
```
|
||||
|
||||
Query Complexity Information:
|
||||
- Columns: $column_count
|
||||
- Tables: $table_count
|
||||
- Subqueries: $subquery_count
|
||||
- CASE statements: $case_count
|
||||
- JOINs: $join_count
|
||||
- Complexity Level: $complexity_level
|
||||
|
||||
Provide your analysis in the following structured format:
|
||||
|
||||
## PERFORMANCE ISSUES
|
||||
List each performance issue found, with severity (CRITICAL/HIGH/MEDIUM/LOW):
|
||||
- [SEVERITY] Issue description
|
||||
- [SEVERITY] Issue description
|
||||
|
||||
## SUGGESTED INDEXES
|
||||
List indexes that could improve this query:
|
||||
- CREATE INDEX idx_name ON table(columns) -- Reason
|
||||
|
||||
## OPTIMIZATION SUGGESTIONS
|
||||
List specific suggestions WITHOUT rewriting the query:
|
||||
- Suggestion 1: Description of what could be improved and why
|
||||
- Suggestion 2: Description of what could be improved and why
|
||||
|
||||
## RISK ASSESSMENT
|
||||
- WITH (NOLOCK) usage: [Yes/No] - If yes, explain the risks
|
||||
- Missing WHERE clause: [Yes/No] - If yes, explain the impact
|
||||
- Implicit conversions: [Yes/No] - If yes, list them
|
||||
|
||||
## SUMMARY
|
||||
Brief summary of the most important findings and priority order for addressing them.
|
||||
|
||||
Remember: DO NOT provide a rewritten query. Only analysis and suggestions.
|
||||
""".strip(),
|
||||
"- NÃO reescreva a SQL em hipótese alguma.",
|
||||
],
|
||||
)
|
||||
|
|
|
|||
|
|
@ -1,227 +0,0 @@
|
|||
"""Prompt templates used by Agno agents.
|
||||
|
||||
This module keeps the canonical prompts alongside the agents to ensure
|
||||
all prompt content is owned and maintained by the agent layer.
|
||||
"""
|
||||
|
||||
from string import Template
|
||||
|
||||
|
||||
class AgentPromptTemplates:
|
||||
"""Collection of prompt templates for SQL optimization agents."""
|
||||
|
||||
SQL_TO_NATURAL = Template("""
|
||||
You are an expert $database_name database analyst and performance specialist. Your task is to translate the SQL query below into a detailed, precise natural-language description that another agent will later use to reconstruct and optimize the query.
|
||||
|
||||
$database_name SQL Query:
|
||||
```sql
|
||||
$query
|
||||
```
|
||||
|
||||
Your explanation must follow these requirements:
|
||||
|
||||
1. **Describe the overall purpose**
|
||||
- Explain clearly what the query is intended to accomplish and why (retrieve data, update rows, aggregate information, validate existence, create structures, etc.).
|
||||
|
||||
2. **List ALL involved database objects**
|
||||
Explicitly list every:
|
||||
- Table
|
||||
- View
|
||||
- CTE (Common Table Expression)
|
||||
- Subquery or derived table
|
||||
- Function
|
||||
- Stored procedure, if referenced
|
||||
- Temporary table
|
||||
- Schema-qualified object
|
||||
Use the exact names as they appear in the query.
|
||||
|
||||
3. **Describe all essential operations**
|
||||
Explicitly state, using exact column names:
|
||||
- Columns retrieved or modified
|
||||
- Join types, join conditions, and which objects participate
|
||||
- Filters and conditions (WHERE, boolean logic, comparisons)
|
||||
- Aggregations (SUM, COUNT, etc.)
|
||||
- Grouping and HAVING clauses
|
||||
- Sorting (ORDER BY)
|
||||
- Window functions
|
||||
- DISTINCT, TOP, LIMIT, OFFSET, pagination
|
||||
- Any $database_name-specific features used$specific_features
|
||||
|
||||
4. **Maintain strict factual accuracy**
|
||||
- Do NOT infer business meaning unless directly implied.
|
||||
- Do NOT rename or paraphrase column names; repeat them exactly.
|
||||
|
||||
5. **Use clear, structured natural language**
|
||||
- Provide a step-by-step explanation that makes every operation and purpose explicit.
|
||||
- The output must be complete enough that the query can be reconstructed.
|
||||
|
||||
6. **⚠️ CRITICAL: Identify Performance Issues**
|
||||
Flag any of these CRITICAL performance problems found in the query:
|
||||
- **NO WHERE CLAUSE** (BE CAREFUL - AVOID FALSE POSITIVES):
|
||||
* ONLY flag if the MAIN/OUTER SELECT has absolutely NO WHERE keyword with filtering conditions
|
||||
* If query HAS 'WHERE' followed by conditions (even old-style JOINs in WHERE), DO NOT flag
|
||||
* Subqueries/EXISTS having WHERE does NOT mean main query has no WHERE
|
||||
* CROSS APPLY/LATERAL with internal WHERE counts as filtered
|
||||
* If truly no WHERE: Flag as CRITICAL (causes FULL TABLE SCAN, no predicate pushdown)
|
||||
- **Non-SARGable patterns**: Functions on indexed columns in WHERE/JOIN (e.g., YEAR(date), UPPER(col))
|
||||
- **Leading wildcards**: LIKE '%value%' patterns that prevent index usage
|
||||
- **Implicit conversions**: Type mismatches in comparisons
|
||||
- **NOLOCK/WITH (NOLOCK) hints**: If query uses WITH (NOLOCK), WITH (nolock), WITH(NOLOCK), (NOLOCK), (nolock) or NOLOCK/nolock (any case) → DO NOT REMOVE, but FLAG as **CRITICAL RISK**: "⚠️ WITH (NOLOCK) reads uncommitted/dirty data - CRITICAL: may cause INCORRECT FINANCIAL VALUES and data inconsistencies in production"
|
||||
$analysis_requirements
|
||||
|
||||
Explanation:
|
||||
""")
|
||||
|
||||
NATURAL_TO_SQL = Template("""
|
||||
You are an expert $database_name SQL developer and query performance specialist.
|
||||
Your task is to write an optimized SQL query based exclusively on the natural-language description provided below.
|
||||
|
||||
Description:
|
||||
$explanation
|
||||
|
||||
⚠️ CRITICAL RULES - READ BEFORE GENERATING SQL:
|
||||
|
||||
1. **PRESERVE ALL BUSINESS LOGIC EXACTLY**
|
||||
- Every CASE WHEN statement must have IDENTICAL conditions and results
|
||||
- Every calculated column must use IDENTICAL formulas
|
||||
- Every subquery must query the SAME tables with SAME filters
|
||||
- Do NOT simplify, merge, or "improve" business logic - even if it looks redundant
|
||||
- If description mentions specific conditions (cd_tp_apolice = 2, etc.), preserve them EXACTLY
|
||||
|
||||
2. **PRESERVE ALL TABLES AND COLUMNS**
|
||||
- Include EVERY table mentioned in the description
|
||||
- Include EVERY column mentioned in the description
|
||||
- Use EXACT column names as described (no renaming)
|
||||
- Use EXACT table aliases as described
|
||||
|
||||
3. **Translate the full described logic into SQL**
|
||||
- Implement all actions, operations, filters, joins, and conditions exactly as stated.
|
||||
- Use every object and column referenced in the description, using their exact names.
|
||||
- If the description mentions specific filter values (e.g., cd_tipo_endosso = 0), use those EXACT values
|
||||
|
||||
4. **Write optimized SQL while preserving semantics**
|
||||
- Apply $database_name best practices for performance.
|
||||
- Use indexing-aware filtering, efficient join strategies, and clear expressions.
|
||||
- Implement aggregations, groupings, window functions, or pagination when described.
|
||||
- Prefer performant constructs commonly recommended for $database_name workloads.
|
||||
- OPTIMIZATION means structure/hints/indexes - NOT changing logic
|
||||
|
||||
5. **Use $database_name-specific syntax and features**
|
||||
- Apply native functions, operators, optimizer behaviors, or hints when appropriate.
|
||||
- Incorporate $specific_requirements if provided.
|
||||
|
||||
6. **Ensure logical fidelity - ZERO TOLERANCE FOR CHANGES**
|
||||
- The SQL must reflect PRECISELY the behavior described
|
||||
- Do NOT add logic not explicitly stated
|
||||
- Do NOT omit any step described
|
||||
- Do NOT infer or assume details beyond what is explicitly stated
|
||||
- Do NOT "simplify" complex CASE statements
|
||||
- Do NOT merge or combine separate calculated columns
|
||||
|
||||
7. **Self-Verification Checklist** (perform before outputting):
|
||||
- [ ] All tables from description are present in query
|
||||
- [ ] All columns from description are present in SELECT
|
||||
- [ ] All CASE conditions match description exactly
|
||||
- [ ] All subquery filters match description exactly
|
||||
- [ ] All JOIN conditions match description exactly
|
||||
- [ ] No business logic was simplified or changed
|
||||
|
||||
8. **Output format**
|
||||
- Provide ONLY the final, optimized SQL query.
|
||||
- Do NOT include explanations, comments, or extra text.
|
||||
|
||||
Optimized SQL Query:
|
||||
""")
|
||||
|
||||
CONSERVATIVE_ANALYSIS = Template("""
|
||||
You are an expert $database_name database analyst and performance specialist.
|
||||
|
||||
Your task is to ANALYZE the SQL query below and provide SUGGESTIONS for improvement.
|
||||
|
||||
⚠️ CRITICAL: You must NOT rewrite or modify the query. Only provide analysis and suggestions.
|
||||
|
||||
$database_name SQL Query:
|
||||
```sql
|
||||
$query
|
||||
```
|
||||
|
||||
Query Complexity Information:
|
||||
- Columns: $column_count
|
||||
- Tables: $table_count
|
||||
- Subqueries: $subquery_count
|
||||
- CASE statements: $case_count
|
||||
- JOINs: $join_count
|
||||
- Complexity Level: $complexity_level
|
||||
|
||||
Provide your analysis in the following structured format:
|
||||
|
||||
## PERFORMANCE ISSUES
|
||||
List each performance issue found, with severity (CRITICAL/HIGH/MEDIUM/LOW):
|
||||
- [SEVERITY] Issue description
|
||||
- [SEVERITY] Issue description
|
||||
|
||||
## SUGGESTED INDEXES
|
||||
List indexes that could improve this query:
|
||||
- CREATE INDEX idx_name ON table(columns) -- Reason
|
||||
|
||||
## OPTIMIZATION SUGGESTIONS
|
||||
List specific suggestions WITHOUT rewriting the query:
|
||||
- Suggestion 1: Description of what could be improved and why
|
||||
- Suggestion 2: Description of what could be improved and why
|
||||
|
||||
## RISK ASSESSMENT
|
||||
- WITH (NOLOCK) usage: [Yes/No] - If yes, explain the risks
|
||||
- Missing WHERE clause: [Yes/No] - If yes, explain the impact
|
||||
- Implicit conversions: [Yes/No] - If yes, list them
|
||||
|
||||
## SUMMARY
|
||||
Brief summary of the most important findings and priority order for addressing them.
|
||||
|
||||
Remember: DO NOT provide a rewritten query. Only analysis and suggestions.
|
||||
""")
|
||||
|
||||
@classmethod
|
||||
def render_sql_to_natural(
|
||||
cls, database_name: str, query: str, specific_features: str = "", analysis_requirements: str = ""
|
||||
) -> str:
|
||||
return cls.SQL_TO_NATURAL.substitute(
|
||||
database_name=database_name,
|
||||
query=query,
|
||||
specific_features=f"\n{specific_features}" if specific_features else "",
|
||||
analysis_requirements=f"\n{analysis_requirements}" if analysis_requirements else "",
|
||||
)
|
||||
|
||||
@classmethod
|
||||
def render_natural_to_sql(
|
||||
cls, database_name: str, explanation: str, specific_requirements: str
|
||||
) -> str:
|
||||
return cls.NATURAL_TO_SQL.substitute(
|
||||
database_name=database_name,
|
||||
explanation=explanation,
|
||||
specific_requirements="\n".join(
|
||||
f"- {req}" for req in specific_requirements.split("\n") if req.strip()
|
||||
),
|
||||
)
|
||||
|
||||
@classmethod
|
||||
def render_conservative_analysis(
|
||||
cls,
|
||||
database_name: str,
|
||||
query: str,
|
||||
column_count: int = 0,
|
||||
table_count: int = 0,
|
||||
subquery_count: int = 0,
|
||||
case_count: int = 0,
|
||||
join_count: int = 0,
|
||||
complexity_level: str = "unknown",
|
||||
) -> str:
|
||||
return cls.CONSERVATIVE_ANALYSIS.substitute(
|
||||
database_name=database_name,
|
||||
query=query,
|
||||
column_count=column_count,
|
||||
table_count=table_count,
|
||||
subquery_count=subquery_count,
|
||||
case_count=case_count,
|
||||
join_count=join_count,
|
||||
complexity_level=complexity_level,
|
||||
)
|
||||
|
|
@ -2,7 +2,6 @@ from agno.agent import Agent
|
|||
from agno.db.sqlite import SqliteDb
|
||||
from sql_optimizer_team.tools.engine.model_selector import get_model
|
||||
from sql_optimizer_team.tools.core_tools import explain_query_core
|
||||
from sql_optimizer_team.agents.prompts import AgentPromptTemplates
|
||||
from sql_optimizer_team.tools.prompt_tools import supported_databases
|
||||
from sql_optimizer_team.tools.sql_tools import load_sql_from_file, ensure_non_empty
|
||||
import os
|
||||
|
|
@ -32,8 +31,68 @@ sql_analyst_agent = Agent(
|
|||
"- Solicite banco e SQL se não estiverem presentes. Bancos suportados: use supported_databases().",
|
||||
"- Se o usuário fornecer um caminho de arquivo, use load_sql_from_file().",
|
||||
"- Preferência: use explain_query_core(database_type, sql) para gerar a explicação via core de negócio.",
|
||||
"- Use a template oficial abaixo para estruturar a explicação (SQL → natural).",
|
||||
AgentPromptTemplates.SQL_TO_NATURAL.template.strip(),
|
||||
"- Use a template oficial abaixo para estruturar a explicação (SQL → natural).",
|
||||
"""
|
||||
You are an expert $database_name database analyst and performance specialist. Your task is to translate the SQL query below into a detailed, precise natural-language description that another agent will later use to reconstruct and optimize the query.
|
||||
|
||||
$database_name SQL Query:
|
||||
```sql
|
||||
$query
|
||||
```
|
||||
|
||||
Your explanation must follow these requirements:
|
||||
|
||||
1. **Describe the overall purpose**
|
||||
- Explain clearly what the query is intended to accomplish and why (retrieve data, update rows, aggregate information, validate existence, create structures, etc.).
|
||||
|
||||
2. **List ALL involved database objects**
|
||||
Explicitly list every:
|
||||
- Table
|
||||
- View
|
||||
- CTE (Common Table Expression)
|
||||
- Subquery or derived table
|
||||
- Function
|
||||
- Stored procedure, if referenced
|
||||
- Temporary table
|
||||
- Schema-qualified object
|
||||
Use the exact names as they appear in the query.
|
||||
|
||||
3. **Describe all essential operations**
|
||||
Explicitly state, using exact column names:
|
||||
- Columns retrieved or modified
|
||||
- Join types, join conditions, and which objects participate
|
||||
- Filters and conditions (WHERE, boolean logic, comparisons)
|
||||
- Aggregations (SUM, COUNT, etc.)
|
||||
- Grouping and HAVING clauses
|
||||
- Sorting (ORDER BY)
|
||||
- Window functions
|
||||
- DISTINCT, TOP, LIMIT, OFFSET, pagination
|
||||
- Any $database_name-specific features used$specific_features
|
||||
|
||||
4. **Maintain strict factual accuracy**
|
||||
- Do NOT infer business meaning unless directly implied.
|
||||
- Do NOT rename or paraphrase column names; repeat them exactly.
|
||||
|
||||
5. **Use clear, structured natural language**
|
||||
- Provide a step-by-step explanation that makes every operation and purpose explicit.
|
||||
- The output must be complete enough that the query can be reconstructed.
|
||||
|
||||
6. **⚠️ CRITICAL: Identify Performance Issues**
|
||||
Flag any of these CRITICAL performance problems found in the query:
|
||||
- **NO WHERE CLAUSE** (BE CAREFUL - AVOID FALSE POSITIVES):
|
||||
* ONLY flag if the MAIN/OUTER SELECT has absolutely NO WHERE keyword with filtering conditions
|
||||
* If query HAS 'WHERE' followed by conditions (even old-style JOINs in WHERE), DO NOT flag
|
||||
* Subqueries/EXISTS having WHERE does NOT mean main query has no WHERE
|
||||
* CROSS APPLY/LATERAL with internal WHERE counts as filtered
|
||||
* If truly no WHERE: Flag as CRITICAL (causes FULL TABLE SCAN, no predicate pushdown)
|
||||
- **Non-SARGable patterns**: Functions on indexed columns in WHERE/JOIN (e.g., YEAR(date), UPPER(col))
|
||||
- **Leading wildcards**: LIKE '%value%' patterns that prevent index usage
|
||||
- **Implicit conversions**: Type mismatches in comparisons
|
||||
- **NOLOCK/WITH (NOLOCK) hints**: If query uses WITH (NOLOCK), WITH (nolock), WITH(NOLOCK), (NOLOCK), (nolock) or NOLOCK/nolock (any case) → DO NOT REMOVE, but FLAG as **CRITICAL RISK**: "⚠️ WITH (NOLOCK) reads uncommitted/dirty data - CRITICAL: may cause INCORRECT FINANCIAL VALUES and data inconsistencies in production"
|
||||
$analysis_requirements
|
||||
|
||||
Explanation:
|
||||
""".strip(),
|
||||
"- Entregue apenas a explicação natural estruturada conforme a prompt; não reescreva a SQL.",
|
||||
"- Identifique problemas críticos de performance conforme a prompt.",
|
||||
],
|
||||
|
|
|
|||
|
|
@ -2,7 +2,6 @@ from agno.agent import Agent
|
|||
from agno.db.sqlite import SqliteDb
|
||||
from sql_optimizer_team.tools.engine.model_selector import get_model
|
||||
from sql_optimizer_team.tools.core_tools import optimize_query_core
|
||||
from sql_optimizer_team.agents.prompts import AgentPromptTemplates
|
||||
from sql_optimizer_team.tools.prompt_tools import supported_databases
|
||||
from sql_optimizer_team.tools.sql_tools import load_sql_from_file, ensure_non_empty
|
||||
import os
|
||||
|
|
@ -32,7 +31,66 @@ sql_optimizer_agent = Agent(
|
|||
"- Exija banco alvo e SQL antes de otimizar.",
|
||||
"- Use optimize_query_core(database_type, sql) para executar o core de negócio.",
|
||||
"- Use a template oficial abaixo para reescrever (natural → SQL) mantendo 100% da lógica.",
|
||||
AgentPromptTemplates.NATURAL_TO_SQL.template.strip(),
|
||||
"""
|
||||
You are an expert $database_name SQL developer and query performance specialist.
|
||||
Your task is to write an optimized SQL query based exclusively on the natural-language description provided below.
|
||||
|
||||
Description:
|
||||
$explanation
|
||||
|
||||
⚠️ CRITICAL RULES - READ BEFORE GENERATING SQL:
|
||||
|
||||
1. **PRESERVE ALL BUSINESS LOGIC EXACTLY**
|
||||
- Every CASE WHEN statement must have IDENTICAL conditions and results
|
||||
- Every calculated column must use IDENTICAL formulas
|
||||
- Every subquery must query the SAME tables with SAME filters
|
||||
- Do NOT simplify, merge, or "improve" business logic - even if it looks redundant
|
||||
- If description mentions specific conditions (cd_tp_apolice = 2, etc.), preserve them EXACTLY
|
||||
|
||||
2. **PRESERVE ALL TABLES AND COLUMNS**
|
||||
- Include EVERY table mentioned in the description
|
||||
- Include EVERY column mentioned in the description
|
||||
- Use EXACT column names as described (no renaming)
|
||||
- Use EXACT table aliases as described
|
||||
|
||||
3. **Translate the full described logic into SQL**
|
||||
- Implement all actions, operations, filters, joins, and conditions exactly as stated.
|
||||
- Use every object and column referenced in the description, using their exact names.
|
||||
- If the description mentions specific filter values (e.g., cd_tipo_endosso = 0), use those EXACT values
|
||||
|
||||
4. **Write optimized SQL while preserving semantics**
|
||||
- Apply $database_name best practices for performance.
|
||||
- Use indexing-aware filtering, efficient join strategies, and clear expressions.
|
||||
- Implement aggregations, groupings, window functions, or pagination when described.
|
||||
- Prefer performant constructs commonly recommended for $database_name workloads.
|
||||
- OPTIMIZATION means structure/hints/indexes - NOT changing logic
|
||||
|
||||
5. **Use $database_name-specific syntax and features**
|
||||
- Apply native functions, operators, optimizer behaviors, or hints when appropriate.
|
||||
- Incorporate $specific_requirements if provided.
|
||||
|
||||
6. **Ensure logical fidelity - ZERO TOLERANCE FOR CHANGES**
|
||||
- The SQL must reflect PRECISELY the behavior described
|
||||
- Do NOT add logic not explicitly stated
|
||||
- Do NOT omit any step described
|
||||
- Do NOT infer or assume details beyond what is explicitly stated
|
||||
- Do NOT "simplify" complex CASE statements
|
||||
- Do NOT merge or combine separate calculated columns
|
||||
|
||||
7. **Self-Verification Checklist** (perform before outputting):
|
||||
- [ ] All tables from description are present in query
|
||||
- [ ] All columns from description are present in SELECT
|
||||
- [ ] All CASE conditions match description exactly
|
||||
- [ ] All subquery filters match description exactly
|
||||
- [ ] All JOIN conditions match description exactly
|
||||
- [ ] No business logic was simplified or changed
|
||||
|
||||
8. **Output format**
|
||||
- Provide ONLY the final, optimized SQL query.
|
||||
- Do NOT include explanations, comments, or extra text.
|
||||
|
||||
Optimized SQL Query:
|
||||
""".strip(),
|
||||
"- Extraia e devolva SOMENTE optimized_query (sem explicações, sem markdown).",
|
||||
"- Preserve 100% da lógica, colunas, aliases, filtros, joins e subqueries.",
|
||||
],
|
||||
|
|
|
|||
|
|
@ -6,8 +6,225 @@ reducing code duplication and ensuring consistency.
|
|||
|
||||
from abc import ABC, abstractmethod
|
||||
|
||||
from string import Template
|
||||
|
||||
from sql_optimizer_team.tools.engine.tools_api.prompt_tool import PromptGeneratorTool
|
||||
from sql_optimizer_team.agents.prompts import AgentPromptTemplates
|
||||
|
||||
|
||||
SQL_TO_NATURAL_TEMPLATE = Template("""
|
||||
You are an expert $database_name database analyst and performance specialist. Your task is to translate the SQL query below into a detailed, precise natural-language description that another agent will later use to reconstruct and optimize the query.
|
||||
|
||||
$database_name SQL Query:
|
||||
```sql
|
||||
$query
|
||||
```
|
||||
|
||||
Your explanation must follow these requirements:
|
||||
|
||||
1. **Describe the overall purpose**
|
||||
- Explain clearly what the query is intended to accomplish and why (retrieve data, update rows, aggregate information, validate existence, create structures, etc.).
|
||||
|
||||
2. **List ALL involved database objects**
|
||||
Explicitly list every:
|
||||
- Table
|
||||
- View
|
||||
- CTE (Common Table Expression)
|
||||
- Subquery or derived table
|
||||
- Function
|
||||
- Stored procedure, if referenced
|
||||
- Temporary table
|
||||
- Schema-qualified object
|
||||
Use the exact names as they appear in the query.
|
||||
|
||||
3. **Describe all essential operations**
|
||||
Explicitly state, using exact column names:
|
||||
- Columns retrieved or modified
|
||||
- Join types, join conditions, and which objects participate
|
||||
- Filters and conditions (WHERE, boolean logic, comparisons)
|
||||
- Aggregations (SUM, COUNT, etc.)
|
||||
- Grouping and HAVING clauses
|
||||
- Sorting (ORDER BY)
|
||||
- Window functions
|
||||
- DISTINCT, TOP, LIMIT, OFFSET, pagination
|
||||
- Any $database_name-specific features used$specific_features
|
||||
|
||||
4. **Maintain strict factual accuracy**
|
||||
- Do NOT infer business meaning unless directly implied.
|
||||
- Do NOT rename or paraphrase column names; repeat them exactly.
|
||||
|
||||
5. **Use clear, structured natural language**
|
||||
- Provide a step-by-step explanation that makes every operation and purpose explicit.
|
||||
- The output must be complete enough that the query can be reconstructed.
|
||||
|
||||
6. **⚠️ CRITICAL: Identify Performance Issues**
|
||||
Flag any of these CRITICAL performance problems found in the query:
|
||||
- **NO WHERE CLAUSE** (BE CAREFUL - AVOID FALSE POSITIVES):
|
||||
* ONLY flag if the MAIN/OUTER SELECT has absolutely NO WHERE keyword with filtering conditions
|
||||
* If query HAS 'WHERE' followed by conditions (even old-style JOINs in WHERE), DO NOT flag
|
||||
* Subqueries/EXISTS having WHERE does NOT mean main query has no WHERE
|
||||
* CROSS APPLY/LATERAL with internal WHERE counts as filtered
|
||||
* If truly no WHERE: Flag as CRITICAL (causes FULL TABLE SCAN, no predicate pushdown)
|
||||
- **Non-SARGable patterns**: Functions on indexed columns in WHERE/JOIN (e.g., YEAR(date), UPPER(col))
|
||||
- **Leading wildcards**: LIKE '%value%' patterns that prevent index usage
|
||||
- **Implicit conversions**: Type mismatches in comparisons
|
||||
- **NOLOCK/WITH (NOLOCK) hints**: If query uses WITH (NOLOCK), WITH (nolock), WITH(NOLOCK), (NOLOCK), (nolock) or NOLOCK/nolock (any case) → DO NOT REMOVE, but FLAG as **CRITICAL RISK**: "⚠️ WITH (NOLOCK) reads uncommitted/dirty data - CRITICAL: may cause INCORRECT FINANCIAL VALUES and data inconsistencies in production"
|
||||
$analysis_requirements
|
||||
|
||||
Explanation:
|
||||
""")
|
||||
|
||||
NATURAL_TO_SQL_TEMPLATE = Template("""
|
||||
You are an expert $database_name SQL developer and query performance specialist.
|
||||
Your task is to write an optimized SQL query based exclusively on the natural-language description provided below.
|
||||
|
||||
Description:
|
||||
$explanation
|
||||
|
||||
⚠️ CRITICAL RULES - READ BEFORE GENERATING SQL:
|
||||
|
||||
1. **PRESERVE ALL BUSINESS LOGIC EXACTLY**
|
||||
- Every CASE WHEN statement must have IDENTICAL conditions and results
|
||||
- Every calculated column must use IDENTICAL formulas
|
||||
- Every subquery must query the SAME tables with SAME filters
|
||||
- Do NOT simplify, merge, or "improve" business logic - even if it looks redundant
|
||||
- If description mentions specific conditions (cd_tp_apolice = 2, etc.), preserve them EXACTLY
|
||||
|
||||
2. **PRESERVE ALL TABLES AND COLUMNS**
|
||||
- Include EVERY table mentioned in the description
|
||||
- Include EVERY column mentioned in the description
|
||||
- Use EXACT column names as described (no renaming)
|
||||
- Use EXACT table aliases as described
|
||||
|
||||
3. **Translate the full described logic into SQL**
|
||||
- Implement all actions, operations, filters, joins, and conditions exactly as stated.
|
||||
- Use every object and column referenced in the description, using their exact names.
|
||||
- If the description mentions specific filter values (e.g., cd_tipo_endosso = 0), use those EXACT values
|
||||
|
||||
4. **Write optimized SQL while preserving semantics**
|
||||
- Apply $database_name best practices for performance.
|
||||
- Use indexing-aware filtering, efficient join strategies, and clear expressions.
|
||||
- Implement aggregations, groupings, window functions, or pagination when described.
|
||||
- Prefer performant constructs commonly recommended for $database_name workloads.
|
||||
- OPTIMIZATION means structure/hints/indexes - NOT changing logic
|
||||
|
||||
5. **Use $database_name-specific syntax and features**
|
||||
- Apply native functions, operators, optimizer behaviors, or hints when appropriate.
|
||||
- Incorporate $specific_requirements if provided.
|
||||
|
||||
6. **Ensure logical fidelity - ZERO TOLERANCE FOR CHANGES**
|
||||
- The SQL must reflect PRECISELY the behavior described
|
||||
- Do NOT add logic not explicitly stated
|
||||
- Do NOT omit any step described
|
||||
- Do NOT infer or assume details beyond what is explicitly stated
|
||||
- Do NOT "simplify" complex CASE statements
|
||||
- Do NOT merge or combine separate calculated columns
|
||||
|
||||
7. **Self-Verification Checklist** (perform before outputting):
|
||||
- [ ] All tables from description are present in query
|
||||
- [ ] All columns from description are present in SELECT
|
||||
- [ ] All CASE conditions match description exactly
|
||||
- [ ] All subquery filters match description exactly
|
||||
- [ ] All JOIN conditions match description exactly
|
||||
- [ ] No business logic was simplified or changed
|
||||
|
||||
8. **Output format**
|
||||
- Provide ONLY the final, optimized SQL query.
|
||||
- Do NOT include explanations, comments, or extra text.
|
||||
|
||||
Optimized SQL Query:
|
||||
""")
|
||||
|
||||
CONSERVATIVE_ANALYSIS_TEMPLATE = Template("""
|
||||
You are an expert $database_name database analyst and performance specialist.
|
||||
|
||||
Your task is to ANALYZE the SQL query below and provide SUGGESTIONS for improvement.
|
||||
|
||||
⚠️ CRITICAL: You must NOT rewrite or modify the query. Only provide analysis and suggestions.
|
||||
|
||||
$database_name SQL Query:
|
||||
```sql
|
||||
$query
|
||||
```
|
||||
|
||||
Query Complexity Information:
|
||||
- Columns: $column_count
|
||||
- Tables: $table_count
|
||||
- Subqueries: $subquery_count
|
||||
- CASE statements: $case_count
|
||||
- JOINs: $join_count
|
||||
- Complexity Level: $complexity_level
|
||||
|
||||
Provide your analysis in the following structured format:
|
||||
|
||||
## PERFORMANCE ISSUES
|
||||
List each performance issue found, with severity (CRITICAL/HIGH/MEDIUM/LOW):
|
||||
- [SEVERITY] Issue description
|
||||
- [SEVERITY] Issue description
|
||||
|
||||
## SUGGESTED INDEXES
|
||||
List indexes that could improve this query:
|
||||
- CREATE INDEX idx_name ON table(columns) -- Reason
|
||||
|
||||
## OPTIMIZATION SUGGESTIONS
|
||||
List specific suggestions WITHOUT rewriting the query:
|
||||
- Suggestion 1: Description of what could be improved and why
|
||||
- Suggestion 2: Description of what could be improved and why
|
||||
|
||||
## RISK ASSESSMENT
|
||||
- WITH (NOLOCK) usage: [Yes/No] - If yes, explain the risks
|
||||
- Missing WHERE clause: [Yes/No] - If yes, explain the impact
|
||||
- Implicit conversions: [Yes/No] - If yes, list them
|
||||
|
||||
## SUMMARY
|
||||
Brief summary of the most important findings and priority order for addressing them.
|
||||
|
||||
Remember: DO NOT provide a rewritten query. Only analysis and suggestions.
|
||||
""")
|
||||
|
||||
|
||||
def _render_sql_to_natural(
    database_name: str, query: str, specific_features: str = "", analysis_requirements: str = ""
) -> str:
    """Fill ``SQL_TO_NATURAL_TEMPLATE`` to build the SQL-to-description prompt.

    Args:
        database_name: Value substituted for ``$database_name``.
        query: SQL text substituted for ``$query``.
        specific_features: Optional extra text for ``$specific_features``;
            when non-empty it is inserted on a new line.
        analysis_requirements: Optional extra text for
            ``$analysis_requirements``; when non-empty it is inserted on a
            new line.

    Returns:
        The template text with every placeholder substituted.
    """
    # Optional sections start on their own line; an empty value contributes
    # nothing, so no stray blank line is introduced.
    features_block = ""
    if specific_features:
        features_block = f"\n{specific_features}"

    requirements_block = ""
    if analysis_requirements:
        requirements_block = f"\n{analysis_requirements}"

    placeholders = {
        "database_name": database_name,
        "query": query,
        "specific_features": features_block,
        "analysis_requirements": requirements_block,
    }
    return SQL_TO_NATURAL_TEMPLATE.substitute(placeholders)
|
||||
|
||||
|
||||
def _render_natural_to_sql(
    database_name: str, explanation: str, specific_requirements: str
) -> str:
    """Fill ``NATURAL_TO_SQL_TEMPLATE`` to build the description-to-SQL prompt.

    Args:
        database_name: Value substituted for ``$database_name``.
        explanation: Natural-language description substituted for
            ``$explanation``.
        specific_requirements: Newline-separated requirements; each
            non-blank line becomes a ``- `` bullet in
            ``$specific_requirements``.

    Returns:
        The template text with every placeholder substituted.
    """
    # Blank or whitespace-only lines are dropped; kept lines are bulleted
    # as-is (no stripping of their content).
    bullets = [
        f"- {line}"
        for line in specific_requirements.split("\n")
        if line.strip()
    ]
    placeholders = {
        "database_name": database_name,
        "explanation": explanation,
        "specific_requirements": "\n".join(bullets),
    }
    return NATURAL_TO_SQL_TEMPLATE.substitute(placeholders)
|
||||
|
||||
|
||||
def _render_conservative_analysis(
    database_name: str,
    query: str,
    column_count: int = 0,
    table_count: int = 0,
    subquery_count: int = 0,
    case_count: int = 0,
    join_count: int = 0,
    complexity_level: str = "unknown",
) -> str:
    """Fill ``CONSERVATIVE_ANALYSIS_TEMPLATE`` to build the analysis-only prompt.

    Args:
        database_name: Value substituted for ``$database_name``.
        query: SQL text substituted for ``$query``.
        column_count: Value for ``$column_count``.
        table_count: Value for ``$table_count``.
        subquery_count: Value for ``$subquery_count``.
        case_count: Value for ``$case_count``.
        join_count: Value for ``$join_count``.
        complexity_level: Value for ``$complexity_level``.

    Returns:
        The template text with every placeholder substituted.
    """
    # Every argument feeds the placeholder of the same name, so the mapping
    # mirrors the signature one-to-one.
    placeholders = {
        "database_name": database_name,
        "query": query,
        "column_count": column_count,
        "table_count": table_count,
        "subquery_count": subquery_count,
        "case_count": case_count,
        "join_count": join_count,
        "complexity_level": complexity_level,
    }
    return CONSERVATIVE_ANALYSIS_TEMPLATE.substitute(placeholders)
|
||||
|
||||
|
||||
class BasePromptGenerator(PromptGeneratorTool, ABC):
|
||||
|
|
@ -70,7 +287,7 @@ class BasePromptGenerator(PromptGeneratorTool, ABC):
|
|||
Returns:
|
||||
Formatted prompt for LLM
|
||||
"""
|
||||
return AgentPromptTemplates.render_sql_to_natural(
|
||||
return _render_sql_to_natural(
|
||||
database_name=self.get_database_name(),
|
||||
query=query,
|
||||
specific_features=self.get_specific_features(),
|
||||
|
|
@ -86,7 +303,7 @@ class BasePromptGenerator(PromptGeneratorTool, ABC):
|
|||
Returns:
|
||||
Formatted prompt for LLM
|
||||
"""
|
||||
return AgentPromptTemplates.render_natural_to_sql(
|
||||
return _render_natural_to_sql(
|
||||
database_name=self.get_database_name(),
|
||||
explanation=explanation,
|
||||
specific_requirements=self.get_specific_requirements(),
|
||||
|
|
|
|||
Loading…
Reference in a new issue