refactor: Remove AgentPromptTemplates and integrate prompt templates directly into agents for improved clarity and maintainability

This commit is contained in:
william.dias 2026-01-23 10:36:55 -03:00
parent ae33ee5bca
commit c6dd91810b
6 changed files with 414 additions and 242 deletions

View file

@ -1,9 +1,12 @@
"""SQL optimizer team agents."""
"""SQL optimizer team agents.
from sql_optimizer_team.agents.sql_analyst_agent import sql_analyst_agent
from sql_optimizer_team.agents.sql_optimizer_agent import sql_optimizer_agent
from sql_optimizer_team.agents.sql_quality_agent import sql_quality_agent
from sql_optimizer_team.agents.conservative_analysis_agent import conservative_analysis_agent
This package uses lazy imports to avoid circular dependencies with tools.
"""
from __future__ import annotations
import importlib
from typing import Any
__all__ = [
"sql_analyst_agent",
@ -11,3 +14,21 @@ __all__ = [
"sql_quality_agent",
"conservative_analysis_agent",
]
# Maps each lazily exported agent name to the dotted path of the module that
# defines an attribute with that same name.
_AGENT_MODULES = {
    "sql_analyst_agent": "sql_optimizer_team.agents.sql_analyst_agent",
    "sql_optimizer_agent": "sql_optimizer_team.agents.sql_optimizer_agent",
    "sql_quality_agent": "sql_optimizer_team.agents.sql_quality_agent",
    "conservative_analysis_agent": "sql_optimizer_team.agents.conservative_analysis_agent",
}


def __getattr__(name: str) -> Any:
    """Resolve a lazily exported agent on first attribute access (PEP 562).

    Args:
        name: Attribute requested on this module.

    Returns:
        The attribute called ``name`` taken from the module registered for it
        in ``_AGENT_MODULES``.

    Raises:
        AttributeError: If ``name`` is not one of the registered agent names.
    """
    if name not in _AGENT_MODULES:
        raise AttributeError(f"module {__name__!r} has no attribute {name!r}")

    agent = getattr(importlib.import_module(_AGENT_MODULES[name]), name)
    # Each agent lives in a module of the same name. When this file is the
    # package those modules belong to, importing one makes the import system
    # bind the *module object* under ``name`` in this namespace, so a second
    # lookup would bypass this hook and return the module instead of the agent.
    # Storing the agent in globals() overwrites that binding and also avoids
    # re-running the hook on later accesses.
    globals()[name] = agent
    return agent
def __dir__() -> list[str]:
    """Return the sorted, de-duplicated names exposed by this module.

    Combines the names currently present in the module namespace with the
    lazily exported names listed in ``__all__``. A set union is used so a name
    that is in both (for example an agent that has already been loaded) is
    reported once rather than twice.
    """
    return sorted(set(globals()) | set(__all__))

View file

@ -1,7 +1,6 @@
from agno.agent import Agent
from agno.db.sqlite import SqliteDb
from sql_optimizer_team.tools.engine.model_selector import get_model
from sql_optimizer_team.agents.prompts import AgentPromptTemplates
from sql_optimizer_team.tools.prompt_tools import supported_databases
from sql_optimizer_team.tools.sql_tools import load_sql_from_file, ensure_non_empty
import os
@ -31,7 +30,52 @@ conservative_analysis_agent = Agent(
"- Solicite banco e SQL se não estiverem presentes.",
"- Se o usuário fornecer um caminho de arquivo, use load_sql_from_file().",
"- Use a template oficial abaixo para a análise conservadora (sem reescrever a SQL).",
AgentPromptTemplates.CONSERVATIVE_ANALYSIS.template.strip(),
"""
You are an expert $database_name database analyst and performance specialist.
Your task is to ANALYZE the SQL query below and provide SUGGESTIONS for improvement.
CRITICAL: You must NOT rewrite or modify the query. Only provide analysis and suggestions.
$database_name SQL Query:
```sql
$query
```
Query Complexity Information:
- Columns: $column_count
- Tables: $table_count
- Subqueries: $subquery_count
- CASE statements: $case_count
- JOINs: $join_count
- Complexity Level: $complexity_level
Provide your analysis in the following structured format:
## PERFORMANCE ISSUES
List each performance issue found, with severity (CRITICAL/HIGH/MEDIUM/LOW):
- [SEVERITY] Issue description
- [SEVERITY] Issue description
## SUGGESTED INDEXES
List indexes that could improve this query:
- CREATE INDEX idx_name ON table(columns) -- Reason
## OPTIMIZATION SUGGESTIONS
List specific suggestions WITHOUT rewriting the query:
- Suggestion 1: Description of what could be improved and why
- Suggestion 2: Description of what could be improved and why
## RISK ASSESSMENT
- WITH (NOLOCK) usage: [Yes/No] - If yes, explain the risks
- Missing WHERE clause: [Yes/No] - If yes, explain the impact
- Implicit conversions: [Yes/No] - If yes, list them
## SUMMARY
Brief summary of the most important findings and priority order for addressing them.
Remember: DO NOT provide a rewritten query. Only analysis and suggestions.
""".strip(),
"- NÃO reescreva a SQL em hipótese alguma.",
],
)

View file

@ -1,227 +0,0 @@
"""Prompt templates used by Agno agents.
This module keeps the canonical prompts alongside the agents to ensure
all prompt content is owned and maintained by the agent layer.
"""
from string import Template
class AgentPromptTemplates:
"""Collection of prompt templates for SQL optimization agents."""
SQL_TO_NATURAL = Template("""
You are an expert $database_name database analyst and performance specialist. Your task is to translate the SQL query below into a detailed, precise natural-language description that another agent will later use to reconstruct and optimize the query.
$database_name SQL Query:
```sql
$query
```
Your explanation must follow these requirements:
1. **Describe the overall purpose**
- Explain clearly what the query is intended to accomplish and why (retrieve data, update rows, aggregate information, validate existence, create structures, etc.).
2. **List ALL involved database objects**
Explicitly list every:
- Table
- View
- CTE (Common Table Expression)
- Subquery or derived table
- Function
- Stored procedure, if referenced
- Temporary table
- Schema-qualified object
Use the exact names as they appear in the query.
3. **Describe all essential operations**
Explicitly state, using exact column names:
- Columns retrieved or modified
- Join types, join conditions, and which objects participate
- Filters and conditions (WHERE, boolean logic, comparisons)
- Aggregations (SUM, COUNT, etc.)
- Grouping and HAVING clauses
- Sorting (ORDER BY)
- Window functions
- DISTINCT, TOP, LIMIT, OFFSET, pagination
- Any $database_name-specific features used$specific_features
4. **Maintain strict factual accuracy**
- Do NOT infer business meaning unless directly implied.
- Do NOT rename or paraphrase column names; repeat them exactly.
5. **Use clear, structured natural language**
- Provide a step-by-step explanation that makes every operation and purpose explicit.
- The output must be complete enough that the query can be reconstructed.
6. ** CRITICAL: Identify Performance Issues**
Flag any of these CRITICAL performance problems found in the query:
- **NO WHERE CLAUSE** (BE CAREFUL - AVOID FALSE POSITIVES):
* ONLY flag if the MAIN/OUTER SELECT has absolutely NO WHERE keyword with filtering conditions
* If query HAS 'WHERE' followed by conditions (even old-style JOINs in WHERE), DO NOT flag
* Subqueries/EXISTS having WHERE does NOT mean main query has no WHERE
* CROSS APPLY/LATERAL with internal WHERE counts as filtered
* If truly no WHERE: Flag as CRITICAL (causes FULL TABLE SCAN, no predicate pushdown)
- **Non-SARGable patterns**: Functions on indexed columns in WHERE/JOIN (e.g., YEAR(date), UPPER(col))
- **Leading wildcards**: LIKE '%value%' patterns that prevent index usage
- **Implicit conversions**: Type mismatches in comparisons
- **NOLOCK/WITH (NOLOCK) hints**: If query uses WITH (NOLOCK), WITH (nolock), WITH(NOLOCK), (NOLOCK), (nolock) or NOLOCK/nolock (any case) DO NOT REMOVE, but FLAG as **CRITICAL RISK**: "⚠️ WITH (NOLOCK) reads uncommitted/dirty data - CRITICAL: may cause INCORRECT FINANCIAL VALUES and data inconsistencies in production"
$analysis_requirements
Explanation:
""")
NATURAL_TO_SQL = Template("""
You are an expert $database_name SQL developer and query performance specialist.
Your task is to write an optimized SQL query based exclusively on the natural-language description provided below.
Description:
$explanation
CRITICAL RULES - READ BEFORE GENERATING SQL:
1. **PRESERVE ALL BUSINESS LOGIC EXACTLY**
- Every CASE WHEN statement must have IDENTICAL conditions and results
- Every calculated column must use IDENTICAL formulas
- Every subquery must query the SAME tables with SAME filters
- Do NOT simplify, merge, or "improve" business logic - even if it looks redundant
- If description mentions specific conditions (cd_tp_apolice = 2, etc.), preserve them EXACTLY
2. **PRESERVE ALL TABLES AND COLUMNS**
- Include EVERY table mentioned in the description
- Include EVERY column mentioned in the description
- Use EXACT column names as described (no renaming)
- Use EXACT table aliases as described
3. **Translate the full described logic into SQL**
- Implement all actions, operations, filters, joins, and conditions exactly as stated.
- Use every object and column referenced in the description, using their exact names.
- If the description mentions specific filter values (e.g., cd_tipo_endosso = 0), use those EXACT values
4. **Write optimized SQL while preserving semantics**
- Apply $database_name best practices for performance.
- Use indexing-aware filtering, efficient join strategies, and clear expressions.
- Implement aggregations, groupings, window functions, or pagination when described.
- Prefer performant constructs commonly recommended for $database_name workloads.
- OPTIMIZATION means structure/hints/indexes - NOT changing logic
5. **Use $database_name-specific syntax and features**
- Apply native functions, operators, optimizer behaviors, or hints when appropriate.
- Incorporate $specific_requirements if provided.
6. **Ensure logical fidelity - ZERO TOLERANCE FOR CHANGES**
- The SQL must reflect PRECISELY the behavior described
- Do NOT add logic not explicitly stated
- Do NOT omit any step described
- Do NOT infer or assume details beyond what is explicitly stated
- Do NOT "simplify" complex CASE statements
- Do NOT merge or combine separate calculated columns
7. **Self-Verification Checklist** (perform before outputting):
- [ ] All tables from description are present in query
- [ ] All columns from description are present in SELECT
- [ ] All CASE conditions match description exactly
- [ ] All subquery filters match description exactly
- [ ] All JOIN conditions match description exactly
- [ ] No business logic was simplified or changed
8. **Output format**
- Provide ONLY the final, optimized SQL query.
- Do NOT include explanations, comments, or extra text.
Optimized SQL Query:
""")
CONSERVATIVE_ANALYSIS = Template("""
You are an expert $database_name database analyst and performance specialist.
Your task is to ANALYZE the SQL query below and provide SUGGESTIONS for improvement.
CRITICAL: You must NOT rewrite or modify the query. Only provide analysis and suggestions.
$database_name SQL Query:
```sql
$query
```
Query Complexity Information:
- Columns: $column_count
- Tables: $table_count
- Subqueries: $subquery_count
- CASE statements: $case_count
- JOINs: $join_count
- Complexity Level: $complexity_level
Provide your analysis in the following structured format:
## PERFORMANCE ISSUES
List each performance issue found, with severity (CRITICAL/HIGH/MEDIUM/LOW):
- [SEVERITY] Issue description
- [SEVERITY] Issue description
## SUGGESTED INDEXES
List indexes that could improve this query:
- CREATE INDEX idx_name ON table(columns) -- Reason
## OPTIMIZATION SUGGESTIONS
List specific suggestions WITHOUT rewriting the query:
- Suggestion 1: Description of what could be improved and why
- Suggestion 2: Description of what could be improved and why
## RISK ASSESSMENT
- WITH (NOLOCK) usage: [Yes/No] - If yes, explain the risks
- Missing WHERE clause: [Yes/No] - If yes, explain the impact
- Implicit conversions: [Yes/No] - If yes, list them
## SUMMARY
Brief summary of the most important findings and priority order for addressing them.
Remember: DO NOT provide a rewritten query. Only analysis and suggestions.
""")
@classmethod
def render_sql_to_natural(
    cls, database_name: str, query: str, specific_features: str = "", analysis_requirements: str = ""
) -> str:
    """Fill the ``SQL_TO_NATURAL`` template and return the resulting prompt.

    Args:
        database_name: Value substituted for ``$database_name``.
        query: SQL text substituted for ``$query``.
        specific_features: Optional extra text; when non-empty it is inserted
            on a new line at ``$specific_features``.
        analysis_requirements: Optional extra text; when non-empty it is
            inserted on a new line at ``$analysis_requirements``.

    Returns:
        The template with every placeholder substituted.
    """
    # Optional sections start on their own line; empty ones collapse to "".
    features_section = "\n" + specific_features if specific_features else ""
    requirements_section = "\n" + analysis_requirements if analysis_requirements else ""

    placeholders = {
        "database_name": database_name,
        "query": query,
        "specific_features": features_section,
        "analysis_requirements": requirements_section,
    }
    return cls.SQL_TO_NATURAL.substitute(placeholders)
@classmethod
def render_natural_to_sql(
    cls, database_name: str, explanation: str, specific_requirements: str
) -> str:
    """Fill the ``NATURAL_TO_SQL`` template and return the resulting prompt.

    Args:
        database_name: Value substituted for ``$database_name``.
        explanation: Natural-language description substituted for
            ``$explanation``.
        specific_requirements: Newline-separated requirements. Each non-blank
            line becomes one ``- `` bullet; an empty or ``None`` value yields
            an empty bullet list.

    Returns:
        The template with every placeholder substituted.
    """
    # splitlines() also handles CRLF input, and each line is stripped so a
    # stray "\r" or source indentation never leaks into the rendered bullets.
    stripped_lines = (line.strip() for line in (specific_requirements or "").splitlines())
    bullets = [f"- {line}" for line in stripped_lines if line]
    return cls.NATURAL_TO_SQL.substitute(
        database_name=database_name,
        explanation=explanation,
        specific_requirements="\n".join(bullets),
    )
@classmethod
def render_conservative_analysis(
    cls,
    database_name: str,
    query: str,
    column_count: int = 0,
    table_count: int = 0,
    subquery_count: int = 0,
    case_count: int = 0,
    join_count: int = 0,
    complexity_level: str = "unknown",
) -> str:
    """Fill the ``CONSERVATIVE_ANALYSIS`` template and return the prompt.

    Args:
        database_name: Value substituted for ``$database_name``.
        query: SQL text substituted for ``$query``.
        column_count: Value substituted for ``$column_count``.
        table_count: Value substituted for ``$table_count``.
        subquery_count: Value substituted for ``$subquery_count``.
        case_count: Value substituted for ``$case_count``.
        join_count: Value substituted for ``$join_count``.
        complexity_level: Value substituted for ``$complexity_level``.

    Returns:
        The template with every placeholder substituted.
    """
    placeholders = {
        "database_name": database_name,
        "query": query,
        "column_count": column_count,
        "table_count": table_count,
        "subquery_count": subquery_count,
        "case_count": case_count,
        "join_count": join_count,
        "complexity_level": complexity_level,
    }
    return cls.CONSERVATIVE_ANALYSIS.substitute(placeholders)

View file

@ -2,7 +2,6 @@ from agno.agent import Agent
from agno.db.sqlite import SqliteDb
from sql_optimizer_team.tools.engine.model_selector import get_model
from sql_optimizer_team.tools.core_tools import explain_query_core
from sql_optimizer_team.agents.prompts import AgentPromptTemplates
from sql_optimizer_team.tools.prompt_tools import supported_databases
from sql_optimizer_team.tools.sql_tools import load_sql_from_file, ensure_non_empty
import os
@ -32,8 +31,68 @@ sql_analyst_agent = Agent(
"- Solicite banco e SQL se não estiverem presentes. Bancos suportados: use supported_databases().",
"- Se o usuário fornecer um caminho de arquivo, use load_sql_from_file().",
"- Preferência: use explain_query_core(database_type, sql) para gerar a explicação via core de negócio.",
"- Use a template oficial abaixo para estruturar a explicação (SQL → natural).",
AgentPromptTemplates.SQL_TO_NATURAL.template.strip(),
"- Use a template oficial abaixo para estruturar a explicação (SQL → natural).",
"""
You are an expert $database_name database analyst and performance specialist. Your task is to translate the SQL query below into a detailed, precise natural-language description that another agent will later use to reconstruct and optimize the query.
$database_name SQL Query:
```sql
$query
```
Your explanation must follow these requirements:
1. **Describe the overall purpose**
- Explain clearly what the query is intended to accomplish and why (retrieve data, update rows, aggregate information, validate existence, create structures, etc.).
2. **List ALL involved database objects**
Explicitly list every:
- Table
- View
- CTE (Common Table Expression)
- Subquery or derived table
- Function
- Stored procedure, if referenced
- Temporary table
- Schema-qualified object
Use the exact names as they appear in the query.
3. **Describe all essential operations**
Explicitly state, using exact column names:
- Columns retrieved or modified
- Join types, join conditions, and which objects participate
- Filters and conditions (WHERE, boolean logic, comparisons)
- Aggregations (SUM, COUNT, etc.)
- Grouping and HAVING clauses
- Sorting (ORDER BY)
- Window functions
- DISTINCT, TOP, LIMIT, OFFSET, pagination
- Any $database_name-specific features used$specific_features
4. **Maintain strict factual accuracy**
- Do NOT infer business meaning unless directly implied.
- Do NOT rename or paraphrase column names; repeat them exactly.
5. **Use clear, structured natural language**
- Provide a step-by-step explanation that makes every operation and purpose explicit.
- The output must be complete enough that the query can be reconstructed.
6. ** CRITICAL: Identify Performance Issues**
Flag any of these CRITICAL performance problems found in the query:
- **NO WHERE CLAUSE** (BE CAREFUL - AVOID FALSE POSITIVES):
* ONLY flag if the MAIN/OUTER SELECT has absolutely NO WHERE keyword with filtering conditions
* If query HAS 'WHERE' followed by conditions (even old-style JOINs in WHERE), DO NOT flag
* Subqueries/EXISTS having WHERE does NOT mean main query has no WHERE
* CROSS APPLY/LATERAL with internal WHERE counts as filtered
* If truly no WHERE: Flag as CRITICAL (causes FULL TABLE SCAN, no predicate pushdown)
- **Non-SARGable patterns**: Functions on indexed columns in WHERE/JOIN (e.g., YEAR(date), UPPER(col))
- **Leading wildcards**: LIKE '%value%' patterns that prevent index usage
- **Implicit conversions**: Type mismatches in comparisons
- **NOLOCK/WITH (NOLOCK) hints**: If query uses WITH (NOLOCK), WITH (nolock), WITH(NOLOCK), (NOLOCK), (nolock) or NOLOCK/nolock (any case) DO NOT REMOVE, but FLAG as **CRITICAL RISK**: "⚠️ WITH (NOLOCK) reads uncommitted/dirty data - CRITICAL: may cause INCORRECT FINANCIAL VALUES and data inconsistencies in production"
$analysis_requirements
Explanation:
""".strip(),
"- Entregue apenas a explicação natural estruturada conforme a prompt; não reescreva a SQL.",
"- Identifique problemas críticos de performance conforme a prompt.",
],

View file

@ -2,7 +2,6 @@ from agno.agent import Agent
from agno.db.sqlite import SqliteDb
from sql_optimizer_team.tools.engine.model_selector import get_model
from sql_optimizer_team.tools.core_tools import optimize_query_core
from sql_optimizer_team.agents.prompts import AgentPromptTemplates
from sql_optimizer_team.tools.prompt_tools import supported_databases
from sql_optimizer_team.tools.sql_tools import load_sql_from_file, ensure_non_empty
import os
@ -32,7 +31,66 @@ sql_optimizer_agent = Agent(
"- Exija banco alvo e SQL antes de otimizar.",
"- Use optimize_query_core(database_type, sql) para executar o core de negócio.",
"- Use a template oficial abaixo para reescrever (natural → SQL) mantendo 100% da lógica.",
AgentPromptTemplates.NATURAL_TO_SQL.template.strip(),
"""
You are an expert $database_name SQL developer and query performance specialist.
Your task is to write an optimized SQL query based exclusively on the natural-language description provided below.
Description:
$explanation
CRITICAL RULES - READ BEFORE GENERATING SQL:
1. **PRESERVE ALL BUSINESS LOGIC EXACTLY**
- Every CASE WHEN statement must have IDENTICAL conditions and results
- Every calculated column must use IDENTICAL formulas
- Every subquery must query the SAME tables with SAME filters
- Do NOT simplify, merge, or "improve" business logic - even if it looks redundant
- If description mentions specific conditions (cd_tp_apolice = 2, etc.), preserve them EXACTLY
2. **PRESERVE ALL TABLES AND COLUMNS**
- Include EVERY table mentioned in the description
- Include EVERY column mentioned in the description
- Use EXACT column names as described (no renaming)
- Use EXACT table aliases as described
3. **Translate the full described logic into SQL**
- Implement all actions, operations, filters, joins, and conditions exactly as stated.
- Use every object and column referenced in the description, using their exact names.
- If the description mentions specific filter values (e.g., cd_tipo_endosso = 0), use those EXACT values
4. **Write optimized SQL while preserving semantics**
- Apply $database_name best practices for performance.
- Use indexing-aware filtering, efficient join strategies, and clear expressions.
- Implement aggregations, groupings, window functions, or pagination when described.
- Prefer performant constructs commonly recommended for $database_name workloads.
- OPTIMIZATION means structure/hints/indexes - NOT changing logic
5. **Use $database_name-specific syntax and features**
- Apply native functions, operators, optimizer behaviors, or hints when appropriate.
- Incorporate $specific_requirements if provided.
6. **Ensure logical fidelity - ZERO TOLERANCE FOR CHANGES**
- The SQL must reflect PRECISELY the behavior described
- Do NOT add logic not explicitly stated
- Do NOT omit any step described
- Do NOT infer or assume details beyond what is explicitly stated
- Do NOT "simplify" complex CASE statements
- Do NOT merge or combine separate calculated columns
7. **Self-Verification Checklist** (perform before outputting):
- [ ] All tables from description are present in query
- [ ] All columns from description are present in SELECT
- [ ] All CASE conditions match description exactly
- [ ] All subquery filters match description exactly
- [ ] All JOIN conditions match description exactly
- [ ] No business logic was simplified or changed
8. **Output format**
- Provide ONLY the final, optimized SQL query.
- Do NOT include explanations, comments, or extra text.
Optimized SQL Query:
""".strip(),
"- Extraia e devolva SOMENTE optimized_query (sem explicações, sem markdown).",
"- Preserve 100% da lógica, colunas, aliases, filtros, joins e subqueries.",
],

View file

@ -6,8 +6,225 @@ reducing code duplication and ensuring consistency.
from abc import ABC, abstractmethod
from string import Template
from sql_optimizer_team.tools.engine.tools_api.prompt_tool import PromptGeneratorTool
from sql_optimizer_team.agents.prompts import AgentPromptTemplates
SQL_TO_NATURAL_TEMPLATE = Template("""
You are an expert $database_name database analyst and performance specialist. Your task is to translate the SQL query below into a detailed, precise natural-language description that another agent will later use to reconstruct and optimize the query.
$database_name SQL Query:
```sql
$query
```
Your explanation must follow these requirements:
1. **Describe the overall purpose**
- Explain clearly what the query is intended to accomplish and why (retrieve data, update rows, aggregate information, validate existence, create structures, etc.).
2. **List ALL involved database objects**
Explicitly list every:
- Table
- View
- CTE (Common Table Expression)
- Subquery or derived table
- Function
- Stored procedure, if referenced
- Temporary table
- Schema-qualified object
Use the exact names as they appear in the query.
3. **Describe all essential operations**
Explicitly state, using exact column names:
- Columns retrieved or modified
- Join types, join conditions, and which objects participate
- Filters and conditions (WHERE, boolean logic, comparisons)
- Aggregations (SUM, COUNT, etc.)
- Grouping and HAVING clauses
- Sorting (ORDER BY)
- Window functions
- DISTINCT, TOP, LIMIT, OFFSET, pagination
- Any $database_name-specific features used$specific_features
4. **Maintain strict factual accuracy**
- Do NOT infer business meaning unless directly implied.
- Do NOT rename or paraphrase column names; repeat them exactly.
5. **Use clear, structured natural language**
- Provide a step-by-step explanation that makes every operation and purpose explicit.
- The output must be complete enough that the query can be reconstructed.
6. ** CRITICAL: Identify Performance Issues**
Flag any of these CRITICAL performance problems found in the query:
- **NO WHERE CLAUSE** (BE CAREFUL - AVOID FALSE POSITIVES):
* ONLY flag if the MAIN/OUTER SELECT has absolutely NO WHERE keyword with filtering conditions
* If query HAS 'WHERE' followed by conditions (even old-style JOINs in WHERE), DO NOT flag
* Subqueries/EXISTS having WHERE does NOT mean main query has no WHERE
* CROSS APPLY/LATERAL with internal WHERE counts as filtered
* If truly no WHERE: Flag as CRITICAL (causes FULL TABLE SCAN, no predicate pushdown)
- **Non-SARGable patterns**: Functions on indexed columns in WHERE/JOIN (e.g., YEAR(date), UPPER(col))
- **Leading wildcards**: LIKE '%value%' patterns that prevent index usage
- **Implicit conversions**: Type mismatches in comparisons
- **NOLOCK/WITH (NOLOCK) hints**: If query uses WITH (NOLOCK), WITH (nolock), WITH(NOLOCK), (NOLOCK), (nolock) or NOLOCK/nolock (any case) DO NOT REMOVE, but FLAG as **CRITICAL RISK**: "⚠️ WITH (NOLOCK) reads uncommitted/dirty data - CRITICAL: may cause INCORRECT FINANCIAL VALUES and data inconsistencies in production"
$analysis_requirements
Explanation:
""")
NATURAL_TO_SQL_TEMPLATE = Template("""
You are an expert $database_name SQL developer and query performance specialist.
Your task is to write an optimized SQL query based exclusively on the natural-language description provided below.
Description:
$explanation
CRITICAL RULES - READ BEFORE GENERATING SQL:
1. **PRESERVE ALL BUSINESS LOGIC EXACTLY**
- Every CASE WHEN statement must have IDENTICAL conditions and results
- Every calculated column must use IDENTICAL formulas
- Every subquery must query the SAME tables with SAME filters
- Do NOT simplify, merge, or "improve" business logic - even if it looks redundant
- If description mentions specific conditions (cd_tp_apolice = 2, etc.), preserve them EXACTLY
2. **PRESERVE ALL TABLES AND COLUMNS**
- Include EVERY table mentioned in the description
- Include EVERY column mentioned in the description
- Use EXACT column names as described (no renaming)
- Use EXACT table aliases as described
3. **Translate the full described logic into SQL**
- Implement all actions, operations, filters, joins, and conditions exactly as stated.
- Use every object and column referenced in the description, using their exact names.
- If the description mentions specific filter values (e.g., cd_tipo_endosso = 0), use those EXACT values
4. **Write optimized SQL while preserving semantics**
- Apply $database_name best practices for performance.
- Use indexing-aware filtering, efficient join strategies, and clear expressions.
- Implement aggregations, groupings, window functions, or pagination when described.
- Prefer performant constructs commonly recommended for $database_name workloads.
- OPTIMIZATION means structure/hints/indexes - NOT changing logic
5. **Use $database_name-specific syntax and features**
- Apply native functions, operators, optimizer behaviors, or hints when appropriate.
- Incorporate $specific_requirements if provided.
6. **Ensure logical fidelity - ZERO TOLERANCE FOR CHANGES**
- The SQL must reflect PRECISELY the behavior described
- Do NOT add logic not explicitly stated
- Do NOT omit any step described
- Do NOT infer or assume details beyond what is explicitly stated
- Do NOT "simplify" complex CASE statements
- Do NOT merge or combine separate calculated columns
7. **Self-Verification Checklist** (perform before outputting):
- [ ] All tables from description are present in query
- [ ] All columns from description are present in SELECT
- [ ] All CASE conditions match description exactly
- [ ] All subquery filters match description exactly
- [ ] All JOIN conditions match description exactly
- [ ] No business logic was simplified or changed
8. **Output format**
- Provide ONLY the final, optimized SQL query.
- Do NOT include explanations, comments, or extra text.
Optimized SQL Query:
""")
CONSERVATIVE_ANALYSIS_TEMPLATE = Template("""
You are an expert $database_name database analyst and performance specialist.
Your task is to ANALYZE the SQL query below and provide SUGGESTIONS for improvement.
CRITICAL: You must NOT rewrite or modify the query. Only provide analysis and suggestions.
$database_name SQL Query:
```sql
$query
```
Query Complexity Information:
- Columns: $column_count
- Tables: $table_count
- Subqueries: $subquery_count
- CASE statements: $case_count
- JOINs: $join_count
- Complexity Level: $complexity_level
Provide your analysis in the following structured format:
## PERFORMANCE ISSUES
List each performance issue found, with severity (CRITICAL/HIGH/MEDIUM/LOW):
- [SEVERITY] Issue description
- [SEVERITY] Issue description
## SUGGESTED INDEXES
List indexes that could improve this query:
- CREATE INDEX idx_name ON table(columns) -- Reason
## OPTIMIZATION SUGGESTIONS
List specific suggestions WITHOUT rewriting the query:
- Suggestion 1: Description of what could be improved and why
- Suggestion 2: Description of what could be improved and why
## RISK ASSESSMENT
- WITH (NOLOCK) usage: [Yes/No] - If yes, explain the risks
- Missing WHERE clause: [Yes/No] - If yes, explain the impact
- Implicit conversions: [Yes/No] - If yes, list them
## SUMMARY
Brief summary of the most important findings and priority order for addressing them.
Remember: DO NOT provide a rewritten query. Only analysis and suggestions.
""")
def _render_sql_to_natural(
    database_name: str, query: str, specific_features: str = "", analysis_requirements: str = ""
) -> str:
    """Fill ``SQL_TO_NATURAL_TEMPLATE`` and return the resulting prompt.

    Args:
        database_name: Value substituted for ``$database_name``.
        query: SQL text substituted for ``$query``.
        specific_features: Optional extra text; when non-empty it is inserted
            on a new line at ``$specific_features``.
        analysis_requirements: Optional extra text; when non-empty it is
            inserted on a new line at ``$analysis_requirements``.

    Returns:
        The template with every placeholder substituted.
    """
    # Optional sections start on their own line; empty ones collapse to "".
    features_section = "\n" + specific_features if specific_features else ""
    requirements_section = "\n" + analysis_requirements if analysis_requirements else ""

    placeholders = {
        "database_name": database_name,
        "query": query,
        "specific_features": features_section,
        "analysis_requirements": requirements_section,
    }
    return SQL_TO_NATURAL_TEMPLATE.substitute(placeholders)
def _render_natural_to_sql(
    database_name: str, explanation: str, specific_requirements: str
) -> str:
    """Fill ``NATURAL_TO_SQL_TEMPLATE`` and return the resulting prompt.

    Args:
        database_name: Value substituted for ``$database_name``.
        explanation: Natural-language description substituted for
            ``$explanation``.
        specific_requirements: Newline-separated requirements. Each non-blank
            line becomes one ``- `` bullet; an empty or ``None`` value yields
            an empty bullet list.

    Returns:
        The template with every placeholder substituted.
    """
    # splitlines() also handles CRLF input, and each line is stripped so a
    # stray "\r" or source indentation never leaks into the rendered bullets.
    stripped_lines = (line.strip() for line in (specific_requirements or "").splitlines())
    bullets = [f"- {line}" for line in stripped_lines if line]
    return NATURAL_TO_SQL_TEMPLATE.substitute(
        database_name=database_name,
        explanation=explanation,
        specific_requirements="\n".join(bullets),
    )
def _render_conservative_analysis(
    database_name: str,
    query: str,
    column_count: int = 0,
    table_count: int = 0,
    subquery_count: int = 0,
    case_count: int = 0,
    join_count: int = 0,
    complexity_level: str = "unknown",
) -> str:
    """Fill ``CONSERVATIVE_ANALYSIS_TEMPLATE`` and return the prompt.

    Args:
        database_name: Value substituted for ``$database_name``.
        query: SQL text substituted for ``$query``.
        column_count: Value substituted for ``$column_count``.
        table_count: Value substituted for ``$table_count``.
        subquery_count: Value substituted for ``$subquery_count``.
        case_count: Value substituted for ``$case_count``.
        join_count: Value substituted for ``$join_count``.
        complexity_level: Value substituted for ``$complexity_level``.

    Returns:
        The template with every placeholder substituted.
    """
    placeholders = {
        "database_name": database_name,
        "query": query,
        "column_count": column_count,
        "table_count": table_count,
        "subquery_count": subquery_count,
        "case_count": case_count,
        "join_count": join_count,
        "complexity_level": complexity_level,
    }
    return CONSERVATIVE_ANALYSIS_TEMPLATE.substitute(placeholders)
class BasePromptGenerator(PromptGeneratorTool, ABC):
@ -70,7 +287,7 @@ class BasePromptGenerator(PromptGeneratorTool, ABC):
Returns:
Formatted prompt for LLM
"""
return AgentPromptTemplates.render_sql_to_natural(
return _render_sql_to_natural(
database_name=self.get_database_name(),
query=query,
specific_features=self.get_specific_features(),
@ -86,7 +303,7 @@ class BasePromptGenerator(PromptGeneratorTool, ABC):
Returns:
Formatted prompt for LLM
"""
return AgentPromptTemplates.render_natural_to_sql(
return _render_natural_to_sql(
database_name=self.get_database_name(),
explanation=explanation,
specific_requirements=self.get_specific_requirements(),