diff --git a/src/sql_optimizer_team/agents/__init__.py b/src/sql_optimizer_team/agents/__init__.py
index d6ba771..e0613ef 100644
--- a/src/sql_optimizer_team/agents/__init__.py
+++ b/src/sql_optimizer_team/agents/__init__.py
@@ -1,9 +1,12 @@
-"""SQL optimizer team agents."""
+"""SQL optimizer team agents.
 
-from sql_optimizer_team.agents.sql_analyst_agent import sql_analyst_agent
-from sql_optimizer_team.agents.sql_optimizer_agent import sql_optimizer_agent
-from sql_optimizer_team.agents.sql_quality_agent import sql_quality_agent
-from sql_optimizer_team.agents.conservative_analysis_agent import conservative_analysis_agent
+This package uses lazy imports to avoid circular dependencies with tools.
+"""
+
+from __future__ import annotations
+
+import importlib
+from typing import Any
 
 __all__ = [
     "sql_analyst_agent",
@@ -11,3 +14,33 @@ __all__ = [
     "sql_quality_agent",
     "conservative_analysis_agent",
 ]
+
+_AGENT_MODULES = {
+    "sql_analyst_agent": "sql_optimizer_team.agents.sql_analyst_agent",
+    "sql_optimizer_agent": "sql_optimizer_team.agents.sql_optimizer_agent",
+    "sql_quality_agent": "sql_optimizer_team.agents.sql_quality_agent",
+    "conservative_analysis_agent": "sql_optimizer_team.agents.conservative_analysis_agent",
+}
+
+
+def __getattr__(name: str) -> Any:
+    """Import the agent called ``name`` on first access and return it (PEP 562).
+
+    Raises:
+        AttributeError: If ``name`` is not a key of ``_AGENT_MODULES``.
+    """
+    module_path = _AGENT_MODULES.get(name)
+    if module_path is None:
+        raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
+    agent = getattr(importlib.import_module(module_path), name)
+    # Loading a submodule binds the submodule object on this package under
+    # the same name as the agent it defines, and module-level __getattr__ only
+    # runs when normal lookup fails. Without this rebind, every access after
+    # the first would return the submodule instead of the agent.
+    globals()[name] = agent
+    return agent
+
+
+def __dir__() -> list[str]:
+    """Return module globals plus the lazily exported names, without duplicates."""
+    return sorted(set(globals()) | set(__all__))
diff --git a/src/sql_optimizer_team/agents/conservative_analysis_agent.py b/src/sql_optimizer_team/agents/conservative_analysis_agent.py
index 4315d0a..9772d42 100644
--- a/src/sql_optimizer_team/agents/conservative_analysis_agent.py
+++ b/src/sql_optimizer_team/agents/conservative_analysis_agent.py
@@ -1,7 +1,6 @@
 from agno.agent import Agent
 from agno.db.sqlite import SqliteDb
 from sql_optimizer_team.tools.engine.model_selector import
get_model -from sql_optimizer_team.agents.prompts import AgentPromptTemplates from sql_optimizer_team.tools.prompt_tools import supported_databases from sql_optimizer_team.tools.sql_tools import load_sql_from_file, ensure_non_empty import os @@ -31,7 +30,52 @@ conservative_analysis_agent = Agent( "- Solicite banco e SQL se não estiverem presentes.", "- Se o usuário fornecer um caminho de arquivo, use load_sql_from_file().", "- Use a template oficial abaixo para a análise conservadora (sem reescrever a SQL).", - AgentPromptTemplates.CONSERVATIVE_ANALYSIS.template.strip(), + """ + You are an expert $database_name database analyst and performance specialist. + + Your task is to ANALYZE the SQL query below and provide SUGGESTIONS for improvement. + + ⚠️ CRITICAL: You must NOT rewrite or modify the query. Only provide analysis and suggestions. + + $database_name SQL Query: + ```sql + $query + ``` + + Query Complexity Information: + - Columns: $column_count + - Tables: $table_count + - Subqueries: $subquery_count + - CASE statements: $case_count + - JOINs: $join_count + - Complexity Level: $complexity_level + + Provide your analysis in the following structured format: + + ## PERFORMANCE ISSUES + List each performance issue found, with severity (CRITICAL/HIGH/MEDIUM/LOW): + - [SEVERITY] Issue description + - [SEVERITY] Issue description + + ## SUGGESTED INDEXES + List indexes that could improve this query: + - CREATE INDEX idx_name ON table(columns) -- Reason + + ## OPTIMIZATION SUGGESTIONS + List specific suggestions WITHOUT rewriting the query: + - Suggestion 1: Description of what could be improved and why + - Suggestion 2: Description of what could be improved and why + + ## RISK ASSESSMENT + - WITH (NOLOCK) usage: [Yes/No] - If yes, explain the risks + - Missing WHERE clause: [Yes/No] - If yes, explain the impact + - Implicit conversions: [Yes/No] - If yes, list them + + ## SUMMARY + Brief summary of the most important findings and priority order for addressing them. 
+ + Remember: DO NOT provide a rewritten query. Only analysis and suggestions. + """.strip(), "- NÃO reescreva a SQL em hipótese alguma.", ], ) diff --git a/src/sql_optimizer_team/agents/prompts.py b/src/sql_optimizer_team/agents/prompts.py deleted file mode 100644 index 9d1f7c8..0000000 --- a/src/sql_optimizer_team/agents/prompts.py +++ /dev/null @@ -1,227 +0,0 @@ -"""Prompt templates used by Agno agents. - -This module keeps the canonical prompts alongside the agents to ensure -all prompt content is owned and maintained by the agent layer. -""" - -from string import Template - - -class AgentPromptTemplates: - """Collection of prompt templates for SQL optimization agents.""" - - SQL_TO_NATURAL = Template(""" - You are an expert $database_name database analyst and performance specialist. Your task is to translate the SQL query below into a detailed, precise natural-language description that another agent will later use to reconstruct and optimize the query. - - $database_name SQL Query: - ```sql - $query - ``` - - Your explanation must follow these requirements: - - 1. **Describe the overall purpose** - - Explain clearly what the query is intended to accomplish and why (retrieve data, update rows, aggregate information, validate existence, create structures, etc.). - - 2. **List ALL involved database objects** - Explicitly list every: - - Table - - View - - CTE (Common Table Expression) - - Subquery or derived table - - Function - - Stored procedure, if referenced - - Temporary table - - Schema-qualified object - Use the exact names as they appear in the query. - - 3. **Describe all essential operations** - Explicitly state, using exact column names: - - Columns retrieved or modified - - Join types, join conditions, and which objects participate - - Filters and conditions (WHERE, boolean logic, comparisons) - - Aggregations (SUM, COUNT, etc.) 
- - Grouping and HAVING clauses - - Sorting (ORDER BY) - - Window functions - - DISTINCT, TOP, LIMIT, OFFSET, pagination - - Any $database_name-specific features used$specific_features - - 4. **Maintain strict factual accuracy** - - Do NOT infer business meaning unless directly implied. - - Do NOT rename or paraphrase column names; repeat them exactly. - - 5. **Use clear, structured natural language** - - Provide a step-by-step explanation that makes every operation and purpose explicit. - - The output must be complete enough that the query can be reconstructed. - - 6. **⚠️ CRITICAL: Identify Performance Issues** - Flag any of these CRITICAL performance problems found in the query: - - **NO WHERE CLAUSE** (BE CAREFUL - AVOID FALSE POSITIVES): - * ONLY flag if the MAIN/OUTER SELECT has absolutely NO WHERE keyword with filtering conditions - * If query HAS 'WHERE' followed by conditions (even old-style JOINs in WHERE), DO NOT flag - * Subqueries/EXISTS having WHERE does NOT mean main query has no WHERE - * CROSS APPLY/LATERAL with internal WHERE counts as filtered - * If truly no WHERE: Flag as CRITICAL (causes FULL TABLE SCAN, no predicate pushdown) - - **Non-SARGable patterns**: Functions on indexed columns in WHERE/JOIN (e.g., YEAR(date), UPPER(col)) - - **Leading wildcards**: LIKE '%value%' patterns that prevent index usage - - **Implicit conversions**: Type mismatches in comparisons - - **NOLOCK/WITH (NOLOCK) hints**: If query uses WITH (NOLOCK), WITH (nolock), WITH(NOLOCK), (NOLOCK), (nolock) or NOLOCK/nolock (any case) → DO NOT REMOVE, but FLAG as **CRITICAL RISK**: "⚠️ WITH (NOLOCK) reads uncommitted/dirty data - CRITICAL: may cause INCORRECT FINANCIAL VALUES and data inconsistencies in production" - $analysis_requirements - - Explanation: - """) - - NATURAL_TO_SQL = Template(""" - You are an expert $database_name SQL developer and query performance specialist. 
- Your task is to write an optimized SQL query based exclusively on the natural-language description provided below. - - Description: - $explanation - - ⚠️ CRITICAL RULES - READ BEFORE GENERATING SQL: - - 1. **PRESERVE ALL BUSINESS LOGIC EXACTLY** - - Every CASE WHEN statement must have IDENTICAL conditions and results - - Every calculated column must use IDENTICAL formulas - - Every subquery must query the SAME tables with SAME filters - - Do NOT simplify, merge, or "improve" business logic - even if it looks redundant - - If description mentions specific conditions (cd_tp_apolice = 2, etc.), preserve them EXACTLY - - 2. **PRESERVE ALL TABLES AND COLUMNS** - - Include EVERY table mentioned in the description - - Include EVERY column mentioned in the description - - Use EXACT column names as described (no renaming) - - Use EXACT table aliases as described - - 3. **Translate the full described logic into SQL** - - Implement all actions, operations, filters, joins, and conditions exactly as stated. - - Use every object and column referenced in the description, using their exact names. - - If the description mentions specific filter values (e.g., cd_tipo_endosso = 0), use those EXACT values - - 4. **Write optimized SQL while preserving semantics** - - Apply $database_name best practices for performance. - - Use indexing-aware filtering, efficient join strategies, and clear expressions. - - Implement aggregations, groupings, window functions, or pagination when described. - - Prefer performant constructs commonly recommended for $database_name workloads. - - OPTIMIZATION means structure/hints/indexes - NOT changing logic - - 5. **Use $database_name-specific syntax and features** - - Apply native functions, operators, optimizer behaviors, or hints when appropriate. - - Incorporate $specific_requirements if provided. - - 6. 
**Ensure logical fidelity - ZERO TOLERANCE FOR CHANGES** - - The SQL must reflect PRECISELY the behavior described - - Do NOT add logic not explicitly stated - - Do NOT omit any step described - - Do NOT infer or assume details beyond what is explicitly stated - - Do NOT "simplify" complex CASE statements - - Do NOT merge or combine separate calculated columns - - 7. **Self-Verification Checklist** (perform before outputting): - - [ ] All tables from description are present in query - - [ ] All columns from description are present in SELECT - - [ ] All CASE conditions match description exactly - - [ ] All subquery filters match description exactly - - [ ] All JOIN conditions match description exactly - - [ ] No business logic was simplified or changed - - 8. **Output format** - - Provide ONLY the final, optimized SQL query. - - Do NOT include explanations, comments, or extra text. - - Optimized SQL Query: - """) - - CONSERVATIVE_ANALYSIS = Template(""" - You are an expert $database_name database analyst and performance specialist. - - Your task is to ANALYZE the SQL query below and provide SUGGESTIONS for improvement. - - ⚠️ CRITICAL: You must NOT rewrite or modify the query. Only provide analysis and suggestions. 
- - $database_name SQL Query: - ```sql - $query - ``` - - Query Complexity Information: - - Columns: $column_count - - Tables: $table_count - - Subqueries: $subquery_count - - CASE statements: $case_count - - JOINs: $join_count - - Complexity Level: $complexity_level - - Provide your analysis in the following structured format: - - ## PERFORMANCE ISSUES - List each performance issue found, with severity (CRITICAL/HIGH/MEDIUM/LOW): - - [SEVERITY] Issue description - - [SEVERITY] Issue description - - ## SUGGESTED INDEXES - List indexes that could improve this query: - - CREATE INDEX idx_name ON table(columns) -- Reason - - ## OPTIMIZATION SUGGESTIONS - List specific suggestions WITHOUT rewriting the query: - - Suggestion 1: Description of what could be improved and why - - Suggestion 2: Description of what could be improved and why - - ## RISK ASSESSMENT - - WITH (NOLOCK) usage: [Yes/No] - If yes, explain the risks - - Missing WHERE clause: [Yes/No] - If yes, explain the impact - - Implicit conversions: [Yes/No] - If yes, list them - - ## SUMMARY - Brief summary of the most important findings and priority order for addressing them. - - Remember: DO NOT provide a rewritten query. Only analysis and suggestions. 
- """) - - @classmethod - def render_sql_to_natural( - cls, database_name: str, query: str, specific_features: str = "", analysis_requirements: str = "" - ) -> str: - return cls.SQL_TO_NATURAL.substitute( - database_name=database_name, - query=query, - specific_features=f"\n{specific_features}" if specific_features else "", - analysis_requirements=f"\n{analysis_requirements}" if analysis_requirements else "", - ) - - @classmethod - def render_natural_to_sql( - cls, database_name: str, explanation: str, specific_requirements: str - ) -> str: - return cls.NATURAL_TO_SQL.substitute( - database_name=database_name, - explanation=explanation, - specific_requirements="\n".join( - f"- {req}" for req in specific_requirements.split("\n") if req.strip() - ), - ) - - @classmethod - def render_conservative_analysis( - cls, - database_name: str, - query: str, - column_count: int = 0, - table_count: int = 0, - subquery_count: int = 0, - case_count: int = 0, - join_count: int = 0, - complexity_level: str = "unknown", - ) -> str: - return cls.CONSERVATIVE_ANALYSIS.substitute( - database_name=database_name, - query=query, - column_count=column_count, - table_count=table_count, - subquery_count=subquery_count, - case_count=case_count, - join_count=join_count, - complexity_level=complexity_level, - ) diff --git a/src/sql_optimizer_team/agents/sql_analyst_agent.py b/src/sql_optimizer_team/agents/sql_analyst_agent.py index 172f069..961c3e3 100644 --- a/src/sql_optimizer_team/agents/sql_analyst_agent.py +++ b/src/sql_optimizer_team/agents/sql_analyst_agent.py @@ -2,7 +2,6 @@ from agno.agent import Agent from agno.db.sqlite import SqliteDb from sql_optimizer_team.tools.engine.model_selector import get_model from sql_optimizer_team.tools.core_tools import explain_query_core -from sql_optimizer_team.agents.prompts import AgentPromptTemplates from sql_optimizer_team.tools.prompt_tools import supported_databases from sql_optimizer_team.tools.sql_tools import load_sql_from_file, 
ensure_non_empty import os @@ -32,8 +31,68 @@ sql_analyst_agent = Agent( "- Solicite banco e SQL se não estiverem presentes. Bancos suportados: use supported_databases().", "- Se o usuário fornecer um caminho de arquivo, use load_sql_from_file().", "- Preferência: use explain_query_core(database_type, sql) para gerar a explicação via core de negócio.", - "- Use a template oficial abaixo para estruturar a explicação (SQL → natural).", - AgentPromptTemplates.SQL_TO_NATURAL.template.strip(), + "- Use a template oficial abaixo para estruturar a explicação (SQL → natural).", + """ + You are an expert $database_name database analyst and performance specialist. Your task is to translate the SQL query below into a detailed, precise natural-language description that another agent will later use to reconstruct and optimize the query. + + $database_name SQL Query: + ```sql + $query + ``` + + Your explanation must follow these requirements: + + 1. **Describe the overall purpose** + - Explain clearly what the query is intended to accomplish and why (retrieve data, update rows, aggregate information, validate existence, create structures, etc.). + + 2. **List ALL involved database objects** + Explicitly list every: + - Table + - View + - CTE (Common Table Expression) + - Subquery or derived table + - Function + - Stored procedure, if referenced + - Temporary table + - Schema-qualified object + Use the exact names as they appear in the query. + + 3. **Describe all essential operations** + Explicitly state, using exact column names: + - Columns retrieved or modified + - Join types, join conditions, and which objects participate + - Filters and conditions (WHERE, boolean logic, comparisons) + - Aggregations (SUM, COUNT, etc.) + - Grouping and HAVING clauses + - Sorting (ORDER BY) + - Window functions + - DISTINCT, TOP, LIMIT, OFFSET, pagination + - Any $database_name-specific features used$specific_features + + 4. 
**Maintain strict factual accuracy** + - Do NOT infer business meaning unless directly implied. + - Do NOT rename or paraphrase column names; repeat them exactly. + + 5. **Use clear, structured natural language** + - Provide a step-by-step explanation that makes every operation and purpose explicit. + - The output must be complete enough that the query can be reconstructed. + + 6. **⚠️ CRITICAL: Identify Performance Issues** + Flag any of these CRITICAL performance problems found in the query: + - **NO WHERE CLAUSE** (BE CAREFUL - AVOID FALSE POSITIVES): + * ONLY flag if the MAIN/OUTER SELECT has absolutely NO WHERE keyword with filtering conditions + * If query HAS 'WHERE' followed by conditions (even old-style JOINs in WHERE), DO NOT flag + * Subqueries/EXISTS having WHERE does NOT mean main query has no WHERE + * CROSS APPLY/LATERAL with internal WHERE counts as filtered + * If truly no WHERE: Flag as CRITICAL (causes FULL TABLE SCAN, no predicate pushdown) + - **Non-SARGable patterns**: Functions on indexed columns in WHERE/JOIN (e.g., YEAR(date), UPPER(col)) + - **Leading wildcards**: LIKE '%value%' patterns that prevent index usage + - **Implicit conversions**: Type mismatches in comparisons + - **NOLOCK/WITH (NOLOCK) hints**: If query uses WITH (NOLOCK), WITH (nolock), WITH(NOLOCK), (NOLOCK), (nolock) or NOLOCK/nolock (any case) → DO NOT REMOVE, but FLAG as **CRITICAL RISK**: "⚠️ WITH (NOLOCK) reads uncommitted/dirty data - CRITICAL: may cause INCORRECT FINANCIAL VALUES and data inconsistencies in production" + $analysis_requirements + + Explanation: + """.strip(), "- Entregue apenas a explicação natural estruturada conforme a prompt; não reescreva a SQL.", "- Identifique problemas críticos de performance conforme a prompt.", ], diff --git a/src/sql_optimizer_team/agents/sql_optimizer_agent.py b/src/sql_optimizer_team/agents/sql_optimizer_agent.py index 15c04e0..a3dde66 100644 --- a/src/sql_optimizer_team/agents/sql_optimizer_agent.py +++ 
b/src/sql_optimizer_team/agents/sql_optimizer_agent.py @@ -2,7 +2,6 @@ from agno.agent import Agent from agno.db.sqlite import SqliteDb from sql_optimizer_team.tools.engine.model_selector import get_model from sql_optimizer_team.tools.core_tools import optimize_query_core -from sql_optimizer_team.agents.prompts import AgentPromptTemplates from sql_optimizer_team.tools.prompt_tools import supported_databases from sql_optimizer_team.tools.sql_tools import load_sql_from_file, ensure_non_empty import os @@ -32,7 +31,66 @@ sql_optimizer_agent = Agent( "- Exija banco alvo e SQL antes de otimizar.", "- Use optimize_query_core(database_type, sql) para executar o core de negócio.", "- Use a template oficial abaixo para reescrever (natural → SQL) mantendo 100% da lógica.", - AgentPromptTemplates.NATURAL_TO_SQL.template.strip(), + """ + You are an expert $database_name SQL developer and query performance specialist. + Your task is to write an optimized SQL query based exclusively on the natural-language description provided below. + + Description: + $explanation + + ⚠️ CRITICAL RULES - READ BEFORE GENERATING SQL: + + 1. **PRESERVE ALL BUSINESS LOGIC EXACTLY** + - Every CASE WHEN statement must have IDENTICAL conditions and results + - Every calculated column must use IDENTICAL formulas + - Every subquery must query the SAME tables with SAME filters + - Do NOT simplify, merge, or "improve" business logic - even if it looks redundant + - If description mentions specific conditions (cd_tp_apolice = 2, etc.), preserve them EXACTLY + + 2. **PRESERVE ALL TABLES AND COLUMNS** + - Include EVERY table mentioned in the description + - Include EVERY column mentioned in the description + - Use EXACT column names as described (no renaming) + - Use EXACT table aliases as described + + 3. **Translate the full described logic into SQL** + - Implement all actions, operations, filters, joins, and conditions exactly as stated. 
+ - Use every object and column referenced in the description, using their exact names. + - If the description mentions specific filter values (e.g., cd_tipo_endosso = 0), use those EXACT values + + 4. **Write optimized SQL while preserving semantics** + - Apply $database_name best practices for performance. + - Use indexing-aware filtering, efficient join strategies, and clear expressions. + - Implement aggregations, groupings, window functions, or pagination when described. + - Prefer performant constructs commonly recommended for $database_name workloads. + - OPTIMIZATION means structure/hints/indexes - NOT changing logic + + 5. **Use $database_name-specific syntax and features** + - Apply native functions, operators, optimizer behaviors, or hints when appropriate. + - Incorporate $specific_requirements if provided. + + 6. **Ensure logical fidelity - ZERO TOLERANCE FOR CHANGES** + - The SQL must reflect PRECISELY the behavior described + - Do NOT add logic not explicitly stated + - Do NOT omit any step described + - Do NOT infer or assume details beyond what is explicitly stated + - Do NOT "simplify" complex CASE statements + - Do NOT merge or combine separate calculated columns + + 7. **Self-Verification Checklist** (perform before outputting): + - [ ] All tables from description are present in query + - [ ] All columns from description are present in SELECT + - [ ] All CASE conditions match description exactly + - [ ] All subquery filters match description exactly + - [ ] All JOIN conditions match description exactly + - [ ] No business logic was simplified or changed + + 8. **Output format** + - Provide ONLY the final, optimized SQL query. + - Do NOT include explanations, comments, or extra text. 
+ + Optimized SQL Query: + """.strip(), "- Extraia e devolva SOMENTE optimized_query (sem explicações, sem markdown).", "- Preserve 100% da lógica, colunas, aliases, filtros, joins e subqueries.", ], diff --git a/src/sql_optimizer_team/tools/engine/prompt_tools/base_generator.py b/src/sql_optimizer_team/tools/engine/prompt_tools/base_generator.py index 677dc7a..46ef151 100644 --- a/src/sql_optimizer_team/tools/engine/prompt_tools/base_generator.py +++ b/src/sql_optimizer_team/tools/engine/prompt_tools/base_generator.py @@ -6,8 +6,225 @@ reducing code duplication and ensuring consistency. from abc import ABC, abstractmethod +from string import Template + from sql_optimizer_team.tools.engine.tools_api.prompt_tool import PromptGeneratorTool -from sql_optimizer_team.agents.prompts import AgentPromptTemplates + + +SQL_TO_NATURAL_TEMPLATE = Template(""" + You are an expert $database_name database analyst and performance specialist. Your task is to translate the SQL query below into a detailed, precise natural-language description that another agent will later use to reconstruct and optimize the query. + + $database_name SQL Query: + ```sql + $query + ``` + + Your explanation must follow these requirements: + + 1. **Describe the overall purpose** + - Explain clearly what the query is intended to accomplish and why (retrieve data, update rows, aggregate information, validate existence, create structures, etc.). + + 2. **List ALL involved database objects** + Explicitly list every: + - Table + - View + - CTE (Common Table Expression) + - Subquery or derived table + - Function + - Stored procedure, if referenced + - Temporary table + - Schema-qualified object + Use the exact names as they appear in the query. + + 3. 
**Describe all essential operations** + Explicitly state, using exact column names: + - Columns retrieved or modified + - Join types, join conditions, and which objects participate + - Filters and conditions (WHERE, boolean logic, comparisons) + - Aggregations (SUM, COUNT, etc.) + - Grouping and HAVING clauses + - Sorting (ORDER BY) + - Window functions + - DISTINCT, TOP, LIMIT, OFFSET, pagination + - Any $database_name-specific features used$specific_features + + 4. **Maintain strict factual accuracy** + - Do NOT infer business meaning unless directly implied. + - Do NOT rename or paraphrase column names; repeat them exactly. + + 5. **Use clear, structured natural language** + - Provide a step-by-step explanation that makes every operation and purpose explicit. + - The output must be complete enough that the query can be reconstructed. + + 6. **⚠️ CRITICAL: Identify Performance Issues** + Flag any of these CRITICAL performance problems found in the query: + - **NO WHERE CLAUSE** (BE CAREFUL - AVOID FALSE POSITIVES): + * ONLY flag if the MAIN/OUTER SELECT has absolutely NO WHERE keyword with filtering conditions + * If query HAS 'WHERE' followed by conditions (even old-style JOINs in WHERE), DO NOT flag + * Subqueries/EXISTS having WHERE does NOT mean main query has no WHERE + * CROSS APPLY/LATERAL with internal WHERE counts as filtered + * If truly no WHERE: Flag as CRITICAL (causes FULL TABLE SCAN, no predicate pushdown) + - **Non-SARGable patterns**: Functions on indexed columns in WHERE/JOIN (e.g., YEAR(date), UPPER(col)) + - **Leading wildcards**: LIKE '%value%' patterns that prevent index usage + - **Implicit conversions**: Type mismatches in comparisons + - **NOLOCK/WITH (NOLOCK) hints**: If query uses WITH (NOLOCK), WITH (nolock), WITH(NOLOCK), (NOLOCK), (nolock) or NOLOCK/nolock (any case) → DO NOT REMOVE, but FLAG as **CRITICAL RISK**: "⚠️ WITH (NOLOCK) reads uncommitted/dirty data - CRITICAL: may cause INCORRECT FINANCIAL VALUES and data inconsistencies 
in production" + $analysis_requirements + + Explanation: +""") + +NATURAL_TO_SQL_TEMPLATE = Template(""" + You are an expert $database_name SQL developer and query performance specialist. + Your task is to write an optimized SQL query based exclusively on the natural-language description provided below. + + Description: + $explanation + + ⚠️ CRITICAL RULES - READ BEFORE GENERATING SQL: + + 1. **PRESERVE ALL BUSINESS LOGIC EXACTLY** + - Every CASE WHEN statement must have IDENTICAL conditions and results + - Every calculated column must use IDENTICAL formulas + - Every subquery must query the SAME tables with SAME filters + - Do NOT simplify, merge, or "improve" business logic - even if it looks redundant + - If description mentions specific conditions (cd_tp_apolice = 2, etc.), preserve them EXACTLY + + 2. **PRESERVE ALL TABLES AND COLUMNS** + - Include EVERY table mentioned in the description + - Include EVERY column mentioned in the description + - Use EXACT column names as described (no renaming) + - Use EXACT table aliases as described + + 3. **Translate the full described logic into SQL** + - Implement all actions, operations, filters, joins, and conditions exactly as stated. + - Use every object and column referenced in the description, using their exact names. + - If the description mentions specific filter values (e.g., cd_tipo_endosso = 0), use those EXACT values + + 4. **Write optimized SQL while preserving semantics** + - Apply $database_name best practices for performance. + - Use indexing-aware filtering, efficient join strategies, and clear expressions. + - Implement aggregations, groupings, window functions, or pagination when described. + - Prefer performant constructs commonly recommended for $database_name workloads. + - OPTIMIZATION means structure/hints/indexes - NOT changing logic + + 5. **Use $database_name-specific syntax and features** + - Apply native functions, operators, optimizer behaviors, or hints when appropriate. 
+ - Incorporate $specific_requirements if provided. + + 6. **Ensure logical fidelity - ZERO TOLERANCE FOR CHANGES** + - The SQL must reflect PRECISELY the behavior described + - Do NOT add logic not explicitly stated + - Do NOT omit any step described + - Do NOT infer or assume details beyond what is explicitly stated + - Do NOT "simplify" complex CASE statements + - Do NOT merge or combine separate calculated columns + + 7. **Self-Verification Checklist** (perform before outputting): + - [ ] All tables from description are present in query + - [ ] All columns from description are present in SELECT + - [ ] All CASE conditions match description exactly + - [ ] All subquery filters match description exactly + - [ ] All JOIN conditions match description exactly + - [ ] No business logic was simplified or changed + + 8. **Output format** + - Provide ONLY the final, optimized SQL query. + - Do NOT include explanations, comments, or extra text. + + Optimized SQL Query: +""") + +CONSERVATIVE_ANALYSIS_TEMPLATE = Template(""" + You are an expert $database_name database analyst and performance specialist. + + Your task is to ANALYZE the SQL query below and provide SUGGESTIONS for improvement. + + ⚠️ CRITICAL: You must NOT rewrite or modify the query. Only provide analysis and suggestions. 
+ + $database_name SQL Query: + ```sql + $query + ``` + + Query Complexity Information: + - Columns: $column_count + - Tables: $table_count + - Subqueries: $subquery_count + - CASE statements: $case_count + - JOINs: $join_count + - Complexity Level: $complexity_level + + Provide your analysis in the following structured format: + + ## PERFORMANCE ISSUES + List each performance issue found, with severity (CRITICAL/HIGH/MEDIUM/LOW): + - [SEVERITY] Issue description + - [SEVERITY] Issue description + + ## SUGGESTED INDEXES + List indexes that could improve this query: + - CREATE INDEX idx_name ON table(columns) -- Reason + + ## OPTIMIZATION SUGGESTIONS + List specific suggestions WITHOUT rewriting the query: + - Suggestion 1: Description of what could be improved and why + - Suggestion 2: Description of what could be improved and why + + ## RISK ASSESSMENT + - WITH (NOLOCK) usage: [Yes/No] - If yes, explain the risks + - Missing WHERE clause: [Yes/No] - If yes, explain the impact + - Implicit conversions: [Yes/No] - If yes, list them + + ## SUMMARY + Brief summary of the most important findings and priority order for addressing them. + + Remember: DO NOT provide a rewritten query. Only analysis and suggestions. 
+""")
+
+
+def _render_sql_to_natural(
+    database_name: str, query: str, specific_features: str = "", analysis_requirements: str = ""
+) -> str:
+    """Fill SQL_TO_NATURAL_TEMPLATE; each non-empty extra is inserted after a newline."""
+    features_block = f"\n{specific_features}" if specific_features else ""
+    requirements_block = f"\n{analysis_requirements}" if analysis_requirements else ""
+    return SQL_TO_NATURAL_TEMPLATE.substitute(
+        database_name=database_name, query=query,
+        specific_features=features_block, analysis_requirements=requirements_block,
+    )
+
+
+def _render_natural_to_sql(
+    database_name: str, explanation: str, specific_requirements: str
+) -> str:
+    """Fill NATURAL_TO_SQL_TEMPLATE, turning each non-blank requirement line into a "- " bullet."""
+    bullets = [f"- {line}" for line in specific_requirements.split("\n") if line.strip()]
+    return NATURAL_TO_SQL_TEMPLATE.substitute(
+        database_name=database_name,
+        explanation=explanation,
+        specific_requirements="\n".join(bullets),
+    )
+
+
+def _render_conservative_analysis(
+    database_name: str,
+    query: str,
+    column_count: int = 0,
+    table_count: int = 0,
+    subquery_count: int = 0,
+    case_count: int = 0,
+    join_count: int = 0,
+    complexity_level: str = "unknown",
+) -> str:
+    """Fill CONSERVATIVE_ANALYSIS_TEMPLATE with the query and its complexity figures."""
+    complexity = {
+        "column_count": column_count, "table_count": table_count,
+        "subquery_count": subquery_count, "case_count": case_count,
+        "join_count": join_count, "complexity_level": complexity_level,
+    }
+    return CONSERVATIVE_ANALYSIS_TEMPLATE.substitute(
+        complexity, database_name=database_name, query=query
+    )
 
 
 class BasePromptGenerator(PromptGeneratorTool, ABC):
@@ -70,7 +287,7 @@ class BasePromptGenerator(PromptGeneratorTool, ABC):
         Returns:
             Formatted prompt for LLM
         """
-        return AgentPromptTemplates.render_sql_to_natural(
+        return _render_sql_to_natural(
             database_name=self.get_database_name(),
             query=query,
             specific_features=self.get_specific_features(),
@@ -86,7 +303,7 @@ class BasePromptGenerator(PromptGeneratorTool, ABC):
         Returns:
             Formatted prompt for LLM
         """
-        return AgentPromptTemplates.render_natural_to_sql(
+        return _render_natural_to_sql(
             database_name=self.get_database_name(),
             explanation=explanation,
             specific_requirements=self.get_specific_requirements(),