Fix response redundancy and add retry-based backend communication

2026-05-05 17:58:58 +05:30
parent 711d691870
commit ba61963d6f
12 changed files with 880 additions and 287 deletions
+117 -18
@@ -12,7 +12,7 @@ from typing import List, Optional, Dict
import pandas as pd
-from config.llm_client import get_llm_client, get_model_name
+from config.llm_client import get_llm_client, get_model_name, chat_with_retry
logger = logging.getLogger(__name__)
@@ -35,6 +35,7 @@ class GenAgent:
retry_count: int = 0,
strict_data_instruction: Optional[str] = None,
supplementary_context: Optional[pd.DataFrame] = None,
+sql: Optional[str] = None,
) -> str:
"""
Generate a data-grounded response.
@@ -46,6 +47,7 @@ class GenAgent:
rag_chunks: Retrieved chunks with similarity scores (Phase 2)
retry_issues: QA Agent issues from previous attempt
retry_count: Current retry number (0-2)
+sql: The SQL query that produced the data context
Returns:
Markdown-formatted response string
@@ -53,12 +55,19 @@ class GenAgent:
temperature = max(0.1, self.base_temperature - (retry_count * 0.2))
prompt = self._build_prompt(
-question, soul_content, data_context, rag_chunks, retry_issues,
-strict_data_instruction, supplementary_context,
+question,
+soul_content,
+data_context,
+rag_chunks,
+retry_issues,
+strict_data_instruction,
+supplementary_context,
+sql,
)
try:
-response = self.client.chat.completions.create(
+response = chat_with_retry(
+self.client,
model=self.model,
messages=[
{"role": "system", "content": soul_content},
@@ -108,7 +117,8 @@ class GenAgent:
Use bullet points, bold key numbers, and keep it concise."""
try:
-response = self.client.chat.completions.create(
+response = chat_with_retry(
+self.client,
model=self.model,
messages=[
{"role": "system", "content": soul_content},
@@ -131,18 +141,28 @@ Use bullet points, bold key numbers, and keep it concise."""
retry_issues: Optional[List[str]],
strict_data_instruction: Optional[str] = None,
supplementary_context: Optional[pd.DataFrame] = None,
+sql: Optional[str] = None,
) -> str:
"""Build the augmented prompt for response generation."""
parts = []
-# Strict data instruction (on retry — prevents hallucination)
+# Strict data instruction — prevents hallucination
if strict_data_instruction:
parts.append(f"## ⚠️ STRICT REQUIREMENT\n{strict_data_instruction}\n")
-# Data context
+# SQL query that produced the data (so the model knows what filters were applied)
+if sql:
+parts.append(f"## SQL Query Used\n```sql\n{sql}\n```\n")
+# Data context with computed summaries
if data_context is not None and len(data_context) > 0:
parts.append("## Data Context (query results for the user's question)")
parts.append(data_context.to_markdown(index=False))
+# Compute summary statistics to help the LLM cite precise numbers
+summary = self._compute_summary(data_context)
+if summary:
+parts.append(f"\n### Computed Summary\n{summary}")
else:
parts.append("## Data Context\nNo query results available.")
@@ -150,22 +170,31 @@ Use bullet points, bold key numbers, and keep it concise."""
if supplementary_context is not None and len(supplementary_context) > 0:
parts.append("\n## Benchmark Data (top-performing branches for comparison)")
parts.append(supplementary_context.to_markdown(index=False))
+bench_summary = self._compute_summary(supplementary_context)
+if bench_summary:
+parts.append(f"\n### Benchmark Summary\n{bench_summary}")
parts.append(
"\nUse this benchmark data to compare the queried branch's performance "
"against top performers. Identify which channels and strategies work "
"best, and recommend specific, actionable improvements based on what "
"top-performing branches are doing differently."
"\n### How to use benchmark data\n"
"Compare the queried branch's metrics against these top performers:\n"
"- If the queried branch's ROI is lower than benchmarks, recommend shifting budget to higher-ROI channels\n"
"- If a channel underperforms vs benchmarks, suggest reducing spend or optimizing it\n"
"- Cite SPECIFIC numbers: 'Your Email ROI is 2.29 vs the top performer's 2.50'\n"
"- Be concrete: 'Shift $X from Facebook to Email based on the ROI difference'"
)
# RAG chunks (Phase 2)
if rag_chunks:
-parts.append("\n## Historical Business Context (retrieved from intelligence layer)")
+parts.append(
+"\n## Historical Business Context (retrieved from intelligence layer)"
+)
if strict_data_instruction:
-parts.append("⚠️ ONLY use historical context that is about branches/entities in the Data Context above. IGNORE any historical context about other branches.")
+parts.append(
+"⚠️ ONLY use historical context that is about branches/entities in the Data Context above. IGNORE any historical context about other branches."
+)
for i, chunk in enumerate(rag_chunks, 1):
sim = chunk.get("similarity", 0)
parts.append(f"{i}. {chunk['text']} (relevance: {sim:.2f})")
parts.append("\nBase suggestions on historical context. Cite specific data points.")
# Retry instructions
if retry_issues:
@@ -173,12 +202,82 @@ Use bullet points, bold key numbers, and keep it concise."""
parts.append("Your previous response had these problems. Fix them:")
for issue in retry_issues:
parts.append(f"- {issue}")
-parts.append("Be more precise. Only state facts supported by the data above.")
-parts.append("Do NOT introduce any new branches, cities, or figures that are not in the Data Context.")
+parts.append(
+"Be more precise. Only state facts supported by the data above."
+)
+parts.append(
+"Do NOT introduce any new branches, cities, or figures that are not in the Data Context."
+)
# User question
parts.append(f"\n## User Question\n{question}")
-parts.append("\nProvide a professional, data-grounded response. Cite specific numbers from the data.")
+# Response quality instructions
+parts.append(
+"\n## Response Quality Rules\n"
+"1. ALWAYS cite specific numbers from the Data Context (e.g., '$29,941 revenue', 'ROI of 2.29')\n"
+"2. When comparing channels or branches, use EXACT figures from the data — never round unless using ~\n"
+"3. For recommendations, reference specific metrics: 'Email has ROI 2.29 vs Facebook's 2.06 — consider reallocating budget'\n"
+"4. Structure your answer with clear sections: Data Summary → Analysis → Recommendations\n"
+"5. Do NOT give generic advice — every recommendation must tie to a specific data point\n"
+"6. Do NOT mention branches, cities, or figures that are not in the Data Context above\n"
+"7. Keep the response concise but data-dense — prefer bullet points over paragraphs"
+)
return "\n".join(parts)
+def _compute_summary(self, df: pd.DataFrame) -> str:
+"""Compute summary statistics from a DataFrame to help the LLM cite precise numbers."""
+if df is None or len(df) == 0:
+return ""
+lines = []
+numeric_cols = df.select_dtypes(include=["number"]).columns.tolist()
+# Total row
+totals = {}
+for col in numeric_cols:
+total = df[col].sum()
+if total != 0:
+totals[col] = total
+if totals:
+total_parts = []
+for col, val in totals.items():
+if val >= 1_000_000:
+total_parts.append(f"Total {col}: ${val / 1_000_000:.2f}M")
+elif val >= 1_000:
+total_parts.append(f"Total {col}: ${val:,.2f}")
+else:
+total_parts.append(f"Total {col}: {val:,.0f}")
+lines.append(" | ".join(total_parts))
+# ROI if revenue and spend columns exist
+rev_col = next((c for c in numeric_cols if "revenue" in c.lower()), None)
+spend_col = next((c for c in numeric_cols if "spend" in c.lower()), None)
+if rev_col and spend_col:
+total_rev = df[rev_col].sum()
+total_spend = df[spend_col].sum()
+if total_spend > 0:
+lines.append(f"Overall ROI: {total_rev / total_spend:.2f}")
+# Per-row highlights (top/bottom)
+if rev_col and len(df) > 1:
+idx_max = df[rev_col].idxmax()
+idx_min = df[rev_col].idxmin()
+label_col = None
+for candidate in ["branch", "channel", "country", "name"]:
+if candidate in df.columns:
+label_col = candidate
+break
+if label_col:
+top = df.loc[idx_max]
+bot = df.loc[idx_min]
+lines.append(
+f"Highest {rev_col}: {top[label_col]} (${top[rev_col]:,.2f})"
+)
+lines.append(
+f"Lowest {rev_col}: {bot[label_col]} (${bot[rev_col]:,.2f})"
+)
+return "\n".join(lines) if lines else ""
+26 -19
@@ -118,7 +118,9 @@ class Orchestrator:
qa_threshold = client_config.hallucination_threshold
if supplementary_context is not None and len(supplementary_context) > 0:
qa_threshold = min(qa_threshold, 0.5)
logger.info(f"Using relaxed QA threshold ({qa_threshold}) for enriched context")
logger.info(
f"Using relaxed QA threshold ({qa_threshold}) for enriched context"
)
best_response = None
best_score = 0.0
@@ -128,23 +130,23 @@ class Orchestrator:
for attempt in range(MAX_RETRIES + 1):
retry_issues = qa_result["issues"] if attempt > 0 else None
-# On retry, add explicit data-only instruction to prevent hallucination
-strict_data_instruction = None
-if attempt > 0:
-if supplementary_context is not None and len(supplementary_context) > 0:
-strict_data_instruction = (
-"CRITICAL: Only use data from the Data Context and Benchmark Data "
-"sections provided. Do NOT invent figures or branch names that are "
-"not present in either of those sections. You MAY reference benchmark "
-"branches for comparison and recommendations."
-)
-else:
-strict_data_instruction = (
-"CRITICAL: Do NOT mention any branches, figures, or historical data "
-"that are not in the SQL query result provided. Stick strictly to the "
-"data. If historical context from RAG is about different branches than "
-"what the query returned, IGNORE that context entirely."
-)
+# Always provide strict data grounding instruction to prevent
+# the Gen Agent from hallucinating branch/figure data from RAG
+# chunks that don't match the actual SQL query results.
+if supplementary_context is not None and len(supplementary_context) > 0:
+strict_data_instruction = (
+"CRITICAL: Only use data from the Data Context and Benchmark Data "
+"sections provided. Do NOT invent figures or branch names that are "
+"not present in either of those sections. You MAY reference benchmark "
+"branches for comparison and recommendations."
+)
+else:
+strict_data_instruction = (
+"CRITICAL: Do NOT mention any branches, figures, or historical data "
+"that are not in the SQL query result provided. Stick strictly to the "
+"data. If historical context from RAG is about different branches than "
+"what the query returned, IGNORE that context entirely."
+)
response = self.gen_agent.generate(
question=message.text,
@@ -155,6 +157,7 @@ class Orchestrator:
retry_count=attempt,
strict_data_instruction=strict_data_instruction,
supplementary_context=supplementary_context,
+sql=sql,
)
qa_result = self.qa_agent.evaluate(
@@ -163,6 +166,7 @@ class Orchestrator:
threshold=qa_threshold,
supplementary_context=supplementary_context,
user_question=message.text,
+sql=sql,
)
# Track best response (prefer longer, richer responses over "no data" stubs)
@@ -256,7 +260,9 @@ class Orchestrator:
top_performers = db.execute_query(enrichment_sql, (client_id,))
if top_performers is not None and len(top_performers) > 0:
logger.info(f"Enrichment: fetched {len(top_performers)} top performer rows")
logger.info(
f"Enrichment: fetched {len(top_performers)} top performer rows"
)
return top_performers
except Exception as e:
@@ -273,6 +279,7 @@ class Orchestrator:
"""Log interaction for monitoring."""
try:
from rag.monitoring import log_interaction
log_interaction(
client_id=client_config.client_id,
query=message.text,
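
The loop above initializes best_response / best_score, and a comment notes that it prefers longer, richer responses over "no data" stubs, but the tracking itself sits outside the displayed hunks. A plausible sketch of that bookkeeping, with the length tie-breaker as an assumption:

```python
from typing import Optional, Tuple


def track_best(
    response: str,
    score: float,
    best: Tuple[Optional[str], float],
) -> Tuple[str, float]:
    """Keep the highest-scoring response seen so far; on ties, prefer the
    longer answer so a data-rich response beats a "no data" stub.

    Hypothetical helper: the real orchestrator likely does this inline in
    the retry loop, right after qa_agent.evaluate() returns.
    """
    best_response, best_score = best
    if (
        best_response is None
        or score > best_score
        or (score == best_score and len(response) > len(best_response))
    ):
        return response, score
    return best_response, best_score
```

In the loop this would run once per attempt, with an early break as soon as qa_result["passed"] is true.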
+121 -7
@@ -9,11 +9,12 @@ Threshold from client YAML hallucination_threshold (default 0.75).
import json
import logging
+import re
from typing import Optional, List, Dict
import pandas as pd
-from config.llm_client import get_llm_client, get_model_name
+from config.llm_client import get_llm_client, get_model_name, chat_with_retry
logger = logging.getLogger(__name__)
@@ -32,7 +33,9 @@ Your job: verify that the response ONLY contains claims supported by the provide
### 1. Branch Name Validation (CRITICAL)
- Extract ALL branch/city names mentioned in the response
- Compare against the branch names in the Data Context above
-- If ANY branch name appears in the response but NOT in the Data Context, this is a HALLUCINATION
+- Branch/entity names listed under "Valid Entities from User Question" are VALID even if not listed in query results
+- Branch/entity names listed under "Branches/entities filtered in SQL WHERE clause" are VALID even if not in result rows (e.g., if SQL has WHERE branch = 'X', then 'X' is valid context)
+- If ANY branch name appears in the response but NOT in the Data Context, the valid-entities list, or the SQL WHERE clause filters, this is a HALLUCINATION
- Deduct 0.3 from score for EACH unrelated branch mentioned
### 2. Numerical Accuracy (CRITICAL)
@@ -83,6 +86,7 @@ class QAAgent:
threshold: float = 0.75,
supplementary_context: Optional[pd.DataFrame] = None,
user_question: str = "",
+sql: Optional[str] = None,
) -> Dict:
"""
Evaluate a response for faithfulness.
@@ -93,6 +97,7 @@ class QAAgent:
threshold: Minimum score to pass (from client YAML)
supplementary_context: Benchmark data (top performers) that is also valid ground truth
user_question: The user's original question (entities mentioned here are valid context)
+sql: The SQL query that produced the data context (branch/entity filters are valid context)
Returns:
Dict with score (float), passed (bool), issues (list[str])
@@ -103,6 +108,20 @@ class QAAgent:
else:
data_str = "No structured data available."
+# Include the SQL query so QA understands what filters were applied
+# (e.g., branch names in WHERE clause are valid context even if not in result rows)
+if sql:
+data_str += (
+f"\n\n### SQL Query (defines the data scope)\n```sql\n{sql}\n```"
+)
+# Extract branch/entity filters from SQL WHERE clause
+where_branches = self._extract_where_entities(sql)
+if where_branches:
+data_str += (
+f"\nBranches/entities filtered in SQL WHERE clause (VALID context): "
+f"{', '.join(sorted(where_branches))}"
+)
# Include supplementary (benchmark) context as valid ground truth
if supplementary_context is not None and len(supplementary_context) > 0:
data_str += "\n\n### Benchmark Data (also valid ground truth)\n"
@@ -110,7 +129,16 @@ class QAAgent:
# Include user question so QA knows which entities are valid context
if user_question:
data_str += f"\n\n### User Question Context\nThe user asked: \"{user_question}\"\nBranch/entity names mentioned in the user's question are valid to reference in the response."
entities = self._extract_entities(user_question)
if entities:
entity_list = ", ".join(sorted(entities))
else:
entity_list = "(none)"
data_str += (
"\n\n### User Question Context\n"
f'The user asked: "{user_question}"\n'
f"Valid Entities from User Question: {entity_list}"
)
prompt = EVAL_PROMPT.format(
data_context=data_str,
@@ -119,10 +147,14 @@ class QAAgent:
)
try:
-result = self.client.chat.completions.create(
+result = chat_with_retry(
+self.client,
model=self.model,
messages=[
{"role": "system", "content": "You are a strict QA evaluator. Return only valid JSON. Pay special attention to branch names and figures that appear in the response but NOT in the data context — these are hallucinations."},
{
"role": "system",
"content": "You are a strict QA evaluator. Return only valid JSON. Pay special attention to branch names and figures that appear in the response but NOT in the data context — these are hallucinations.",
},
{"role": "user", "content": prompt},
],
temperature=0.1,
@@ -140,7 +172,11 @@ class QAAgent:
except Exception as e:
logger.error(f"QA evaluation failed: {e}")
# On failure, pass with warning
return {"score": 0.5, "passed": True, "issues": [f"QA evaluation error: {str(e)}"]}
return {
"score": 0.5,
"passed": True,
"issues": [f"QA evaluation error: {str(e)}"],
}
def _parse_response(self, raw: str, threshold: float) -> Dict:
"""Parse JSON response from QA LLM call."""
@@ -162,4 +198,82 @@ class QAAgent:
}
except (json.JSONDecodeError, ValueError) as e:
logger.warning(f"Could not parse QA response: {e}. Raw: {raw[:200]}")
return {"score": 0.5, "passed": True, "issues": ["QA response parsing failed"]}
return {
"score": 0.5,
"passed": True,
"issues": ["QA response parsing failed"],
}
def _extract_where_entities(self, sql: str) -> List[str]:
"""Extract branch/city entity names from SQL WHERE clause filters."""
if not sql:
return []
entities = set()
# Match patterns like: branch = 'Seattle', city = 'Toronto'
for match in re.finditer(
r"(?:branch|city|country)\s*=\s*'([^']+)'",
sql,
re.IGNORECASE,
):
val = match.group(1).strip()
if val and len(val) > 1:
entities.add(val)
# Also handle IN ('val1', 'val2') patterns
for match in re.finditer(
r"(?:branch|city|country)\s+IN\s*\(([^)]+)\)",
sql,
re.IGNORECASE,
):
for val in re.findall(r"'([^']+)'", match.group(1)):
if val and len(val) > 1:
entities.add(val)
return list(entities)
def _extract_entities(self, text: str) -> List[str]:
"""Extract likely branch/city entities from a user question."""
if not text:
return []
lowered = text.lower()
patterns = [
r"\bbranch\s+([a-z][a-z\s\-']{1,60})",
r"\bin\s+([a-z][a-z\s\-']{1,60})",
r"\bfor\s+the\s+([a-z][a-z\s\-']{1,60})\s+branch",
]
stops = {
"the",
"a",
"an",
"my",
"our",
"this",
"that",
"these",
"those",
"branch",
"branches",
"revenue",
"sales",
"roi",
"profit",
"performance",
}
entities = set()
for pattern in patterns:
for match in re.findall(pattern, lowered):
candidate = match.strip(" .,!?:;\"'")
candidate = " ".join(candidate.split())
if not candidate:
continue
if candidate in stops:
continue
if any(word in stops for word in candidate.split()):
candidate = " ".join(w for w in candidate.split() if w not in stops)
candidate = candidate.strip()
if len(candidate) < 2:
continue
entities.add(candidate.title())
return list(entities)
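
To make the new extraction helpers concrete, here is a small standalone driver exercising the same regexes; the sample SQL and question are invented for illustration:

```python
import re

sql = (
    "SELECT channel, revenue, spend FROM campaigns "
    "WHERE branch = 'Seattle' AND city IN ('Toronto', 'Vancouver')"
)

# Equality filters, as matched by _extract_where_entities
print(re.findall(r"(?:branch|city|country)\s*=\s*'([^']+)'", sql, re.IGNORECASE))
# -> ['Seattle']

# IN ('val1', 'val2') lists
for m in re.finditer(r"(?:branch|city|country)\s+IN\s*\(([^)]+)\)", sql, re.IGNORECASE):
    print(re.findall(r"'([^']+)'", m.group(1)))
# -> ['Toronto', 'Vancouver']

# _extract_entities("What is the revenue for the Seattle branch?") lower-cases
# the text, matches the r"\bfor\s+the\s+(...)\s+branch" pattern, filters stop
# words, and title-cases the survivor, yielding ['Seattle'].
```

Together these feed the evaluator's "VALID context" lists, so a response mentioning Seattle is not flagged as a hallucination even when the result rows contain only channel-level aggregates.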
+29 -13
@@ -12,7 +12,7 @@ import logging
from datetime import datetime
from typing import Optional
-from config.llm_client import get_llm_client, get_model_name
+from config.llm_client import get_llm_client, get_model_name, chat_with_retry
from config.client_loader import ClientConfig
from config.settings import get_settings
from skills.web_search import web_search
@@ -99,8 +99,7 @@ class ScoutAgent:
# Format results for LLM
results_text = "\n\n".join(
f"**{r['title']}** ({r['url']})\n{r['content']}"
for r in all_results
f"**{r['title']}** ({r['url']})\n{r['content']}" for r in all_results
)
# Summarize with Groq
@@ -113,10 +112,14 @@ class ScoutAgent:
)
try:
-response = self.client.chat.completions.create(
+response = chat_with_retry(
+self.client,
model=self.model,
messages=[
{"role": "system", "content": "You are a business intelligence scout."},
{
"role": "system",
"content": "You are a business intelligence scout.",
},
{"role": "user", "content": prompt},
],
temperature=0.3,
@@ -126,11 +129,15 @@ class ScoutAgent:
result = response.choices[0].message.content.strip()
if result == "NO_RELEVANT_NEWS":
logger.info(f"[{client_config.client_id}] Scout: no relevant news found")
logger.info(
f"[{client_config.client_id}] Scout: no relevant news found"
)
return None
section = f"## 🔭 Market Intelligence\n\n{result}"
logger.info(f"[{client_config.client_id}] Scout: generated intelligence section")
logger.info(
f"[{client_config.client_id}] Scout: generated intelligence section"
)
return section
except Exception as e:
@@ -157,12 +164,18 @@ class ScoutAgent:
scout_config = client_config.scout
# Search with the user's query directly
-results = web_search(query, max_results=5, lookback_days=scout_config.news_lookback_days)
+results = web_search(
+query, max_results=5, lookback_days=scout_config.news_lookback_days
+)
# Also search with competitor names if they appear in the query
for competitor in scout_config.competitors:
if competitor.lower() in query.lower():
-extra = web_search(f"{competitor} latest news", max_results=3, lookback_days=scout_config.news_lookback_days)
+extra = web_search(
+f"{competitor} latest news",
+max_results=3,
+lookback_days=scout_config.news_lookback_days,
+)
results.extend(extra)
if not results:
@@ -179,8 +192,7 @@ class ScoutAgent:
# Format results for LLM
results_text = "\n\n".join(
f"**{r['title']}** ({r['url']})\n{r['content']}"
for r in unique_results
f"**{r['title']}** ({r['url']})\n{r['content']}" for r in unique_results
)
prompt = QUERY_PROMPT.format(
@@ -192,10 +204,14 @@ class ScoutAgent:
)
try:
-response = self.client.chat.completions.create(
+response = chat_with_retry(
+self.client,
model=self.model,
messages=[
{"role": "system", "content": "You are a business intelligence scout."},
{
"role": "system",
"content": "You are a business intelligence scout.",
},
{"role": "user", "content": prompt},
],
temperature=0.3,
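
One loose end in this file: unique_results is used in the hunk above but built outside it. A minimal sketch of the URL-based dedup presumably happening there (keying on r["url"] is an assumption):

```python
# Hypothetical reconstruction; the actual dedup code is outside the hunks shown.
results = [
    {"title": "Acme Q3 earnings", "url": "https://example.com/a", "content": "..."},
    {"title": "Acme Q3 earnings (repost)", "url": "https://example.com/a", "content": "..."},
    {"title": "Competitor launch", "url": "https://example.com/b", "content": "..."},
]

seen = set()
unique_results = []
for r in results:
    if r["url"] not in seen:  # keep the first hit per URL
        seen.add(r["url"])
        unique_results.append(r)
```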