mirror of
https://github.com/Manoj-HV30/clawrity.git
synced 2026-05-16 19:35:21 +00:00
prototype
@@ -0,0 +1,184 @@
"""
Clawrity — Gen Agent

Generates newsletter-style, data-grounded responses using an LLM.
Supports NVIDIA NIM and Groq via an OpenAI-compatible API.
Temperature 0.7 (reduced by 0.2 on each retry).
Augmented with SOUL.md + live query results + RAG chunks (Phase 2).
"""

import logging
from typing import List, Optional, Dict

import pandas as pd

from config.llm_client import get_llm_client, get_model_name

logger = logging.getLogger(__name__)


class GenAgent:
    """Response generation agent using an LLM (NVIDIA NIM or Groq)."""

    def __init__(self):
        self.client = get_llm_client()
        self.model = get_model_name()
        self.base_temperature = 0.7

    def generate(
        self,
        question: str,
        soul_content: str,
        data_context: Optional[pd.DataFrame] = None,
        rag_chunks: Optional[List[Dict]] = None,
        retry_issues: Optional[List[str]] = None,
        retry_count: int = 0,
        strict_data_instruction: Optional[str] = None,
        supplementary_context: Optional[pd.DataFrame] = None,
    ) -> str:
        """
        Generate a data-grounded response.

        Args:
            question: User's original question
            soul_content: SOUL.md content for personality/rules
            data_context: DataFrame from PostgreSQL query results
            rag_chunks: Retrieved chunks with similarity scores (Phase 2)
            retry_issues: QA Agent issues from previous attempt
            retry_count: Current retry number (0-2)
            strict_data_instruction: Extra data-only constraint injected on retries
            supplementary_context: Benchmark rows (top performers) for comparison

        Returns:
            Markdown-formatted response string
        """
        temperature = max(0.1, self.base_temperature - (retry_count * 0.2))
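        # e.g. retry 0 → 0.7, retry 1 → 0.5, retry 2 → 0.3 (floored at 0.1)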

        prompt = self._build_prompt(
            question, soul_content, data_context, rag_chunks, retry_issues,
            strict_data_instruction, supplementary_context,
        )

        try:
            response = self.client.chat.completions.create(
                model=self.model,
                messages=[
                    {"role": "system", "content": soul_content},
                    {"role": "user", "content": prompt},
                ],
                temperature=temperature,
                max_tokens=2048,
            )
            result = response.choices[0].message.content.strip()
            logger.info(
                f"Gen Agent produced {len(result)} chars "
                f"(temp={temperature}, retry={retry_count})"
            )
            return result

        except Exception as e:
            logger.error(f"Gen Agent failed: {e}")
            return "I encountered an error generating your response. Please try again."
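
    # Illustrative call — a sketch of how the orchestrator drives this method;
    # the question and variables below are hypothetical, not fixtures:
    #
    #   agent = GenAgent()
    #   reply = agent.generate(
    #       question="How can the Toronto branch improve its ROI?",
    #       soul_content=soul_md,        # loaded SOUL.md text
    #       data_context=query_df,       # DataFrame from the SQL step
    #       retry_count=1,               # second attempt → temperature 0.5
    #   )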

    def generate_digest(
        self,
        soul_content: str,
        data_context: pd.DataFrame,
        rag_chunks: Optional[List[Dict]] = None,
    ) -> str:
        """Generate a daily digest newsletter."""
        prompt = f"""Generate a professional daily business intelligence digest.

## Performance Data (Last 7 Days)
{data_context.to_markdown(index=False) if data_context is not None and len(data_context) > 0 else "No data available."}

"""
        if rag_chunks:
            prompt += "## Historical Context\n"
            for i, chunk in enumerate(rag_chunks, 1):
                sim = chunk.get("similarity", 0)
                prompt += f"{i}. {chunk['text']} (relevance: {sim:.2f})\n"
            prompt += "\n"

        prompt += """Format as a newsletter with:
1. **Executive Summary** — key highlights in 2-3 sentences
2. **Top Performers** — best performing branches
3. **Attention Required** — bottom 3 branches by revenue (ALWAYS include this)
4. **Channel Insights** — spending efficiency across channels
5. **Recommendations** — specific, data-backed suggestions

Use bullet points, bold key numbers, and keep it concise."""

        try:
            response = self.client.chat.completions.create(
                model=self.model,
                messages=[
                    {"role": "system", "content": soul_content},
                    {"role": "user", "content": prompt},
                ],
                temperature=0.7,
                max_tokens=3000,
            )
            return response.choices[0].message.content.strip()
        except Exception as e:
            logger.error(f"Digest generation failed: {e}")
            return "Daily digest generation encountered an error."

    def _build_prompt(
        self,
        question: str,
        soul_content: str,
        data_context: Optional[pd.DataFrame],
        rag_chunks: Optional[List[Dict]],
        retry_issues: Optional[List[str]],
        strict_data_instruction: Optional[str] = None,
        supplementary_context: Optional[pd.DataFrame] = None,
    ) -> str:
        """Build the augmented prompt for response generation."""
        parts = []

        # Strict data instruction (on retry — prevents hallucination)
        if strict_data_instruction:
            parts.append(f"## ⚠️ STRICT REQUIREMENT\n{strict_data_instruction}\n")

        # Data context
        if data_context is not None and len(data_context) > 0:
            parts.append("## Data Context (query results for the user's question)")
            parts.append(data_context.to_markdown(index=False))
        else:
            parts.append("## Data Context\nNo query results available.")

        # Supplementary context (top performers for comparison)
        if supplementary_context is not None and len(supplementary_context) > 0:
            parts.append("\n## Benchmark Data (top-performing branches for comparison)")
            parts.append(supplementary_context.to_markdown(index=False))
            parts.append(
                "\nUse this benchmark data to compare the queried branch's performance "
                "against top performers. Identify which channels and strategies work "
                "best, and recommend specific, actionable improvements based on what "
                "top-performing branches are doing differently."
            )

        # RAG chunks (Phase 2)
        if rag_chunks:
            parts.append("\n## Historical Business Context (retrieved from intelligence layer)")
            if strict_data_instruction:
                parts.append("⚠️ ONLY use historical context that is about branches/entities in the Data Context above. IGNORE any historical context about other branches.")
            for i, chunk in enumerate(rag_chunks, 1):
                sim = chunk.get("similarity", 0)
                parts.append(f"{i}. {chunk['text']} (relevance: {sim:.2f})")
            parts.append("\nBase suggestions on historical context. Cite specific data points.")

        # Retry instructions
        if retry_issues:
            parts.append("\n## IMPORTANT — Previous Response Issues")
            parts.append("Your previous response had these problems. Fix them:")
            for issue in retry_issues:
                parts.append(f"- {issue}")
            parts.append("Be more precise. Only state facts supported by the data above.")
            parts.append("Do NOT introduce any new branches, cities, or figures that are not in the Data Context.")

        # User question
        parts.append(f"\n## User Question\n{question}")

        parts.append("\nProvide a professional, data-grounded response. Cite specific numbers from the data.")

        return "\n".join(parts)
@@ -0,0 +1,294 @@
"""
Clawrity — Orchestrator

Coordinates the full message pipeline:
NormalisedMessage → NL-to-SQL → PostgreSQL → (RAG Retriever) → Gen Agent → QA Agent → Response

Max 2 retries per query. Returns best attempt with confidence warning after max retries.

Context enrichment: when a query returns sparse data (≤3 rows) and the question
asks for recommendations, automatically pulls top-performing branches as comparison
context so the Gen Agent can give actionable suggestions.
"""

import re
import logging
import time
from typing import Dict, Optional, List

import pandas as pd

from agents.gen_agent import GenAgent
from agents.qa_agent import QAAgent
from channels.protocol_adapter import NormalisedMessage
from config.client_loader import ClientConfig
from skills.nl_to_sql import NLToSQL
from skills.postgres_connector import get_connector
from soul.soul_loader import load_soul

logger = logging.getLogger(__name__)

MAX_RETRIES = 2

# Keywords that signal the user wants recommendations, not just raw data
_RECOMMENDATION_KEYWORDS = re.compile(
    r"\b(improve|increase|boost|grow|fix|help|recommend|suggest|advice|strategy|"
    r"what (should|can|do)|how (to|can|do|should))\b",
    re.IGNORECASE,
)
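# e.g. "How can I improve Toronto's ROI?" matches ("improve", "how can");
# "Show revenue by branch" does not, so no enrichment is triggered.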


class Orchestrator:
    """Pipeline orchestrator — the central brain of Clawrity."""

    def __init__(self):
        self.nl_to_sql = NLToSQL()
        self.gen_agent = GenAgent()
        self.qa_agent = QAAgent()
        self.retriever = None  # Set in Phase 2 via set_retriever()

    def set_retriever(self, retriever):
        """Attach the RAG retriever (Phase 2)."""
        self.retriever = retriever

    async def process(
        self,
        message: NormalisedMessage,
        client_config: ClientConfig,
    ) -> Dict:
        """
        Process a user message through the full pipeline.

        Returns:
            Dict with: response, qa_score, qa_passed, retries, metadata
        """
        start_time = time.time()
        db = get_connector()

        # Load SOUL
        soul_content = load_soul(client_config)

        # Step 1: NL-to-SQL
        schema_meta = db.get_spend_data_schema(client_config.client_id)
        sql = self.nl_to_sql.generate_sql(
            question=message.text,
            client_id=client_config.client_id,
            schema_metadata=schema_meta,
        )

        # Step 2: Execute SQL
        data_context = None
        if sql:
            try:
                data_context = db.execute_query(sql)
                logger.info(f"SQL returned {len(data_context)} rows")
            except Exception as e:
                logger.error(f"SQL execution failed: {e}")
                data_context = pd.DataFrame()
        else:
            data_context = pd.DataFrame()

        # Step 2b: Context enrichment for sparse results
        # When data is sparse and the user wants recommendations, pull
        # top performers and channel benchmarks as supplementary context
        supplementary_context = None
        if self._needs_enrichment(message.text, data_context):
            supplementary_context = self._enrich_context(
                db, client_config.client_id, message.text, data_context
            )
            if supplementary_context is not None:
                logger.info(
                    f"Context enriched: {len(supplementary_context)} supplementary rows"
                )

        # Step 3: RAG Retrieval (Phase 2)
        rag_chunks = None
        if self.retriever:
            try:
                rag_chunks = self.retriever.retrieve(
                    query=message.text,
                    client_id=client_config.client_id,
                )
            except Exception as e:
                logger.warning(f"RAG retrieval failed: {e}")

        # Step 4: Gen Agent → QA Agent loop (max 2 retries)
        # When supplementary context is provided (enrichment mode), use a relaxed
        # QA threshold since the response naturally references broader benchmark data
        qa_threshold = client_config.hallucination_threshold
        if supplementary_context is not None and len(supplementary_context) > 0:
            qa_threshold = min(qa_threshold, 0.5)
            logger.info(f"Using relaxed QA threshold ({qa_threshold}) for enriched context")

        best_response = None
        best_score = 0.0
        qa_result = {"score": 0, "passed": False, "issues": []}
        retries = 0

        for attempt in range(MAX_RETRIES + 1):
            retry_issues = qa_result["issues"] if attempt > 0 else None

            # On retry, add explicit data-only instruction to prevent hallucination
            strict_data_instruction = None
            if attempt > 0:
                if supplementary_context is not None and len(supplementary_context) > 0:
                    strict_data_instruction = (
                        "CRITICAL: Only use data from the Data Context and Benchmark Data "
                        "sections provided. Do NOT invent figures or branch names that are "
                        "not present in either of those sections. You MAY reference benchmark "
                        "branches for comparison and recommendations."
                    )
                else:
                    strict_data_instruction = (
                        "CRITICAL: Do NOT mention any branches, figures, or historical data "
                        "that are not in the SQL query result provided. Stick strictly to the "
                        "data. If historical context from RAG is about different branches than "
                        "what the query returned, IGNORE that context entirely."
                    )

            response = self.gen_agent.generate(
                question=message.text,
                soul_content=soul_content,
                data_context=data_context,
                rag_chunks=rag_chunks,
                retry_issues=retry_issues,
                retry_count=attempt,
                strict_data_instruction=strict_data_instruction,
                supplementary_context=supplementary_context,
            )

            qa_result = self.qa_agent.evaluate(
                response=response,
                data_context=data_context,
                threshold=qa_threshold,
                supplementary_context=supplementary_context,
                user_question=message.text,
            )

            # Track best response (prefer longer, richer responses over "no data" stubs)
            if qa_result["score"] > best_score or (
                qa_result["score"] == best_score
                and best_response is not None
                and len(response) > len(best_response)
            ):
                best_score = qa_result["score"]
                best_response = response

            if qa_result["passed"]:
                logger.info(f"QA passed on attempt {attempt + 1}")
                break
            else:
                retries += 1
                logger.warning(
                    f"QA failed on attempt {attempt + 1}: "
                    f"score={qa_result['score']:.2f}, issues={qa_result['issues']}"
                )

        # If max retries exceeded, use best response with confidence warning
        final_response = best_response or response
        if not qa_result["passed"] and retries >= MAX_RETRIES:
            final_response += (
                "\n\n---\n"
                f"⚠️ *Confidence: {best_score:.0%} — "
                f"This response may contain approximations. "
                f"Please verify critical numbers against your source data.*"
            )

        elapsed = time.time() - start_time

        result = {
            "response": final_response,
            "qa_score": best_score,
            "qa_passed": qa_result["passed"],
            "retries": retries,
            "sql": sql,
            "data_rows": len(data_context) if data_context is not None else 0,
            "rag_chunks_used": len(rag_chunks) if rag_chunks else 0,
            "elapsed_seconds": round(elapsed, 2),
        }
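        # Illustrative shape only (values hypothetical):
        # {"response": "...", "qa_score": 0.82, "qa_passed": True, "retries": 1,
        #  "sql": "SELECT ...", "data_rows": 4, "rag_chunks_used": 3,
        #  "elapsed_seconds": 2.41}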

        # Log interaction
        self._log_interaction(message, client_config, result)

        return result

    def _needs_enrichment(
        self,
        question: str,
        data_context: Optional[pd.DataFrame],
    ) -> bool:
        """Check if the query result is too sparse for a recommendation question."""
        # Only enrich if data is sparse
        if data_context is not None and len(data_context) > 3:
            return False

        # Only enrich if user is asking for recommendations/improvement
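        # e.g. a 2-row result for "How can I improve Toronto's ROI?" enriches;
        # a 50-row result, or a plain "Show revenue by branch", does not.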
        return bool(_RECOMMENDATION_KEYWORDS.search(question))

    def _enrich_context(
        self,
        db,
        client_id: str,
        question: str,
        data_context: Optional[pd.DataFrame],
    ) -> Optional[pd.DataFrame]:
        """
        Pull supplementary context: top-performing branches and channel
        benchmarks to help the Gen Agent give actionable recommendations.
        """
        try:
            # Get the top branch/channel combinations by ROI for comparison
            enrichment_sql = """
                SELECT branch, country, channel,
                       SUM(spend) as total_spend,
                       SUM(revenue) as total_revenue,
                       SUM(leads) as total_leads,
                       SUM(conversions) as total_conversions,
                       ROUND((SUM(revenue)/NULLIF(SUM(spend),0))::numeric, 2) as roi
                FROM spend_data
                WHERE client_id = %s
                  AND date >= CURRENT_DATE - INTERVAL '90 days'
                GROUP BY branch, country, channel
                HAVING SUM(spend) > 0
                ORDER BY roi DESC
                LIMIT 10
            """
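            # NULLIF(SUM(spend), 0) guards the ROI division against zero spend;
            # the HAVING clause then drops those zero-spend groups entirely.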
            top_performers = db.execute_query(enrichment_sql, (client_id,))

            if top_performers is not None and len(top_performers) > 0:
                logger.info(f"Enrichment: fetched {len(top_performers)} top performer rows")
                return top_performers

        except Exception as e:
            logger.warning(f"Context enrichment failed: {e}")

        return None

    def _log_interaction(
        self,
        message: NormalisedMessage,
        client_config: ClientConfig,
        result: Dict,
    ):
        """Log interaction for monitoring."""
        try:
            from rag.monitoring import log_interaction
            log_interaction(
                client_id=client_config.client_id,
                query=message.text,
                num_chunks=result.get("rag_chunks_used", 0),
                chunk_types_used=[],  # Populated when retriever provides this info
                qa_score=result.get("qa_score", 0),
                qa_passed=result.get("qa_passed", False),
                retries=result.get("retries", 0),
                response_length=len(result.get("response", "")),
                elapsed_seconds=result.get("elapsed_seconds", 0),
            )
        except Exception as e:
            logger.debug(f"Monitoring log failed: {e}")

        logger.info(
            f"[{client_config.client_id}] Query processed: "
            f"score={result['qa_score']:.2f}, passed={result['qa_passed']}, "
            f"retries={result['retries']}, time={result['elapsed_seconds']}s"
        )
@@ -0,0 +1,165 @@
"""
Clawrity — QA Agent

Evaluates Gen Agent responses for faithfulness against the data context.
Uses the configured LLM (NVIDIA NIM or Groq) at temperature 0.1 for strict, near-deterministic evaluation.
Returns JSON: { score, passed, issues }
Threshold comes from the client YAML hallucination_threshold (default 0.75).
"""

import json
import logging
from typing import Optional, List, Dict

import pandas as pd

from config.llm_client import get_llm_client, get_model_name

logger = logging.getLogger(__name__)

EVAL_PROMPT = """You are a strict quality assurance evaluator for business intelligence responses.

Your job: verify that the response ONLY contains claims supported by the provided data.

## Data Context (ground truth)
{data_context}

## Response to Evaluate
{response}

## Evaluation Criteria

### 1. Branch Name Validation (CRITICAL)
- Extract ALL branch/city names mentioned in the response
- Compare against the branch names in the Data Context above
- If ANY branch name appears in the response but NOT in the Data Context, this is a HALLUCINATION
- Deduct 0.3 from score for EACH unrelated branch mentioned

### 2. Numerical Accuracy (CRITICAL)
- ALL revenue, spend, lead, conversion, and ROI figures in the response must match the Data Context EXACTLY
- If a number is mentioned that does not appear in the Data Context, deduct 0.2 from score
- Rounded numbers are acceptable only if clearly approximate (e.g., "~$1.2M")

### 3. Historical Context Relevance
- If the response includes historical context or trends, it is acceptable ONLY if it directly supports the answer about branches/entities present in the Data Context
- Historical context about branches NOT in the current Data Context must be penalized: deduct 0.3 from score
- Example: If Data Context shows Toronto, Vancouver, Dubai but the response mentions "Lawton showed 16436% growth" — this is IRRELEVANT historical context and must be penalized

### 4. Completeness
- Does the response address the user's question?
- Are key data points from the Data Context included?

### 5. Appropriate Hedging
- Does the response use uncertain language for inferences?
- Recommendations should be clearly marked as suggestions, not facts

## Scoring
Start at 1.0 and deduct points per the rules above. Minimum score is 0.0.

Return a JSON object with exactly this structure:
{{
    "score": <float between 0.0 and 1.0>,
    "passed": <true if score >= {threshold}>,
    "issues": [<list of specific issues found, empty if none>]
}}

IMPORTANT: If score < {threshold}, include in the issues list exactly which branches, figures, or historical data were mentioned that do NOT appear in the Data Context. Format as:
"Mentioned branches/figures not in current query result: [list them]"

Return ONLY the JSON. No other text."""
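
# Note: the literal braces in the JSON template above are doubled ({{ }}) so that
# str.format() leaves them intact while still substituting {data_context},
# {response}, and {threshold}.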


class QAAgent:
    """Quality assurance agent for validating Gen Agent responses."""

    def __init__(self):
        self.client = get_llm_client()
        self.model = get_model_name()

    def evaluate(
        self,
        response: str,
        data_context: Optional[pd.DataFrame] = None,
        threshold: float = 0.75,
        supplementary_context: Optional[pd.DataFrame] = None,
        user_question: str = "",
    ) -> Dict:
        """
        Evaluate a response for faithfulness.

        Args:
            response: Gen Agent's response text
            data_context: The data the response should be grounded in
            threshold: Minimum score to pass (from client YAML)
            supplementary_context: Benchmark data (top performers) that is also valid ground truth
            user_question: The user's original question (entities mentioned here are valid context)

        Returns:
            Dict with score (float), passed (bool), issues (list[str])
        """
        data_str = ""
        if data_context is not None and len(data_context) > 0:
            data_str = data_context.to_markdown(index=False)
        else:
            data_str = "No structured data available."

        # Include supplementary (benchmark) context as valid ground truth
        if supplementary_context is not None and len(supplementary_context) > 0:
            data_str += "\n\n### Benchmark Data (also valid ground truth)\n"
            data_str += supplementary_context.to_markdown(index=False)

        # Include the user question so QA knows which entities are valid context
        if user_question:
            data_str += f"\n\n### User Question Context\nThe user asked: \"{user_question}\"\nBranch/entity names mentioned in the user's question are valid to reference in the response."

        prompt = EVAL_PROMPT.format(
            data_context=data_str,
            response=response,
            threshold=threshold,
        )

        try:
            result = self.client.chat.completions.create(
                model=self.model,
                messages=[
                    {"role": "system", "content": "You are a strict QA evaluator. Return only valid JSON. Pay special attention to branch names and figures that appear in the response but NOT in the data context — these are hallucinations."},
                    {"role": "user", "content": prompt},
                ],
                temperature=0.1,
                max_tokens=512,
            )

            raw = result.choices[0].message.content.strip()
            evaluation = self._parse_response(raw, threshold)
            logger.info(
                f"QA evaluation: score={evaluation['score']:.2f}, "
                f"passed={evaluation['passed']}, issues={len(evaluation['issues'])}"
            )
            return evaluation

        except Exception as e:
            logger.error(f"QA evaluation failed: {e}")
            # Fail open: on evaluator errors, pass with a warning rather than block the reply
            return {"score": 0.5, "passed": True, "issues": [f"QA evaluation error: {str(e)}"]}

    def _parse_response(self, raw: str, threshold: float) -> Dict:
        """Parse JSON response from QA LLM call."""
        try:
            # Strip markdown code fences if present
            cleaned = raw.strip()
            if cleaned.startswith("```"):
                cleaned = cleaned.split("\n", 1)[1] if "\n" in cleaned else cleaned[3:]
            if cleaned.endswith("```"):
                cleaned = cleaned[:-3]
            cleaned = cleaned.strip()
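            # e.g. (illustrative) '```json\n{"score": 0.8, "passed": true, "issues": []}\n```'
            # is unfenced to '{"score": 0.8, "passed": true, "issues": []}' before parsing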

            data = json.loads(cleaned)
            score = float(data.get("score", 0.5))
            # Recompute `passed` from the score locally rather than trusting the model's flag
            return {
                "score": score,
                "passed": score >= threshold,
                "issues": data.get("issues", []),
            }
        except (json.JSONDecodeError, ValueError) as e:
            logger.warning(f"Could not parse QA response: {e}. Raw: {raw[:200]}")
            return {"score": 0.5, "passed": True, "issues": ["QA response parsing failed"]}
@@ -0,0 +1,214 @@
"""
Clawrity — Scout Agent

Fetches real-time competitor updates and sector-specific news.
Runs inside the HEARTBEAT digest job ONLY — never on ad-hoc /chat queries.
Appends a "Market Intelligence" section to the morning digest.

If nothing relevant is found, the section is omitted entirely — no filler.
"""
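
# The `scout` section of the client YAML is assumed to look roughly like this
# (field names inferred from the attribute accesses below; values illustrative):
#
#   scout:
#     sector: "retail banking"
#     competitors: ["Acme Corp", "Globex"]
#     keywords: ["digital lending", "branch expansion"]
#     news_lookback_days: 1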

import logging
from datetime import datetime
from typing import Optional

from config.llm_client import get_llm_client, get_model_name
from config.client_loader import ClientConfig
from config.settings import get_settings
from skills.web_search import web_search

logger = logging.getLogger(__name__)

SCOUT_PROMPT = """You are a business intelligence scout for {client_name}.
Their sector: {sector}
Their competitors: {competitors}

Below are web search results from the last {lookback} day(s).
Extract ONLY what is directly relevant to this client's business.
Ignore anything generic or unrelated to their sector.
If nothing is relevant, respond with exactly: NO_RELEVANT_NEWS

Format relevant findings as a clean "Market Intelligence" section with bullet points.
Each bullet should summarize one key finding with its source.

Results:
{search_results}"""

QUERY_PROMPT = """You are a business intelligence scout for {client_name}.
Sector: {sector}
Competitors: {competitors}

The user asked: "{query}"

Below are web search results. Extract ONLY what is directly relevant to the
user's question and this client's business context. Ignore generic or unrelated content.
If nothing is relevant, respond with exactly: NO_RELEVANT_NEWS

Format findings as concise bullet points with sources.

Results:
{search_results}"""


class ScoutAgent:
    """Competitor and sector intelligence agent."""

    def __init__(self):
        self.client = get_llm_client()
        self.model = get_model_name()

    async def gather_intelligence(
        self,
        client_config: ClientConfig,
    ) -> Optional[str]:
        """
        Fetch and summarize competitor/sector news for the digest.

        Args:
            client_config: Client config with scout section

        Returns:
            Formatted "Market Intelligence" markdown section, or None if nothing relevant
        """
        scout_config = client_config.scout
        if not scout_config.sector and not scout_config.competitors:
            logger.info(f"[{client_config.client_id}] No scout config — skipping")
            return None

        lookback = scout_config.news_lookback_days
        today = datetime.now().strftime("%Y-%m-%d")

        # Gather search results
        all_results = []

        # Search for each competitor
        for competitor in scout_config.competitors:
            query = f"{competitor} latest news"
            results = web_search(query, max_results=3, lookback_days=lookback)
            all_results.extend(results)

        # Search for sector keywords
        for keyword in scout_config.keywords[:3]:  # Limit to 3 keywords
            query = f"{keyword} news {today}"
            results = web_search(query, max_results=3, lookback_days=lookback)
            all_results.extend(results)

        if not all_results:
            logger.info(f"[{client_config.client_id}] No search results found")
            return None

        # Format results for LLM
        results_text = "\n\n".join(
            f"**{r['title']}** ({r['url']})\n{r['content']}"
            for r in all_results
        )
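        # web_search() results are assumed to be dicts with "title", "url",
        # and "content" keys, per the formatting above.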

        # Summarize with the LLM
        prompt = SCOUT_PROMPT.format(
            client_name=client_config.client_name,
            sector=scout_config.sector,
            competitors=", ".join(scout_config.competitors),
            lookback=lookback,
            search_results=results_text,
        )

        try:
            response = self.client.chat.completions.create(
                model=self.model,
                messages=[
                    {"role": "system", "content": "You are a business intelligence scout."},
                    {"role": "user", "content": prompt},
                ],
                temperature=0.3,
                max_tokens=1024,
            )

            result = response.choices[0].message.content.strip()

            if result == "NO_RELEVANT_NEWS":
                logger.info(f"[{client_config.client_id}] Scout: no relevant news found")
                return None

            section = f"## 🔭 Market Intelligence\n\n{result}"
            logger.info(f"[{client_config.client_id}] Scout: generated intelligence section")
            return section

        except Exception as e:
            logger.error(f"Scout Agent failed: {e}")
            return None

    async def search_query(
        self,
        client_config: ClientConfig,
        query: str,
    ) -> Optional[str]:
        """
        Run a targeted scout search for a specific user query.

        Used by the /scout endpoint for ad-hoc competitor/news queries.

        Args:
            client_config: Client config with scout section
            query: User's specific question about competitors/market

        Returns:
            Formatted intelligence summary, or None if nothing relevant
        """
        scout_config = client_config.scout

        # Search with the user's query directly
        results = web_search(query, max_results=5, lookback_days=scout_config.news_lookback_days)

        # Also search with competitor names if they appear in the query
        for competitor in scout_config.competitors:
            if competitor.lower() in query.lower():
                extra = web_search(f"{competitor} latest news", max_results=3, lookback_days=scout_config.news_lookback_days)
                results.extend(extra)

        if not results:
            logger.info(f"[{client_config.client_id}] Scout query returned no results")
            return None

        # Deduplicate by URL
        seen_urls = set()
        unique_results = []
        for r in results:
            if r["url"] not in seen_urls:
                seen_urls.add(r["url"])
                unique_results.append(r)

        # Format results for LLM
        results_text = "\n\n".join(
            f"**{r['title']}** ({r['url']})\n{r['content']}"
            for r in unique_results
        )

        prompt = QUERY_PROMPT.format(
            client_name=client_config.client_name,
            sector=scout_config.sector,
            competitors=", ".join(scout_config.competitors),
            query=query,
            search_results=results_text,
        )

        try:
            response = self.client.chat.completions.create(
                model=self.model,
                messages=[
                    {"role": "system", "content": "You are a business intelligence scout."},
                    {"role": "user", "content": prompt},
                ],
                temperature=0.3,
                max_tokens=1024,
            )

            result = response.choices[0].message.content.strip()

            if result == "NO_RELEVANT_NEWS":
                return None

            return result

        except Exception as e:
            logger.error(f"Scout query failed: {e}")
            return None