prototype

This commit is contained in:
2026-05-04 22:00:38 +05:30
commit 711d691870
48 changed files with 5093 additions and 0 deletions
View File
+18
View File
@@ -0,0 +1,18 @@
# HEARTBEAT — ACME Corporation
## Schedule
- trigger: daily
- time: "08:00"
- timezone: "Asia/Kolkata"
## Digest Tasks
1. Pull last 7 days spend + revenue per branch
2. Identify bottom 3 performing branches by revenue
3. Generate newsletter-style summary via Gen Agent → QA Agent
4. Run Scout Agent for competitor + sector news
5. Append Market Intelligence section to digest
6. Push complete digest to Slack channel
## Retry
- on_failure: retry after 15 minutes
- max_retries: 3
+124
View File
@@ -0,0 +1,124 @@
"""
Clawrity — HEARTBEAT Loader
Parses HEARTBEAT.md files to extract schedule, digest tasks, and retry config.
HEARTBEAT.md drives autonomous daily digest generation per client.
"""
import re
import logging
from pathlib import Path
from typing import Optional, Dict, Any
from config.client_loader import ClientConfig
logger = logging.getLogger(__name__)
class HeartbeatConfig:
    """Parsed heartbeat configuration with safe defaults."""

    def __init__(self):
        # Schedule defaults; overridden by values parsed from HEARTBEAT.md.
        self.trigger: str = "daily"
        self.time: str = "08:00"
        self.timezone: str = "UTC"
        # Retry policy defaults.
        self.retry_delay_minutes: int = 15
        self.max_retries: int = 3
        # Ordered digest task descriptions from the "Digest Tasks" section.
        self.tasks: list = []
        # Original markdown text, kept for inspection/debugging.
        self.raw_content: str = ""

    @property
    def hour(self) -> int:
        """Hour component of the "HH:MM" time string."""
        parts = self.time.split(":")
        return int(parts[0])

    @property
    def minute(self) -> int:
        """Minute component of the "HH:MM" time string."""
        parts = self.time.split(":")
        return int(parts[1])
def load_heartbeat(client_config: ClientConfig) -> HeartbeatConfig:
    """
    Load and parse the HEARTBEAT.md file for a client.

    Falls back to defaults from the client YAML (timezone and digest
    schedule) when the file is missing or cannot be parsed.

    Args:
        client_config: The client's configuration containing heartbeat_file path.

    Returns:
        Parsed HeartbeatConfig with schedule, tasks, and retry settings.
    """
    heartbeat = HeartbeatConfig()
    # Use client YAML timezone as fallback
    heartbeat.timezone = client_config.timezone
    heartbeat_path = Path(client_config.heartbeat_file)

    if not heartbeat_path.exists():
        logger.warning(
            f"HEARTBEAT file not found at {heartbeat_path} for client "
            f"{client_config.client_id}. Using defaults from client YAML."
        )
        heartbeat.time = client_config.digest_schedule
        return heartbeat

    try:
        text = heartbeat_path.read_text(encoding="utf-8")
        heartbeat.raw_content = text
        _parse_heartbeat(text, heartbeat)
        logger.info(
            f"Loaded HEARTBEAT for {client_config.client_id}: "
            f"{heartbeat.trigger} at {heartbeat.time} {heartbeat.timezone}"
        )
    except Exception as exc:
        # Any parse failure degrades to the YAML-configured schedule.
        logger.error(f"Error parsing HEARTBEAT file {heartbeat_path}: {exc}")
        heartbeat.time = client_config.digest_schedule
    return heartbeat
def _parse_heartbeat(content: str, config: HeartbeatConfig) -> None:
"""Parse markdown content and extract structured config."""
lines = content.split("\n")
current_section = None
task_lines = []
for line in lines:
stripped = line.strip()
# Detect section headers
if stripped.startswith("## "):
current_section = stripped[3:].strip().lower()
continue
if current_section == "schedule":
# Parse key-value pairs like "- trigger: daily"
match = re.match(r"-\s*(\w+):\s*\"?([^\"]+)\"?", stripped)
if match:
key, value = match.group(1).strip(), match.group(2).strip()
if key == "trigger":
config.trigger = value
elif key == "time":
config.time = value
elif key == "timezone":
config.timezone = value
elif current_section == "digest tasks":
# Parse numbered list items
match = re.match(r"\d+\.\s+(.*)", stripped)
if match:
config.tasks.append(match.group(1).strip())
elif current_section == "retry":
# Parse retry config
match = re.match(r"-\s*(\w+):\s*(.+)", stripped)
if match:
key, value = match.group(1).strip(), match.group(2).strip()
if "retry" in key and "after" in value:
# Extract minutes from "retry after 15 minutes"
mins = re.search(r"(\d+)", value)
if mins:
config.retry_delay_minutes = int(mins.group(1))
elif key == "max_retries":
config.max_retries = int(value)
+295
View File
@@ -0,0 +1,295 @@
"""
Clawrity — HEARTBEAT Scheduler
APScheduler AsyncIOScheduler fires digest jobs per client at configured times.
Schedule: ETL at 02:00 → RAG re-index at 03:00 → Digest + Scout at configured time.
Retry: on failure, retry after N minutes, max retries from HEARTBEAT.md.
"""
import asyncio
import json
import logging
import os
from datetime import datetime
from typing import Dict, Optional
import httpx
from apscheduler.schedulers.asyncio import AsyncIOScheduler
from apscheduler.triggers.cron import CronTrigger
from agents.orchestrator import Orchestrator
from channels.protocol_adapter import NormalisedMessage
from config.client_loader import ClientConfig
from config.settings import get_settings
from heartbeat.heartbeat_loader import load_heartbeat
from skills.postgres_connector import get_connector
from soul.soul_loader import load_soul
logger = logging.getLogger(__name__)
async def run_digest(
    client_config: ClientConfig,
    orchestrator: Orchestrator,
    retry_count: int = 0,
) -> Optional[str]:
    """
    Run the daily digest for a client.

    Steps:
        1. Query bottom 3 branches by revenue (last 7 days)
        2. Gen Agent → QA Agent pipeline for digest
        3. Scout Agent for competitor/sector news
        4. Push to Slack webhook
        5. Log success/failure to JSONL

    Args:
        client_config: Per-client configuration (client_id, channels, heartbeat file).
        orchestrator: Shared orchestrator; its ``retriever`` (when set) supplies
            RAG chunks used as extra digest context.
        retry_count: Zero-based attempt number; incremented on recursive retry.

    Returns:
        Full digest text if successful, None on failure
    """
    # Imported inside the function rather than at module level — presumably to
    # avoid a circular import with the agents package; confirm before hoisting.
    from agents.gen_agent import GenAgent
    from agents.qa_agent import QAAgent

    client_id = client_config.client_id
    logger.info(f"[{client_id}] Running daily digest (attempt {retry_count + 1})")
    db = get_connector()
    try:
        # Step 1: Get bottom 3 branches by revenue with ROI.
        # NULLIF guards the ROI division against zero-spend branches.
        bottom_sql = """
            SELECT branch, country,
                   SUM(revenue) as total_revenue,
                   SUM(spend) as total_spend,
                   SUM(leads) as total_leads,
                   ROUND((SUM(revenue)/NULLIF(SUM(spend),0))::numeric, 2) as roi
            FROM spend_data
            WHERE client_id = %s
              AND date >= CURRENT_DATE - INTERVAL '7 days'
            GROUP BY branch, country
            ORDER BY total_revenue ASC
            LIMIT 3
        """
        data = db.execute_query(bottom_sql, (client_id,))

        # Step 2: Generate digest via Gen Agent with specific prompt
        soul_content = load_soul(client_config)
        gen_agent = GenAgent()
        qa_agent = QAAgent()

        # Retrieve RAG chunks for digest context. Retrieval failure is
        # non-fatal: the digest is generated without the extra context.
        rag_chunks = None
        if orchestrator.retriever:
            try:
                rag_chunks = orchestrator.retriever.retrieve(
                    query="weekly performance bottom performers budget recommendations",
                    client_id=client_id,
                )
            except Exception as e:
                logger.warning(f"RAG retrieval for digest failed: {e}")

        # Generate digest with explicit prompt
        digest = gen_agent.generate(
            question="Generate morning business digest. Highlight bottom 3 branches. Suggest where to focus budget. Newsletter style.",
            soul_content=soul_content,
            data_context=data,
            rag_chunks=rag_chunks,
        )

        # Step 2b: QA pass on digest (more lenient threshold for digest)
        qa_result = qa_agent.evaluate(
            response=digest,
            data_context=data,
            threshold=0.6,  # More lenient for digest
        )
        if not qa_result["passed"]:
            logger.warning(
                f"[{client_id}] Digest QA failed (score={qa_result['score']:.2f}), "
                f"retrying with strict instruction"
            )
            # Retry digest generation with strict instruction.
            # NOTE(review): the regenerated digest is NOT re-evaluated, so the
            # qa_score/qa_passed logged in Step 6 reflect the FIRST (failed)
            # QA pass, not the digest that actually ships.
            digest = gen_agent.generate(
                question="Generate morning business digest. Highlight bottom 3 branches. Suggest where to focus budget. Newsletter style.",
                soul_content=soul_content,
                data_context=data,
                rag_chunks=rag_chunks,
                retry_issues=qa_result["issues"],
                retry_count=1,
                strict_data_instruction=(
                    "CRITICAL: Only mention branches and figures that appear in the "
                    "Data Context. Do not reference any other branches or historical data."
                ),
            )

        # Step 3: Scout Agent for competitor/sector news. Best-effort — the
        # digest still ships without the Market Intelligence section.
        scout_section = None
        try:
            from agents.scout_agent import ScoutAgent
            scout = ScoutAgent()
            scout_section = await scout.gather_intelligence(client_config)
        except Exception as e:
            logger.warning(f"Scout Agent failed: {e}")

        # Step 4: Assemble full digest
        full_digest = f"📊 **Clawrity Daily Digest — {client_config.client_name}**\n"
        full_digest += f"*{datetime.now().strftime('%B %d, %Y')}*\n\n"
        full_digest += digest
        if scout_section:
            full_digest += f"\n\n---\n\n{scout_section}"

        # Step 5: Push to Slack webhook
        webhook_url = client_config.channels.get("slack_webhook", "")
        if webhook_url:
            await _push_to_slack(webhook_url, full_digest)
        else:
            logger.warning(f"[{client_id}] No Slack webhook configured")

        # Step 6: Log success to JSONL
        _log_digest_event(client_id, "success", {
            "qa_score": qa_result["score"],
            "qa_passed": qa_result["passed"],
            "scout_included": scout_section is not None,
            "digest_length": len(full_digest),
        })
        logger.info(f"[{client_id}] Digest completed successfully")
        return full_digest
    except Exception as e:
        logger.error(f"[{client_id}] Digest failed: {e}", exc_info=True)
        _log_digest_event(client_id, "failure", {"error": str(e), "attempt": retry_count + 1})
        # Retry policy (delay + max attempts) comes from HEARTBEAT.md.
        heartbeat = load_heartbeat(client_config)
        if retry_count < heartbeat.max_retries:
            delay_minutes = heartbeat.retry_delay_minutes
            logger.info(
                f"[{client_id}] Scheduling digest retry in {delay_minutes} minutes "
                f"(attempt {retry_count + 2}/{heartbeat.max_retries + 1})"
            )
            # NOTE(review): the retry sleeps in-task and recurses, so this
            # coroutine stays alive for the entire delay window.
            await asyncio.sleep(delay_minutes * 60)
            return await run_digest(client_config, orchestrator, retry_count + 1)
        else:
            logger.error(f"[{client_id}] Digest failed after {heartbeat.max_retries + 1} attempts")
            # Post failure notification to Slack
            webhook_url = client_config.channels.get("slack_webhook", "")
            if webhook_url:
                await _push_to_slack(
                    webhook_url,
                    "Clawrity digest unavailable. Backend may be offline."
                )
            return None
async def _push_to_slack(webhook_url: str, message: str):
    """Push a message to a Slack incoming webhook."""
    payload = {"text": message}
    try:
        async with httpx.AsyncClient() as http_client:
            resp = await http_client.post(
                webhook_url,
                json=payload,
                timeout=30,
            )
            # Slack incoming webhooks answer 200 on success.
            if resp.status_code == 200:
                logger.info("Digest pushed to Slack successfully")
            else:
                logger.error(f"Slack webhook returned {resp.status_code}: {resp.text}")
    except Exception as e:
        # Delivery failure is logged, never raised to the caller.
        logger.error(f"Failed to push digest to Slack: {e}")
def _log_digest_event(client_id: str, status: str, details: dict):
    """Append a digest lifecycle event to the client's JSONL monitoring file.

    Args:
        client_id: Client identifier; selects the per-client log file.
        status: Event outcome label, e.g. "success" or "failure".
        details: Extra key/value pairs merged into the JSON entry.
    """
    # Local import: the module only does `from datetime import datetime`.
    from datetime import timezone

    settings = get_settings()
    logs_dir = settings.logs_dir
    os.makedirs(logs_dir, exist_ok=True)
    log_path = os.path.join(logs_dir, f"{client_id}_digest.jsonl")
    entry = {
        # FIX: datetime.utcnow() is naive and deprecated (Python 3.12+);
        # emit an aware UTC timestamp so the ISO string carries its offset.
        "timestamp": datetime.now(timezone.utc).isoformat(),
        "client_id": client_id,
        "event": "digest",
        "status": status,
        **details,
    }
    try:
        # Append-only JSONL; explicit UTF-8 so non-ASCII survives any locale.
        with open(log_path, "a", encoding="utf-8") as f:
            f.write(json.dumps(entry) + "\n")
    except Exception as e:
        # A logging failure must never break the digest run itself.
        logger.error(f"Failed to log digest event: {e}")
def start_scheduler(
    client_configs: Dict[str, ClientConfig],
    orchestrator: Orchestrator,
) -> AsyncIOScheduler:
    """
    Start the APScheduler with digest jobs for all clients.

    Schedule per client:
        - Digest at configured time (from HEARTBEAT.md)
        - ETL sync at 02:00 (placeholder)
        - RAG re-index at 03:00 (placeholder)
    """
    scheduler = AsyncIOScheduler()

    for client_id, client_cfg in client_configs.items():
        heartbeat = load_heartbeat(client_cfg)
        tz = heartbeat.timezone

        # Daily digest fired at the HEARTBEAT-configured local time.
        scheduler.add_job(
            run_digest,
            CronTrigger(
                hour=heartbeat.hour,
                minute=heartbeat.minute,
                timezone=tz,
            ),
            args=[client_cfg, orchestrator],
            id=f"digest_{client_id}",
            name=f"Daily Digest — {client_cfg.client_name}",
            replace_existing=True,
        )
        logger.info(
            f"Scheduled digest for {client_id}: "
            f"{heartbeat.time} {heartbeat.timezone}"
        )

        # Nightly ETL sync at 02:00 (placeholder job).
        scheduler.add_job(
            _etl_sync_placeholder,
            CronTrigger(hour=2, minute=0, timezone=tz),
            args=[client_id],
            id=f"etl_{client_id}",
            name=f"ETL Sync — {client_cfg.client_name}",
            replace_existing=True,
        )

        # Nightly RAG re-index at 03:00 (placeholder job).
        scheduler.add_job(
            _rag_reindex_placeholder,
            CronTrigger(hour=3, minute=0, timezone=tz),
            args=[client_id],
            id=f"rag_reindex_{client_id}",
            name=f"RAG Re-index — {client_cfg.client_name}",
            replace_existing=True,
        )

    scheduler.start()
    return scheduler
async def _etl_sync_placeholder(client_id: str) -> None:
    """Stub job for the nightly ETL data sync; no real sync implemented yet."""
    logger.info(f"[{client_id}] ETL sync triggered (placeholder)")
async def _rag_reindex_placeholder(client_id: str) -> None:
    """Stub job for nightly RAG re-indexing; delegates to the pipeline script."""
    logger.info(f"[{client_id}] RAG re-index triggered (placeholder)")
    try:
        # Import lazily so a missing pipeline script degrades to a warning.
        from scripts.run_rag_pipeline import run_pipeline
        run_pipeline(client_id)
    except Exception as exc:
        logger.warning(f"RAG re-index failed: {exc}")