mirror of
https://github.com/Manoj-HV30/clawrity.git
synced 2026-05-16 19:35:21 +00:00
prototype
@@ -0,0 +1,18 @@
# HEARTBEAT — ACME Corporation

## Schedule
- trigger: daily
- time: "08:00"
- timezone: "Asia/Kolkata"

## Digest Tasks
1. Pull last 7 days spend + revenue per branch
2. Identify bottom 3 performing branches by revenue
3. Generate newsletter-style summary via Gen Agent → QA Agent
4. Run Scout Agent for competitor + sector news
5. Append Market Intelligence section to digest
6. Push complete digest to Slack channel

## Retry
- on_failure: retry after 15 minutes
- max_retries: 3
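Assuming a `ClientConfig` whose `heartbeat_file` points at the file above, the loader committed below should resolve it to the following values. A minimal sketch; `acme_config` is a hypothetical stand-in:

```python
# Hypothetical check: acme_config is a ClientConfig whose heartbeat_file
# points at the HEARTBEAT.md above.
from heartbeat.heartbeat_loader import load_heartbeat

hb = load_heartbeat(acme_config)
assert hb.trigger == "daily"
assert (hb.hour, hb.minute) == (8, 0)        # from time: "08:00"
assert hb.timezone == "Asia/Kolkata"
assert len(hb.tasks) == 6                    # the six Digest Tasks
assert (hb.retry_delay_minutes, hb.max_retries) == (15, 3)
```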
@@ -0,0 +1,124 @@
"""
Clawrity — HEARTBEAT Loader

Parses HEARTBEAT.md files to extract schedule, digest tasks, and retry config.
HEARTBEAT.md drives autonomous daily digest generation per client.
"""

import logging
import re
from pathlib import Path

from config.client_loader import ClientConfig

logger = logging.getLogger(__name__)


class HeartbeatConfig:
    """Parsed heartbeat configuration."""

    def __init__(self):
        self.trigger: str = "daily"
        self.time: str = "08:00"
        self.timezone: str = "UTC"
        self.retry_delay_minutes: int = 15
        self.max_retries: int = 3
        self.tasks: list = []
        self.raw_content: str = ""

    @property
    def hour(self) -> int:
        """Extract the hour from the time string."""
        return int(self.time.split(":")[0])

    @property
    def minute(self) -> int:
        """Extract the minute from the time string."""
        return int(self.time.split(":")[1])


def load_heartbeat(client_config: ClientConfig) -> HeartbeatConfig:
    """
    Load and parse the HEARTBEAT.md file for a client.

    Args:
        client_config: The client's configuration containing the heartbeat_file path.

    Returns:
        Parsed HeartbeatConfig with schedule, tasks, and retry settings.
    """
    config = HeartbeatConfig()
    heartbeat_path = Path(client_config.heartbeat_file)

    # Use the client YAML timezone as a fallback
    config.timezone = client_config.timezone

    if not heartbeat_path.exists():
        logger.warning(
            f"HEARTBEAT file not found at {heartbeat_path} for client "
            f"{client_config.client_id}. Using defaults from client YAML."
        )
        config.time = client_config.digest_schedule
        return config

    try:
        content = heartbeat_path.read_text(encoding="utf-8")
        config.raw_content = content
        _parse_heartbeat(content, config)
        logger.info(
            f"Loaded HEARTBEAT for {client_config.client_id}: "
            f"{config.trigger} at {config.time} {config.timezone}"
        )
    except Exception as e:
        logger.error(f"Error parsing HEARTBEAT file {heartbeat_path}: {e}")
        config.time = client_config.digest_schedule

    return config


def _parse_heartbeat(content: str, config: HeartbeatConfig) -> None:
    """Parse markdown content and extract structured config."""
    current_section = None

    for line in content.split("\n"):
        stripped = line.strip()

        # Detect section headers
        if stripped.startswith("## "):
            current_section = stripped[3:].strip().lower()
            continue

        if current_section == "schedule":
            # Parse key-value pairs like '- trigger: daily'
            match = re.match(r"-\s*(\w+):\s*\"?([^\"]+)\"?", stripped)
            if match:
                key, value = match.group(1).strip(), match.group(2).strip()
                if key == "trigger":
                    config.trigger = value
                elif key == "time":
                    config.time = value
                elif key == "timezone":
                    config.timezone = value

        elif current_section == "digest tasks":
            # Parse numbered list items
            match = re.match(r"\d+\.\s+(.*)", stripped)
            if match:
                config.tasks.append(match.group(1).strip())

        elif current_section == "retry":
            # Parse retry config, e.g. '- on_failure: retry after 15 minutes'
            match = re.match(r"-\s*(\w+):\s*(.+)", stripped)
            if match:
                key, value = match.group(1).strip(), match.group(2).strip()
                # The delay lives in the *value* ("retry after 15 minutes"),
                # not in the key ("on_failure")
                if "retry" in value and "after" in value:
                    mins = re.search(r"(\d+)", value)
                    if mins:
                        config.retry_delay_minutes = int(mins.group(1))
                elif key == "max_retries":
                    config.max_retries = int(value)
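The Retry section is the easiest part of the format to mis-parse, since the delay is embedded in the value string rather than the key. A self-contained sanity check of `_parse_heartbeat`, using only names defined in this module:

```python
# Standalone check of the Retry-section parsing: the delay comes from the
# value ("retry after N minutes"), not from the "on_failure" key.
sample = """## Retry
- on_failure: retry after 45 minutes
- max_retries: 5
"""

cfg = HeartbeatConfig()
_parse_heartbeat(sample, cfg)
assert cfg.retry_delay_minutes == 45
assert cfg.max_retries == 5
```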
@@ -0,0 +1,295 @@
"""
Clawrity — HEARTBEAT Scheduler

APScheduler's AsyncIOScheduler fires digest jobs per client at configured times.
Schedule: ETL at 02:00 → RAG re-index at 03:00 → Digest + Scout at the configured time.
Retry: on failure, retry after N minutes, up to max_retries from HEARTBEAT.md.
"""

import asyncio
import json
import logging
import os
from datetime import datetime, timezone
from typing import Dict, Optional

import httpx
from apscheduler.schedulers.asyncio import AsyncIOScheduler
from apscheduler.triggers.cron import CronTrigger

from agents.orchestrator import Orchestrator
from config.client_loader import ClientConfig
from config.settings import get_settings
from heartbeat.heartbeat_loader import load_heartbeat
from skills.postgres_connector import get_connector
from soul.soul_loader import load_soul

logger = logging.getLogger(__name__)

DIGEST_QUESTION = (
    "Generate morning business digest. Highlight bottom 3 branches. "
    "Suggest where to focus budget. Newsletter style."
)


async def run_digest(
    client_config: ClientConfig,
    orchestrator: Orchestrator,
    retry_count: int = 0,
) -> Optional[str]:
    """
    Run the daily digest for a client.

    Steps:
        1. Query the bottom 3 branches by revenue (last 7 days)
        2. Gen Agent → QA Agent pipeline for the digest
        3. Scout Agent for competitor/sector news
        4. Push to the Slack webhook
        5. Log success/failure to JSONL

    Returns:
        The full digest text if successful, None on failure.
    """
    # Local imports: deferred until a digest run actually needs the agents
    from agents.gen_agent import GenAgent
    from agents.qa_agent import QAAgent

    client_id = client_config.client_id
    logger.info(f"[{client_id}] Running daily digest (attempt {retry_count + 1})")

    db = get_connector()

    try:
        # Step 1: Get the bottom 3 branches by revenue, with ROI
        bottom_sql = """
            SELECT branch, country,
                   SUM(revenue) AS total_revenue,
                   SUM(spend) AS total_spend,
                   SUM(leads) AS total_leads,
                   ROUND((SUM(revenue) / NULLIF(SUM(spend), 0))::numeric, 2) AS roi
            FROM spend_data
            WHERE client_id = %s
              AND date >= CURRENT_DATE - INTERVAL '7 days'
            GROUP BY branch, country
            ORDER BY total_revenue ASC
            LIMIT 3
        """
        data = db.execute_query(bottom_sql, (client_id,))

        # Step 2: Generate the digest via Gen Agent
        soul_content = load_soul(client_config)
        gen_agent = GenAgent()
        qa_agent = QAAgent()

        # Retrieve RAG chunks for digest context
        rag_chunks = None
        if orchestrator.retriever:
            try:
                rag_chunks = orchestrator.retriever.retrieve(
                    query="weekly performance bottom performers budget recommendations",
                    client_id=client_id,
                )
            except Exception as e:
                logger.warning(f"RAG retrieval for digest failed: {e}")

        digest = gen_agent.generate(
            question=DIGEST_QUESTION,
            soul_content=soul_content,
            data_context=data,
            rag_chunks=rag_chunks,
        )

        # Step 2b: QA pass on the digest (more lenient threshold than chat)
        qa_result = qa_agent.evaluate(
            response=digest,
            data_context=data,
            threshold=0.6,
        )

        if not qa_result["passed"]:
            logger.warning(
                f"[{client_id}] Digest QA failed (score={qa_result['score']:.2f}), "
                f"retrying with strict instruction"
            )
            # Regenerate once, feeding back the QA issues plus a strict
            # grounding instruction
            digest = gen_agent.generate(
                question=DIGEST_QUESTION,
                soul_content=soul_content,
                data_context=data,
                rag_chunks=rag_chunks,
                retry_issues=qa_result["issues"],
                retry_count=1,
                strict_data_instruction=(
                    "CRITICAL: Only mention branches and figures that appear in the "
                    "Data Context. Do not reference any other branches or historical data."
                ),
            )

        # Step 3: Scout Agent for competitor/sector news
        scout_section = None
        try:
            from agents.scout_agent import ScoutAgent
            scout = ScoutAgent()
            scout_section = await scout.gather_intelligence(client_config)
        except Exception as e:
            logger.warning(f"Scout Agent failed: {e}")

        # Step 4: Assemble the full digest
        full_digest = f"📊 **Clawrity Daily Digest — {client_config.client_name}**\n"
        full_digest += f"*{datetime.now().strftime('%B %d, %Y')}*\n\n"
        full_digest += digest

        if scout_section:
            full_digest += f"\n\n---\n\n{scout_section}"

        # Step 5: Push to the Slack webhook
        webhook_url = client_config.channels.get("slack_webhook", "")
        if webhook_url:
            await _push_to_slack(webhook_url, full_digest)
        else:
            logger.warning(f"[{client_id}] No Slack webhook configured")

        # Step 6: Log success to JSONL
        _log_digest_event(client_id, "success", {
            "qa_score": qa_result["score"],
            "qa_passed": qa_result["passed"],
            "scout_included": scout_section is not None,
            "digest_length": len(full_digest),
        })

        logger.info(f"[{client_id}] Digest completed successfully")
        return full_digest

    except Exception as e:
        logger.error(f"[{client_id}] Digest failed: {e}", exc_info=True)
        _log_digest_event(client_id, "failure", {"error": str(e), "attempt": retry_count + 1})

        heartbeat = load_heartbeat(client_config)

        if retry_count < heartbeat.max_retries:
            delay_minutes = heartbeat.retry_delay_minutes
            logger.info(
                f"[{client_id}] Scheduling digest retry in {delay_minutes} minutes "
                f"(attempt {retry_count + 2}/{heartbeat.max_retries + 1})"
            )
            await asyncio.sleep(delay_minutes * 60)
            return await run_digest(client_config, orchestrator, retry_count + 1)

        logger.error(f"[{client_id}] Digest failed after {heartbeat.max_retries + 1} attempts")
        # Post a failure notification to Slack
        webhook_url = client_config.channels.get("slack_webhook", "")
        if webhook_url:
            await _push_to_slack(
                webhook_url,
                "Clawrity digest unavailable. Backend may be offline.",
            )
        return None


async def _push_to_slack(webhook_url: str, message: str) -> None:
    """Push a message to a Slack incoming webhook."""
    try:
        async with httpx.AsyncClient() as client:
            response = await client.post(
                webhook_url,
                json={"text": message},
                timeout=30,
            )
            if response.status_code == 200:
                logger.info("Digest pushed to Slack successfully")
            else:
                logger.error(f"Slack webhook returned {response.status_code}: {response.text}")
    except Exception as e:
        logger.error(f"Failed to push digest to Slack: {e}")


def _log_digest_event(client_id: str, status: str, details: dict) -> None:
    """Append a digest event to the per-client JSONL monitoring file."""
    settings = get_settings()
    logs_dir = settings.logs_dir
    os.makedirs(logs_dir, exist_ok=True)
    log_path = os.path.join(logs_dir, f"{client_id}_digest.jsonl")

    entry = {
        "timestamp": datetime.now(timezone.utc).isoformat(),
        "client_id": client_id,
        "event": "digest",
        "status": status,
        **details,
    }

    try:
        with open(log_path, "a", encoding="utf-8") as f:
            f.write(json.dumps(entry) + "\n")
    except Exception as e:
        logger.error(f"Failed to log digest event: {e}")


def start_scheduler(
    client_configs: Dict[str, ClientConfig],
    orchestrator: Orchestrator,
) -> AsyncIOScheduler:
    """
    Start the APScheduler with digest jobs for all clients.

    Schedule per client:
        - Digest at the configured time (from HEARTBEAT.md)
        - ETL sync at 02:00 (placeholder)
        - RAG re-index at 03:00
    """
    scheduler = AsyncIOScheduler()

    for client_id, config in client_configs.items():
        heartbeat = load_heartbeat(config)

        # Daily digest at the configured time
        scheduler.add_job(
            run_digest,
            CronTrigger(
                hour=heartbeat.hour,
                minute=heartbeat.minute,
                timezone=heartbeat.timezone,
            ),
            args=[config, orchestrator],
            id=f"digest_{client_id}",
            name=f"Daily Digest — {config.client_name}",
            replace_existing=True,
        )
        logger.info(
            f"Scheduled digest for {client_id}: "
            f"{heartbeat.time} {heartbeat.timezone}"
        )

        # ETL sync at 02:00 (placeholder)
        scheduler.add_job(
            _etl_sync_placeholder,
            CronTrigger(hour=2, minute=0, timezone=heartbeat.timezone),
            args=[client_id],
            id=f"etl_{client_id}",
            name=f"ETL Sync — {config.client_name}",
            replace_existing=True,
        )

        # RAG re-index at 03:00
        scheduler.add_job(
            _rag_reindex,
            CronTrigger(hour=3, minute=0, timezone=heartbeat.timezone),
            args=[client_id],
            id=f"rag_reindex_{client_id}",
            name=f"RAG Re-index — {config.client_name}",
            replace_existing=True,
        )

    scheduler.start()
    return scheduler


async def _etl_sync_placeholder(client_id: str):
    """Placeholder for the nightly ETL data sync."""
    logger.info(f"[{client_id}] ETL sync triggered (placeholder)")


async def _rag_reindex(client_id: str):
    """Nightly RAG re-indexing (best-effort)."""
    logger.info(f"[{client_id}] RAG re-index triggered")
    try:
        from scripts.run_rag_pipeline import run_pipeline
        run_pipeline(client_id)
    except Exception as e:
        logger.warning(f"RAG re-index failed: {e}")
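For context, a sketch of how this module might be wired into an asyncio entrypoint. `load_all_clients` and the bare `Orchestrator()` construction are hypothetical, and the `heartbeat.heartbeat_scheduler` module path is assumed from the loader's import convention; `AsyncIOScheduler.start()` expects a running event loop, hence the async `main()`:

```python
# Hypothetical entrypoint: start the scheduler inside a running event loop
# and keep the loop alive so cron-triggered jobs can fire.
import asyncio

from agents.orchestrator import Orchestrator
from heartbeat.heartbeat_scheduler import start_scheduler

async def main() -> None:
    client_configs = load_all_clients()  # hypothetical: client_id -> ClientConfig
    orchestrator = Orchestrator()        # assumed default construction
    scheduler = start_scheduler(client_configs, orchestrator)
    try:
        await asyncio.Event().wait()     # block forever; jobs fire via cron
    finally:
        scheduler.shutdown()

if __name__ == "__main__":
    asyncio.run(main())
```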