mirror of
https://github.com/Manoj-HV30/clawrity.git
synced 2026-05-16 19:35:21 +00:00
prototype
This commit is contained in:
@@ -0,0 +1,158 @@
|
||||
"""
|
||||
Clawrity — Client Configuration Loader
|
||||
|
||||
Scans config/clients/ for YAML files and parses each into a ClientConfig model.
|
||||
Supports ${ENV_VAR} interpolation in YAML values.
|
||||
New client = new YAML file. Zero code changes.
|
||||
"""
|
||||
|
||||
import os
|
||||
import re
|
||||
import glob
|
||||
import logging
|
||||
from typing import Dict, List, Optional
|
||||
from pathlib import Path
|
||||
|
||||
import yaml
|
||||
from pydantic import BaseModel
|
||||
|
||||
from config.settings import get_settings
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Pydantic models for client YAML structure
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class DataSourceConfig(BaseModel):
    """Where a client's raw data comes from."""

    type: str = "csv"  # source kind; shipped configs use "csv" — other kinds TODO confirm
    path: str = ""     # data file location (relative path in shipped configs)
|
||||
|
||||
|
||||
class DatabaseConfig(BaseModel):
    """Per-client database connection details."""

    url: str = ""          # connection URL; YAML typically injects it via ${DATABASE_URL}
    schema_name: str = ""  # 'schema' is a Pydantic reserved attr; loader remaps YAML 'schema' to this
|
||||
|
||||
|
||||
class ScoutConfig(BaseModel):
    """Parameters for the client's market/news scouting."""

    # Pydantic copies mutable defaults per instance, so the bare [] is safe here.
    sector: str = ""              # e.g. "global retail"
    competitors: List[str] = []   # competitor names to track
    keywords: List[str] = []      # search phrases for news lookup
    news_lookback_days: int = 1   # how many days back to scan for news
|
||||
|
||||
|
||||
class ClientConfig(BaseModel):
    """Full per-client configuration parsed from one YAML file in config/clients/.

    New client = new YAML file — no code changes required.  Pydantic copies
    the mutable/model defaults below per instance, so they are safe.
    """

    client_id: str        # required — the loader skips files without it
    client_name: str = ""

    data_source: DataSourceConfig = DataSourceConfig()
    database: DatabaseConfig = DatabaseConfig()

    countries: List[str] = []                # regions covered by this client's data
    risk_threshold: float = 0.15             # cutoff — exact semantics TODO confirm against consumers
    hallucination_threshold: float = 0.75    # cutoff — exact semantics TODO confirm against consumers

    digest_schedule: str = "08:00"  # "HH:MM" wall-clock time for the daily digest
    timezone: str = "UTC"           # timezone name applied to digest_schedule

    # Delivery channels, e.g. {"slack_webhook": "${ACME_SLACK_WEBHOOK}"}
    channels: Dict[str, str] = {}

    soul_file: str = ""       # path to the client's "soul" markdown file
    heartbeat_file: str = ""  # path to the client's heartbeat markdown file

    # Raw data column name -> canonical name (e.g. "Sales" -> "revenue")
    column_mapping: Dict[str, str] = {}

    scout: ScoutConfig = ScoutConfig()

    # Runtime: workspace/team ID → client_id mapping for ProtocolAdapter
    slack_workspace_ids: List[str] = []
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Environment variable interpolation
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
_ENV_PATTERN = re.compile(r"\$\{(\w+)\}")
|
||||
|
||||
|
||||
def _interpolate_env(value: str) -> str:
|
||||
"""Replace ${ENV_VAR} placeholders with actual environment variable values."""
|
||||
def _replace(match):
|
||||
var_name = match.group(1)
|
||||
return os.environ.get(var_name, match.group(0))
|
||||
|
||||
if isinstance(value, str):
|
||||
return _ENV_PATTERN.sub(_replace, value)
|
||||
return value
|
||||
|
||||
|
||||
def _interpolate_dict(d: dict) -> dict:
    """Recursively interpolate environment variables in a dictionary.

    Fix: the original only interpolated plain strings inside lists — a dict
    (or list) nested inside a list was returned untouched, so e.g. a YAML
    list of channel mappings never had its ``${VAR}`` values expanded.  The
    recursion now descends through arbitrarily nested dicts and lists.

    Args:
        d: Parsed YAML mapping (keys are left untouched).

    Returns:
        A new dict with every string value, at any depth, passed through
        :func:`_interpolate_env`.  Non-string scalars are preserved as-is.
    """
    def _walk(value):
        # One recursive worker instead of per-type loops: dicts and lists
        # recurse, strings interpolate, everything else passes through.
        if isinstance(value, dict):
            return {key: _walk(val) for key, val in value.items()}
        if isinstance(value, list):
            return [_walk(item) for item in value]
        if isinstance(value, str):
            return _interpolate_env(value)
        return value

    return {key: _walk(value) for key, value in d.items()}
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Loader
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def load_client_configs(config_dir: Optional[str] = None) -> Dict[str, ClientConfig]:
    """
    Load all client YAML files from the config directory.

    Each ``*.yaml`` file is parsed, has its ``${ENV_VAR}`` placeholders
    interpolated, and is validated into a :class:`ClientConfig`.  Loading is
    best-effort: a file that is missing ``client_id``, fails to parse, or
    fails validation is logged and skipped rather than aborting the scan.

    Args:
        config_dir: Directory to scan; defaults to
            ``get_settings().clients_config_dir``.

    Returns:
        Dict mapping client_id → ClientConfig.
    """
    if config_dir is None:
        config_dir = get_settings().clients_config_dir

    configs: Dict[str, ClientConfig] = {}

    # sorted() makes load order — and therefore which file wins on a
    # duplicate client_id — deterministic across filesystems (glob.glob
    # order is arbitrary).  Path.glob replaces the glob/os.path combo.
    for yaml_path in sorted(Path(config_dir).glob("*.yaml")):
        try:
            # Explicit encoding so configs parse identically on every platform.
            with open(yaml_path, "r", encoding="utf-8") as f:
                raw = yaml.safe_load(f)

            if not raw or "client_id" not in raw:
                logger.warning("Skipping %s: missing client_id", yaml_path)
                continue

            # Interpolate environment variables before validation.
            interpolated = _interpolate_dict(raw)

            # Handle 'schema' → 'schema_name' mapping for Pydantic
            # ('schema' is a reserved Pydantic attribute).  Guard on the
            # section actually being a mapping so a malformed scalar
            # 'database:' value fails in validation with a clear error
            # instead of raising TypeError here.
            database = interpolated.get("database")
            if isinstance(database, dict) and "schema" in database:
                database["schema_name"] = database.pop("schema")

            config = ClientConfig(**interpolated)
            configs[config.client_id] = config
            logger.info("Loaded client config: %s from %s", config.client_id, yaml_path)

        except Exception as e:
            # Best-effort boundary: one bad file must not block the others.
            logger.error("Error loading %s: %s", yaml_path, e)

    if not configs:
        logger.warning("No client configs found in %s", config_dir)

    return configs
|
||||
|
||||
|
||||
def get_client_config(client_id: str, configs: Optional[Dict[str, ClientConfig]] = None) -> Optional[ClientConfig]:
    """Get a specific client config by ID.

    When *configs* is omitted, the full set is (re)loaded from disk first.
    Returns ``None`` for an unknown client_id.
    """
    lookup = load_client_configs() if configs is None else configs
    return lookup.get(client_id)
|
||||
@@ -0,0 +1,36 @@
|
||||
client_id: acme_corp
|
||||
client_name: ACME Corporation
|
||||
|
||||
data_source:
|
||||
type: "csv"
|
||||
path: "data/processed/acme_merged.csv"
|
||||
|
||||
database:
|
||||
url: "${DATABASE_URL}"
|
||||
schema: "acme"
|
||||
|
||||
countries: ["US", "Canada", "MENA"]
|
||||
risk_threshold: 0.15
|
||||
hallucination_threshold: 0.75
|
||||
|
||||
digest_schedule: "08:00"
|
||||
timezone: "Asia/Kolkata"
|
||||
|
||||
channels:
|
||||
slack_webhook: "${ACME_SLACK_WEBHOOK}"
|
||||
|
||||
soul_file: "soul/acme_soul.md"
|
||||
heartbeat_file: "heartbeat/acme_heartbeat.md"
|
||||
|
||||
column_mapping:
|
||||
Order Date: date
|
||||
Country: country
|
||||
City: branch
|
||||
Sales: revenue
|
||||
Profit: profit
|
||||
|
||||
scout:
|
||||
sector: "global retail"
|
||||
competitors: ["IKEA", "Amazon", "Walmart", "Staples"]
|
||||
keywords: ["retail supply chain", "furniture market trends", "office supplies demand", "global retail ecommerce"]
|
||||
news_lookback_days: 1
|
||||
@@ -0,0 +1,76 @@
|
||||
"""
|
||||
Clawrity — LLM Client Factory
|
||||
|
||||
Provides a unified LLM client that works with both NVIDIA NIM and Groq.
|
||||
Both are OpenAI-compatible APIs, so we use the OpenAI client with different
|
||||
base URLs and API keys.
|
||||
|
||||
Auto-detects provider from settings:
|
||||
- NVIDIA NIM: base_url="https://integrate.api.nvidia.com/v1"
|
||||
- Groq: base_url="https://api.groq.com/openai/v1"
|
||||
"""
|
||||
|
||||
import logging
|
||||
from functools import lru_cache
|
||||
|
||||
from openai import OpenAI
|
||||
|
||||
from config.settings import get_settings
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Provider configs
|
||||
# Provider configs: each entry pairs an OpenAI-compatible API endpoint with
# the model used when the caller hasn't picked one for that provider.
_PROVIDERS = {
    "nvidia": {
        "base_url": "https://integrate.api.nvidia.com/v1",
        "default_model": "meta/llama-3.3-70b-instruct",
    },
    "groq": {
        "base_url": "https://api.groq.com/openai/v1",
        "default_model": "llama-3.3-70b-versatile",
    },
}
|
||||
|
||||
|
||||
def get_llm_client() -> OpenAI:
    """Get the configured LLM client (NVIDIA NIM or Groq).

    Both providers expose OpenAI-compatible APIs, so the same client class
    works for either — only the API key and base URL differ.

    Raises:
        ValueError: if the active provider is unknown, or its API key is
            missing from the environment.
    """
    settings = get_settings()
    provider = settings.active_llm_provider

    # Dispatch table instead of an if/elif chain; membership doubles as
    # the unknown-provider check.
    key_by_provider = {
        "nvidia": settings.nvidia_api_key,
        "groq": settings.groq_api_key,
    }
    if provider not in key_by_provider:
        raise ValueError(f"Unknown LLM provider: {provider}")
    api_key = key_by_provider[provider]

    if not api_key:
        raise ValueError(
            f"No API key configured for LLM provider '{provider}'. "
            f"Set {'NVIDIA_API_KEY' if provider == 'nvidia' else 'GROQ_API_KEY'} in .env"
        )

    provider_cfg = _PROVIDERS[provider]
    client = OpenAI(
        api_key=api_key,
        base_url=provider_cfg["base_url"],
    )

    logger.info(f"LLM client: {provider} ({provider_cfg['base_url']})")
    return client
|
||||
|
||||
|
||||
def get_model_name() -> str:
    """Get the model name for the active provider.

    A model explicitly configured in settings is honored as-is; the single
    correction applied is swapping one provider's *default* model for the
    other's when the active provider doesn't match it (i.e. the setting was
    never customized).
    """
    settings = get_settings()
    provider = settings.active_llm_provider
    model = settings.llm_model

    nvidia_default = _PROVIDERS["nvidia"]["default_model"]
    groq_default = _PROVIDERS["groq"]["default_model"]

    # A stale cross-provider default gets replaced by the active provider's
    # own default; any other value is treated as a deliberate choice.
    if provider == "groq" and model == nvidia_default:
        return groq_default
    if provider == "nvidia" and model == groq_default:
        return nvidia_default
    return model
|
||||
@@ -0,0 +1,72 @@
|
||||
"""
|
||||
Clawrity — Application Settings
|
||||
|
||||
Loads environment variables via pydantic-settings.
|
||||
All secrets read from .env file — nothing is hardcoded.
|
||||
"""
|
||||
|
||||
import os
|
||||
from functools import lru_cache
|
||||
from typing import Optional
|
||||
|
||||
from pydantic_settings import BaseSettings
|
||||
|
||||
|
||||
class Settings(BaseSettings):
    """Application settings loaded from environment variables.

    Values come from the process environment and the ``.env`` file (see
    ``model_config`` below); every default here is a non-secret fallback.
    """

    # --- Database ---
    # Dev placeholder only — the real URL comes from DATABASE_URL in .env.
    database_url: str = "postgresql://user:pass@localhost:5432/clawrity"

    # --- LLM Providers ---
    # Empty string means "not configured"; see active_llm_provider below.
    groq_api_key: str = ""
    nvidia_api_key: str = ""

    # --- Slack (Socket Mode) ---
    # Bot Token (xoxb-...) — OAuth & Permissions → Install to Workspace
    slack_bot_token: str = ""
    # App-Level Token (xapp-...) — Socket Mode → Generate Token
    slack_app_token: str = ""
    # Signing Secret — Basic Information → App Credentials
    slack_signing_secret: str = ""

    # --- Tavily Web Search ---
    tavily_api_key: str = ""

    # --- Slack Webhook for digest delivery ---
    acme_slack_webhook: str = ""

    # --- Paths --- (relative paths, presumably resolved from the project root — TODO confirm)
    data_raw_dir: str = "data/raw"
    data_processed_dir: str = "data/processed"
    logs_dir: str = "logs"
    clients_config_dir: str = "config/clients"

    # --- Model Defaults ---
    llm_model: str = "meta/llama-3.3-70b-instruct"
    llm_provider: str = ""  # auto-detected: "nvidia" or "groq"
    embedding_model: str = "all-MiniLM-L6-v2"
    embedding_dim: int = 384

    @property
    def active_llm_provider(self) -> str:
        """Auto-detect which LLM provider to use based on available keys.

        Precedence: an explicit ``llm_provider`` setting wins, then NVIDIA
        if its key is present, then Groq; "nvidia" is the final fallback
        even with no keys configured.
        """
        if self.llm_provider:
            return self.llm_provider
        if self.nvidia_api_key:
            return "nvidia"
        if self.groq_api_key:
            return "groq"
        return "nvidia"  # default

    # pydantic-settings config: read .env (UTF-8), match env vars case-insensitively.
    model_config = {
        "env_file": ".env",
        "env_file_encoding": "utf-8",
        "case_sensitive": False,
    }
|
||||
|
||||
|
||||
@lru_cache()
def get_settings() -> Settings:
    """Return the process-wide Settings instance.

    Constructed on first call (reading the environment and .env once) and
    served from the lru_cache thereafter — an effective singleton.
    """
    return Settings()
|
||||
Reference in New Issue
Block a user