| | @@ -0,0 +1,190 @@ |
| + | #!/usr/bin/env python3 |
| + | """Hermes C4 - shadow-pivot evaluation (unattended). |
| + | |
| + | Runs on CT-REDACTED via system cron (no Claude session needed). Pulls SHADOW |
| + | predictions from hermes.db, backfills outcomes from the Kalshi API, applies a |
| + | hard liquidity filter, and scores the pivot against the pre-committed decision |
| + | gate. Writes a report + Discord ping. Idempotent; safe to re-run. |
| + | |
| + | Decision gate (from PIVOT_SPEC.md): propose a small live pilot ONLY if, on the |
| + | liquid subset with >=30 resolved: |
| + | EV/trade after fees > 0 AND Brier < 0.25 AND it holds in the |
| + | highest-volume city/market-type cell (not one lucky cluster). |
| + | Otherwise: iterate SHADOW_GAP_K / SHADOW_MIN_BRACKET_WIDTH, or retire. |
| + | NEVER enables live trading. Never writes auto_config. |
| + | """ |
| + | import sys, os, json, sqlite3, statistics, datetime, traceback |
| + | |
# Install location. The paths are written with a ``$HERMES_HOME`` placeholder;
# Python does not expand shell variables inside string literals, so expand it
# explicitly. ``os.path.expandvars`` leaves the text untouched when the
# variable is not set, so behaviour is unchanged in that case.
DB = os.path.expandvars("$HERMES_HOME/hermes.db")
REPORT_DIR = os.path.expandvars("$HERMES_HOME")

# Liquidity filter: a prediction is kept only if its recorded market price is
# at least MIN_PRICE and its recorded volume is at least MIN_VOLUME.
MIN_PRICE = 0.10
MIN_VOLUME = 20

# Decision gate: minimum number of resolved liquid predictions before any
# verdict is given, and the (exclusive) upper bound on the mean Brier score.
MIN_RESOLVED = 30
BRIER_GATE = 0.25

# Make the project's ``main`` module importable from the install directory.
sys.path.insert(0, REPORT_DIR)
| + | |
def log(*a):
    """Print the given values to stdout, flushing right away.

    Flushing on every call keeps the output ordered and visible even when
    stdout is redirected to a file or pipe.
    """
    print(*a, flush=True)
| + | |
def backfill_outcome(main, ticker):
    """Look up how a market settled via the Kalshi API.

    Args:
        main: object exposing ``kalshi_get(path)``; the caller passes the
            project's ``main`` module.
        ticker: market ticker, interpolated into ``/markets/{ticker}``.

    Returns:
        1 if the market's ``result`` is "yes", 0 if it is "no", and None for
        any other value (still open / unknown) or when the lookup fails.

    Never raises: a failed lookup is reported on stderr and treated as
    "unknown", so one bad ticker cannot abort the whole evaluation. The
    failure is reported rather than swallowed because the caller counts a
    None as "not resolved yet", and an API outage would otherwise be
    indistinguishable from markets that are genuinely still open.
    """
    try:
        payload = main.kalshi_get(f"/markets/{ticker}")
        market = (payload or {}).get("market") or {}
        result = str(market.get("result") or "").strip().lower()
    except Exception as exc:
        print(f"backfill_outcome: lookup failed for {ticker}: {exc!r}",
              file=sys.stderr, flush=True)
        return None
    return {"yes": 1, "no": 0}.get(result)
| + | |
def main_eval():
    """Run one evaluation pass over the SHADOW predictions and publish a verdict.

    Steps:
      1. Load every prediction whose recommendation starts with ``SHADOW``,
         together with a volume figure from ``market_history``.
      2. Keep the liquid ones (price >= MIN_PRICE and volume >= MIN_VOLUME).
      3. Determine each outcome from the stored ``actual_outcome`` or, when
         that is missing, from the Kalshi API.
      4. With at least MIN_RESOLVED resolved rows, compute win rate, Brier
         score and EV/trade after fees, overall and per (city, market_type)
         cell, and apply the gate.

    Every line of the report is printed as it is produced and finally handed
    to ``_write_and_notify`` with a status of "PASS", "FAIL", "INSUFFICIENT",
    or None when ``main.py`` cannot be imported.

    The database is only read here; nothing is written back to it.
    """
    now = datetime.datetime.now()
    ts = now.strftime("%Y%m%d-%H%M")
    report_path = os.path.join(REPORT_DIR, f"c4_report_{ts}.txt")
    out = []

    def emit(s=""):
        out.append(s)
        log(s)

    emit(f"=== Hermes C4 evaluation - {now.isoformat(timespec='seconds')} ===")

    try:
        import main
    except Exception as e:
        emit(f"FATAL: cannot import main.py: {e}")
        _write_and_notify(report_path, out, None)
        return

    # The connection is only needed for this one query, so close it as soon as
    # the rows are in memory - including when the query itself raises.
    con = sqlite3.connect(DB)
    try:
        con.row_factory = sqlite3.Row
        # Volume comes from a scalar subquery rather than a JOIN: a JOIN would
        # repeat a prediction once per market_history row of its ticker and
        # inflate every count below.
        # NOTE(review): MAX() assumes volume_fp is numeric and cumulative, so
        # the largest value is the most recent one - confirm against schema.
        rows = con.execute(
            """SELECT p.id, p.ticker, p.market_title, p.ensemble_probability ep,
                      p.market_price px, p.edge, p.recommendation rec, p.market_type,
                      p.actual_outcome, p.predicted_at,
                      COALESCE((SELECT MAX(mh.volume_fp)
                                  FROM market_history mh
                                 WHERE mh.ticker = p.ticker), 0) vol
                 FROM predictions p
                WHERE p.recommendation LIKE 'SHADOW%'
                ORDER BY p.id"""
        ).fetchall()
    finally:
        con.close()

    emit(f"shadow predictions on record: {len(rows)}")
    if not rows:
        emit("No shadow predictions yet - collection has not produced data. "
             "Recommend: verify SHADOW_MODE scanner is running; re-check in ~1 week.")
        _write_and_notify(report_path, out, "INSUFFICIENT")
        return

    liquid = [r for r in rows
              if (r["px"] or 0) >= MIN_PRICE and (r["vol"] or 0) >= MIN_VOLUME]
    emit(f"after liquidity filter (px>=${MIN_PRICE:.2f} & vol>={MIN_VOLUME}): {len(liquid)}")

    # Several predictions can share a ticker; ask the API once per ticker.
    backfilled = {}
    resolved = []
    for r in liquid:
        ao = r["actual_outcome"]
        if ao in (0, 1):
            outcome = int(ao)
        elif isinstance(ao, str) and ao.strip().lower() in ("yes", "no"):
            outcome = 1 if ao.strip().lower() == "yes" else 0
        else:
            ticker = r["ticker"]
            if ticker not in backfilled:
                backfilled[ticker] = backfill_outcome(main, ticker)
            outcome = backfilled[ticker]
        if outcome is not None:
            resolved.append((r, outcome))
    emit(f"resolved (liquid): {len(resolved)} / need {MIN_RESOLVED}")

    if len(resolved) < MIN_RESOLVED:
        emit("")
        emit(f"VERDICT: INSUFFICIENT DATA - {len(resolved)} resolved liquid predictions "
             f"(< {MIN_RESOLVED}). Do NOT evaluate edge yet. Keep SHADOW_MODE running; "
             f"re-run this eval in ~7 days. (Total shadow rows {len(rows)}, "
             f"liquid {len(liquid)} - if liquid stays tiny, the pivot's market set "
             f"may be structurally illiquid → that itself is a finding.)")
        _write_and_notify(report_path, out, "INSUFFICIENT")
        return

    briers, wins, evs = [], 0, []
    cell = {}  # (city, market_type) -> [count, wins, summed EV]
    fee_failures = 0
    for r, outcome in resolved:
        # A missing model probability is scored as a coin flip.
        ep = r["ep"] if r["ep"] is not None else 0.5
        briers.append((ep - outcome) ** 2)
        side_yes = "YES" in (r["rec"] or "").upper()
        px = r["px"] or 0.0
        try:
            fee = main.kalshi_taker_fee(px)
        except Exception:
            # Keep scoring, but remember it: a zero fee overstates the EV.
            fee = 0.0
            fee_failures += 1
        won = (outcome == 1) if side_yes else (outcome == 0)
        # NOTE(review): px is used as the entry price for both sides. If
        # market_price is the YES price, a NO-side entry would cost 1 - px;
        # confirm how the scanner records market_price.
        ev = ((1.0 - px) - fee) if won else (-(px + fee))
        evs.append(ev)
        wins += 1 if won else 0
        city = r["ticker"].split("-")[0]
        key = (city, str(r["market_type"] or "?"))
        c = cell.setdefault(key, [0, 0, 0.0])
        c[0] += 1
        c[1] += 1 if won else 0
        c[2] += ev

    n = len(resolved)
    brier = statistics.mean(briers)
    wr = wins / n
    ev_mean = statistics.mean(evs)
    emit("")
    emit(f"n={n} WR={wr*100:.1f}% Brier={brier:.4f} EV/trade=${ev_mean:+.3f} (after fees)")
    if fee_failures:
        emit(f"WARNING: fee lookup failed for {fee_failures}/{n} trades; "
             "fee assumed $0.00 there, so EV/trade is overstated.")
    emit("by cell (city, market_type):")
    # Largest cell first; the first one is the "highest-volume" cell the gate
    # must hold in.
    best_cell = None
    for key, (cn, cw, cev) in sorted(cell.items(), key=lambda kv: -kv[1][0]):
        cev_avg = cev / cn if cn else 0
        emit(f" {key[0]:14s} {key[1]:6s} n={cn:3d} WR={cw/cn*100:4.0f}% EV=${cev_avg:+.3f}")
        if best_cell is None:
            best_cell = (key, cn, cw / cn, cev_avg)

    # The largest cell needs at least 10 trades and a positive average EV.
    cell_ok = bool(best_cell and best_cell[1] >= 10 and best_cell[3] > 0)
    passed = (ev_mean > 0) and (brier < BRIER_GATE) and cell_ok
    emit("")
    if passed:
        emit("VERDICT: GATE PASSED - pivot shows positive EV after fees, Brier "
             f"< {BRIER_GATE}, and holds in the highest-volume cell "
             f"{best_cell[0]} (n={best_cell[1]}, EV=${best_cell[3]:+.3f}). "
             "RECOMMEND: propose a SMALL live pilot to the user. Do NOT auto-enable.")
    else:
        why = []
        if ev_mean <= 0:
            why.append(f"EV/trade ${ev_mean:+.3f} not > 0")
        if brier >= BRIER_GATE:
            why.append(f"Brier {brier:.3f} not < {BRIER_GATE}")
        if not cell_ok:
            why.append("does not hold in the highest-volume cell")
        emit("VERDICT: GATE FAILED - " + "; ".join(why) + ". "
             "RECOMMEND: iterate SHADOW_GAP_K / SHADOW_MIN_BRACKET_WIDTH, or retire. "
             "Do NOT enable live trading.")
    emit("")
    emit("(Auto-trading untouched. This script never writes auto_config.)")
    _write_and_notify(report_path, out, "PASS" if passed else "FAIL")
| + | |
def _write_and_notify(path, lines, status):
    """Write the report to *path* and, if a webhook is configured, post it to Discord.

    Args:
        path: destination file for the report.
        lines: report lines; joined with newlines to form the body.
        status: "PASS", "FAIL", "INSUFFICIENT" or None; selects the emoji tag
            and is shown in the message header.

    Both steps are best-effort and independent: a failure in either one is
    reported on stderr and never raised, so a broken disk or network cannot
    turn a finished evaluation into a crash.
    """
    body = "\n".join(lines)
    try:
        # Explicit UTF-8: the report contains non-ASCII characters and the
        # locale encoding of an unattended job may not be able to encode them.
        with open(path, "w", encoding="utf-8") as f:
            f.write(body + "\n")
    except Exception as exc:
        print(f"c4 eval: could not write report {path}: {exc!r}",
              file=sys.stderr, flush=True)

    # Prefer the webhook configured in main.py, but still fall back to the
    # environment when main.py cannot be imported - that is exactly the case
    # in which the FATAL import report needs to be delivered.
    try:
        import main
        hook = getattr(main, "DISCORD_WEBHOOK", None)
    except Exception:
        hook = None
    hook = hook or os.getenv("DISCORD_WEBHOOK")
    if not hook:
        return

    try:
        import urllib.request
        tag = {"PASS": "✅", "FAIL": "❌", "INSUFFICIENT": "⏳"}.get(status, "ℹ️")
        # Only the tail of the report is sent, which keeps the message short
        # and always includes the verdict.
        msg = f"{tag} **Hermes C4 eval** ({status})\n```\n{body[-1500:]}\n```"
        req = urllib.request.Request(
            hook,
            data=json.dumps({"content": msg}).encode(),
            headers={
                "Content-Type": "application/json",
                # NOTE(review): Discord has been reported to answer urllib's
                # default User-Agent with HTTP 403; an explicit one is harmless.
                "User-Agent": "hermes-c4-eval/1.0",
            },
        )
        with urllib.request.urlopen(req, timeout=10):
            pass
    except Exception as exc:
        print(f"c4 eval: Discord notification failed: {exc!r}",
              file=sys.stderr, flush=True)
| + | |
# Script entry point: run a single evaluation pass. Any uncaught error is
# printed with its traceback and reported through a non-zero exit status.
if __name__ == "__main__":
    try:
        main_eval()
    except Exception:
        traceback.print_exc()
        raise SystemExit(1)