| 1 | """Hermes C4 - shadow-pivot evaluation (unattended). |
| 2 | |
| 3 | Runs on CT-REDACTED via system cron (no Claude session needed). Pulls SHADOW |
| 4 | predictions from hermes.db, backfills outcomes from the Kalshi API, applies a |
| 5 | hard liquidity filter, and scores the pivot against the pre-committed decision |
| 6 | gate. Writes a report + Discord ping. Idempotent; safe to re-run. |
| 7 | |
| 8 | Decision gate (from PIVOT_SPEC.md): propose a small live pilot ONLY if, on the |
| 9 | liquid subset with >=30 resolved: |
| 10 | EV/trade after fees > 0 AND Brier < 0.25 AND it holds in the |
| 11 | highest-volume city/market-type cell (not one lucky cluster). |
| 12 | Otherwise: iterate SHADOW_GAP_K / SHADOW_MIN_BRACKET_WIDTH, or retire. |
| 13 | NEVER enables live trading. Never writes auto_config. |
| 14 | """ |
| 15 | import sys, os, json, sqlite3, statistics, datetime, traceback |
| 16 | |
# Paths are written with a $HERMES_HOME placeholder. Python does not expand
# shell-style variables inside string literals, so expand them explicitly.
# os.path.expandvars leaves the text unchanged when HERMES_HOME is unset,
# which keeps the previous values in that case.
DB = os.path.expandvars("$HERMES_HOME/hermes.db")   # SQLite database read by main_eval
REPORT_DIR = os.path.expandvars("$HERMES_HOME")     # directory the c4_report_*.txt files go to
MIN_PRICE = 0.10      # liquidity filter: minimum market price
MIN_VOLUME = 20       # liquidity filter: minimum market volume
MIN_RESOLVED = 30     # resolved liquid predictions required before the gate is scored
BRIER_GATE = 0.25     # the gate requires a Brier score strictly below this value
# Make main.py (imported lazily by the functions below) importable from the
# same expanded directory.
sys.path.insert(0, REPORT_DIR)
| 24 | |
def log(*a):
    """Print the given values on one line and flush stdout immediately."""
    # Flushing on every call keeps the output ordered when stdout is a pipe
    # or a redirected file rather than a terminal.
    print(*a, sep=" ", end="\n", flush=True)
| 27 | |
def backfill_outcome(main, ticker):
    """Look up how a market settled through the Kalshi API.

    Args:
        main: Object exposing ``kalshi_get(path)``; the caller in this file
            passes the imported ``main`` module.
        ticker: Market ticker, interpolated into the ``/markets/{ticker}`` path.

    Returns:
        1 if the market's ``result`` is "yes", 0 if it is "no" (compared
        case-insensitively), and None for any other value, an empty or
        missing payload, or a failed lookup.
    """
    try:
        payload = main.kalshi_get(f"/markets/{ticker}")
        market = (payload or {}).get("market") or {}
        result = (market.get("result") or "").lower()
    except Exception as exc:
        # Still best-effort (the row simply stays unresolved), but say so on
        # stderr: a silent None makes an API outage indistinguishable from a
        # market that has not settled yet.
        print(f"backfill_outcome: lookup failed for {ticker}: {exc!r}",
              file=sys.stderr, flush=True)
        return None
    return {"yes": 1, "no": 0}.get(result)
| 41 | |
def main_eval():
    """Run one unattended C4 shadow-pivot evaluation and publish the verdict.

    Pipeline:
      1. Import ``main`` (used for the Kalshi API lookup and the taker fee).
      2. Load every ``SHADOW%`` prediction from the SQLite database at ``DB``
         (the only statement issued is a SELECT).
      3. Keep the liquid subset (price >= MIN_PRICE and volume >= MIN_VOLUME).
      4. Resolve outcomes from the stored ``actual_outcome`` or, failing that,
         through ``backfill_outcome``.
      5. With at least MIN_RESOLVED resolved rows, compute win rate, Brier
         score and EV/trade after fees, overall and per (city, market_type)
         cell, and apply the decision gate.

    Every exit path hands the accumulated report lines to
    ``_write_and_notify`` with a status of "PASS", "FAIL", "INSUFFICIENT" or
    None (fatal setup error).

    Raises:
        sqlite3.Error: if the predictions cannot be read; a FATAL report is
            written first and the error is then re-raised.
    """
    now = datetime.datetime.now()
    ts = now.strftime("%Y%m%d-%H%M")
    report_path = os.path.join(REPORT_DIR, f"c4_report_{ts}.txt")
    out = []

    def emit(s=""):
        # Every report line goes both to the in-memory report and to stdout.
        out.append(s)
        log(s)

    emit(f"=== Hermes C4 evaluation - {now.isoformat(timespec='seconds')} ===")

    try:
        import main
    except Exception as e:
        emit(f"FATAL: cannot import main.py: {e}")
        _write_and_notify(report_path, out, None)
        return

    # Fetch everything up front and close the connection in a finally block so
    # it is released even when the query raises.
    try:
        con = sqlite3.connect(DB)
        try:
            con.row_factory = sqlite3.Row
            fetched = con.execute(
                """SELECT p.id, p.ticker, p.market_title, p.ensemble_probability ep,
                          p.market_price px, p.edge, p.recommendation rec, p.market_type,
                          p.actual_outcome, p.predicted_at,
                          COALESCE(mh.volume_fp, 0) vol
                   FROM predictions p
                   LEFT JOIN market_history mh ON mh.ticker = p.ticker
                   WHERE p.recommendation LIKE 'SHADOW%'
                   ORDER BY p.id"""
            ).fetchall()
        finally:
            con.close()
    except sqlite3.Error as e:
        emit(f"FATAL: cannot read shadow predictions from {DB}: {e}")
        _write_and_notify(report_path, out, None)
        raise

    # The LEFT JOIN yields one row per matching market_history row, so a
    # prediction whose ticker has several history rows would be counted several
    # times in n, WR and EV. Keep a single row per prediction id - the one with
    # the largest volume, so a prediction stays liquid if any joined row was.
    by_id = {}
    for r in fetched:
        kept = by_id.get(r["id"])
        if kept is None or (r["vol"] or 0) > (kept["vol"] or 0):
            by_id[r["id"]] = r
    rows = list(by_id.values())  # dict order preserves the ORDER BY p.id
    if len(rows) != len(fetched):
        emit(f"note: collapsed {len(fetched) - len(rows)} duplicate join row(s) "
             "(several market_history rows per ticker)")

    emit(f"shadow predictions on record: {len(rows)}")
    if not rows:
        emit("No shadow predictions yet - collection has not produced data. "
             "Recommend: verify SHADOW_MODE scanner is running; re-check in ~1 week.")
        _write_and_notify(report_path, out, "INSUFFICIENT")
        return

    liquid = [r for r in rows if (r["px"] or 0) >= MIN_PRICE and (r["vol"] or 0) >= MIN_VOLUME]
    emit(f"after liquidity filter (px>=${MIN_PRICE:.2f} & vol>={MIN_VOLUME}): {len(liquid)}")

    resolved = []
    backfilled = {}  # ticker -> outcome; one API lookup per ticker per run
    for r in liquid:
        ao = r["actual_outcome"]
        if ao in (0, 1):
            outcome = int(ao)
        elif isinstance(ao, str) and ao.strip().lower() in ("yes", "no"):
            outcome = 1 if ao.strip().lower() == "yes" else 0
        else:
            ticker = r["ticker"]
            if ticker not in backfilled:
                backfilled[ticker] = backfill_outcome(main, ticker)
            outcome = backfilled[ticker]
        if outcome is not None:
            resolved.append((r, outcome))
    emit(f"resolved (liquid): {len(resolved)} / need {MIN_RESOLVED}")

    if len(resolved) < MIN_RESOLVED:
        emit("")
        emit(f"VERDICT: INSUFFICIENT DATA - {len(resolved)} resolved liquid predictions "
             f"(< {MIN_RESOLVED}). Do NOT evaluate edge yet. Keep SHADOW_MODE running; "
             f"re-run this eval in ~7 days. (Total shadow rows {len(rows)}, "
             f"liquid {len(liquid)} - if liquid stays tiny, the pivot's market set "
             f"may be structurally illiquid → that itself is a finding.)")
        _write_and_notify(report_path, out, "INSUFFICIENT")
        return

    briers, wins, evs = [], 0, []
    fee_errors = 0
    cell = {}  # (city, market_type) -> [count, wins, summed EV]
    for r, outcome in resolved:
        # A missing model probability is scored as an uninformative 0.5.
        ep = r["ep"] if r["ep"] is not None else 0.5
        briers.append((ep - outcome) ** 2)
        side_yes = "YES" in (r["rec"] or "").upper()
        px = r["px"] or 0.0
        try:
            fee = main.kalshi_taker_fee(px)
        except Exception:
            # Falling back to a zero fee overstates EV, so count the failures
            # and surface them in the report below.
            fee = 0.0
            fee_errors += 1
        won = (outcome == 1) if side_yes else (outcome == 0)
        # NOTE(review): this prices both sides as "pay px, receive 1 on a win".
        # That is right if market_price is the price of the recommended side;
        # if it is always the YES price, NO-side trades should pay (1 - px)
        # instead - confirm against the scanner that writes predictions.
        ev = ((1.0 - px) - fee) if won else (-(px + fee))
        evs.append(ev)
        wins += 1 if won else 0
        # Cell key: the ticker prefix before the first "-" plus the market type.
        city = (r["ticker"] or "?").split("-")[0]
        key = (city, r["market_type"] or "?")
        c = cell.setdefault(key, [0, 0, 0.0])
        c[0] += 1
        c[1] += 1 if won else 0
        c[2] += ev

    n = len(resolved)
    brier = statistics.mean(briers)
    wr = wins / n
    ev_mean = statistics.mean(evs)
    emit("")
    emit(f"n={n} WR={wr*100:.1f}% Brier={brier:.4f} EV/trade=${ev_mean:+.3f} (after fees)")
    if fee_errors:
        emit(f"WARNING: fee lookup failed for {fee_errors} trade(s); those were scored "
             "with a $0 fee, so EV/trade is overstated.")
    emit("by cell (city, market_type):")
    best_cell = None
    # Largest cell first; the first one seen is the highest-volume cell.
    for key, (cn, cw, cev) in sorted(cell.items(), key=lambda kv: -kv[1][0]):
        cev_avg = cev / cn if cn else 0
        emit(f" {key[0]:14s} {key[1]:6s} n={cn:3d} WR={cw/cn*100:4.0f}% EV=${cev_avg:+.3f}")
        if best_cell is None:
            best_cell = (key, cn, cw / cn, cev_avg)

    # The edge must hold in the biggest cell (at least 10 trades, positive EV),
    # not just in aggregate.
    cell_ok = bool(best_cell and best_cell[1] >= 10 and best_cell[3] > 0)
    passed = (ev_mean > 0) and (brier < BRIER_GATE) and cell_ok
    emit("")
    if passed:
        emit("VERDICT: GATE PASSED - pivot shows positive EV after fees, Brier "
             f"< {BRIER_GATE}, and holds in the highest-volume cell "
             f"{best_cell[0]} (n={best_cell[1]}, EV=${best_cell[3]:+.3f}). "
             "RECOMMEND: propose a SMALL live pilot to the user. Do NOT auto-enable.")
    else:
        why = []
        if ev_mean <= 0:
            why.append(f"EV/trade ${ev_mean:+.3f} not > 0")
        if brier >= BRIER_GATE:
            why.append(f"Brier {brier:.3f} not < {BRIER_GATE}")
        if not cell_ok:
            why.append("does not hold in the highest-volume cell")
        emit("VERDICT: GATE FAILED - " + "; ".join(why) + ". "
             "RECOMMEND: iterate SHADOW_GAP_K / SHADOW_MIN_BRACKET_WIDTH, or retire. "
             "Do NOT enable live trading.")
    emit("")
    emit("(Auto-trading untouched. This script never writes auto_config.)")
    _write_and_notify(report_path, out, "PASS" if passed else "FAIL")
| 161 | |
| 162 | def _write_and_notify(path, lines, status): |
| 163 | body = "\n".join(lines) |
| 164 | try: |
| 165 | with open(path, "w") as f: |
| 166 | f.write(body + "\n") |
| 167 | except Exception: |
| 168 | pass |
| 169 | |
| 170 | try: |
| 171 | import main |
| 172 | hook = getattr(main, "DISCORD_WEBHOOK", None) or os.getenv("DISCORD_WEBHOOK") |
| 173 | if hook: |
| 174 | import urllib.request |
| 175 | tag = {"PASS": "✅", "FAIL": "❌", "INSUFFICIENT": "⏳"}.get(status, "ℹ️") |
| 176 | msg = f"{tag} **Hermes C4 eval** ({status})\n```\n{body[-1500:]}\n```" |
| 177 | req = urllib.request.Request( |
| 178 | hook, data=json.dumps({"content": msg}).encode(), |
| 179 | headers={"Content-Type": "application/json"}) |
| 180 | urllib.request.urlopen(req, timeout=10) |
| 181 | except Exception: |
| 182 | pass |
| 183 | |
if __name__ == "__main__":
    # Script entry point: run the evaluation; on any uncaught error print the
    # traceback and exit with status 1.
    try:
        main_eval()
    except Exception:
        traceback.print_exc()
        raise SystemExit(1)