Zion Boggan zionboggan.com ↗
189 lines · python
History for this file →
1
"""Hermes C4 - shadow-pivot evaluation (unattended).
2
 
3
Runs on CT-REDACTED via system cron (no Claude session needed). Pulls SHADOW
4
predictions from hermes.db, backfills outcomes from the Kalshi API, applies a
5
hard liquidity filter, and scores the pivot against the pre-committed decision
6
gate. Writes a report + Discord ping. Idempotent; safe to re-run.
7
 
8
Decision gate (from PIVOT_SPEC.md): propose a small live pilot ONLY if, on the
9
liquid subset with >=30 resolved:
10
    EV/trade after fees > 0  AND  Brier < 0.25  AND  it holds in the
11
    highest-volume city/market-type cell (not one lucky cluster).
12
Otherwise: iterate SHADOW_GAP_K / SHADOW_MIN_BRACKET_WIDTH, or retire.
13
NEVER enables live trading. Never writes auto_config.
14
"""
15
import sys, os, json, sqlite3, statistics, datetime, traceback
16
 
17
# Install directory that holds hermes.db and main.py, and where reports are
# written. The location is spelled with a shell-style $HERMES_HOME placeholder;
# Python does not expand that on its own, so it is resolved from the
# environment here. os.path.expandvars leaves the text unchanged when the
# variable is unset, so the previous values are kept in that case.
REPORT_DIR = os.path.expandvars("$HERMES_HOME")
DB = os.path.expandvars("$HERMES_HOME/hermes.db")

# Liquidity filter: a prediction is kept only if its market price and its
# recorded volume are both at or above these floors.
MIN_PRICE = 0.10
MIN_VOLUME = 20

# Decision-gate thresholds: minimum number of resolved liquid predictions
# before any scoring is attempted, and the Brier score the pivot must beat.
MIN_RESOLVED = 30
BRIER_GATE = 0.25

# Make `import main` resolve to the module in the install directory.
sys.path.insert(0, REPORT_DIR)
24
 
25
def log(*a):
    """Write the given values to stdout on one line and flush immediately."""
    print(*a, file=sys.stdout, flush=True)
27
 
28
def backfill_outcome(main, ticker):
    """Look up how a market settled via the Kalshi helper on ``main``.

    Calls ``main.kalshi_get("/markets/<ticker>")`` and reads the ``result``
    field of the ``market`` object in the response, case-insensitively.

    Returns:
        1 when the result is "yes", 0 when it is "no", and None for any other
        value, a missing field, or any exception raised during the lookup.
    """
    settled = {"yes": 1, "no": 0}
    try:
        payload = main.kalshi_get(f"/markets/{ticker}") or {}
        market = payload.get("market") or {}
        result = (market.get("result") or "").lower()
        return settled.get(result)
    except Exception:
        # Best-effort lookup: an unreachable API or odd payload means "unknown".
        return None
41
 
42
def main_eval():
    """Score the shadow pivot against the decision gate and publish a report.

    Steps, in order:
      1. Import the project's ``main`` module (used for the Kalshi helpers).
      2. Read every prediction whose recommendation starts with ``SHADOW``,
         left-joined to ``market_history`` for its volume.
      3. Keep rows with price >= MIN_PRICE and volume >= MIN_VOLUME.
      4. Resolve each kept row from its stored ``actual_outcome`` or, when
         that is not a recognisable yes/no value, via ``backfill_outcome``.
      5. With at least MIN_RESOLVED resolved rows, compute win rate, Brier
         score and mean EV after fees, overall and per (ticker prefix,
         market_type) cell, and emit a PASS/FAIL verdict.

    Every report line is printed as it is produced and the full report is
    handed to ``_write_and_notify`` on each exit path. The database is only
    read from.

    Raises:
        sqlite3.Error: if the database cannot be opened or queried. A FATAL
            report is published first, then the error is re-raised.
    """
    ts = datetime.datetime.now().strftime("%Y%m%d-%H%M")
    report_path = os.path.join(REPORT_DIR, f"c4_report_{ts}.txt")
    out = []

    def emit(s=""):
        # Record the line for the report and echo it to stdout right away.
        out.append(s)
        log(s)

    emit(f"=== Hermes C4 evaluation - {datetime.datetime.now().isoformat(timespec='seconds')} ===")

    try:
        import main
    except Exception as e:
        emit(f"FATAL: cannot import main.py: {e}")
        _write_and_notify(report_path, out, None)
        return

    # NOTE(review): if market_history can hold more than one row per ticker,
    # this join repeats a prediction once per history row and inflates every
    # count below - confirm the table is one-row-per-ticker.
    try:
        con = sqlite3.connect(DB)
        try:
            con.row_factory = sqlite3.Row
            rows = con.execute(
                """SELECT p.id, p.ticker, p.market_title, p.ensemble_probability ep,
                  p.market_price px, p.edge, p.recommendation rec, p.market_type,
                  p.actual_outcome, p.predicted_at,
                  COALESCE(mh.volume_fp, 0) vol
           FROM predictions p
           LEFT JOIN market_history mh ON mh.ticker = p.ticker
           WHERE p.recommendation LIKE 'SHADOW%'
           ORDER BY p.id"""
            ).fetchall()
        finally:
            # Everything needed is in `rows`; release the connection before
            # the (slow) API backfill and regardless of query errors.
            con.close()
    except sqlite3.Error as e:
        # Unattended job: make the failure visible in the report/ping, then
        # let it propagate so the process still exits non-zero.
        emit(f"FATAL: cannot read {DB}: {e}")
        _write_and_notify(report_path, out, None)
        raise

    emit(f"shadow predictions on record: {len(rows)}")
    if not rows:
        emit("No shadow predictions yet - collection has not produced data. "
             "Recommend: verify SHADOW_MODE scanner is running; re-check in ~1 week.")
        _write_and_notify(report_path, out, "INSUFFICIENT")
        return

    liquid = [r for r in rows if (r["px"] or 0) >= MIN_PRICE and (r["vol"] or 0) >= MIN_VOLUME]
    emit(f"after liquidity filter (px>=${MIN_PRICE:.2f} & vol>={MIN_VOLUME}): {len(liquid)}")

    # ticker -> backfilled outcome. Several rows can share a ticker; asking the
    # API once per ticker avoids redundant calls and guarantees those rows get
    # the same answer within a run.
    fetched = {}
    resolved = []
    for r in liquid:
        ao = r["actual_outcome"]
        if ao in (0, 1):
            outcome = int(ao)
        elif isinstance(ao, str) and ao.strip().lower() in ("yes", "no"):
            outcome = 1 if ao.strip().lower() == "yes" else 0
        else:
            ticker = r["ticker"]
            if ticker not in fetched:
                fetched[ticker] = backfill_outcome(main, ticker)
            outcome = fetched[ticker]
        if outcome is not None:
            resolved.append((r, outcome))
    emit(f"resolved (liquid): {len(resolved)} / need {MIN_RESOLVED}")

    if len(resolved) < MIN_RESOLVED:
        emit("")
        emit(f"VERDICT: INSUFFICIENT DATA - {len(resolved)} resolved liquid predictions "
             f"(< {MIN_RESOLVED}). Do NOT evaluate edge yet. Keep SHADOW_MODE running; "
             f"re-run this eval in ~7 days. (Total shadow rows {len(rows)}, "
             f"liquid {len(liquid)} - if liquid stays tiny, the pivot's market set "
             f"may be structurally illiquid → that itself is a finding.)")
        _write_and_notify(report_path, out, "INSUFFICIENT")
        return

    briers, wins, evs = [], 0, []
    cell = {}  # (ticker prefix, market_type) -> [count, wins, summed EV]
    for r, outcome in resolved:
        # A missing model probability is scored as a coin flip.
        ep = r["ep"] if r["ep"] is not None else 0.5
        briers.append((ep - outcome) ** 2)
        side_yes = "YES" in (r["rec"] or "").upper()
        px = r["px"] or 0.0
        try:
            fee = main.kalshi_taker_fee(px)
        except Exception:
            fee = 0.0
        won = (outcome == 1) if side_yes else (outcome == 0)
        # NOTE(review): this payoff treats px as the cost of the side taken.
        # If market_price is always the YES price, NO-side rows are mis-scored
        # - confirm against the scanner that writes these rows.
        ev = ((1.0 - px) - fee) if won else (-(px + fee))
        evs.append(ev)
        wins += 1 if won else 0
        # Tolerate a NULL ticker instead of crashing the whole evaluation.
        city = (r["ticker"] or "?").split("-")[0]
        key = (city, r["market_type"] or "?")
        c = cell.setdefault(key, [0, 0, 0.0])
        c[0] += 1
        c[1] += 1 if won else 0
        c[2] += ev

    n = len(resolved)
    brier = statistics.mean(briers)
    wr = wins / n
    ev_mean = statistics.mean(evs)
    emit("")
    emit(f"n={n}  WR={wr*100:.1f}%  Brier={brier:.4f}  EV/trade=${ev_mean:+.3f} (after fees)")
    emit("by cell (city, market_type):")
    best_cell = None
    # Largest cell first; the first one listed is the cell the gate checks.
    for key, (cn, cw, cev) in sorted(cell.items(), key=lambda kv: -kv[1][0]):
        cev_avg = cev / cn if cn else 0
        emit(f"  {key[0]:14s} {key[1]:6s}  n={cn:3d}  WR={cw/cn*100:4.0f}%  EV=${cev_avg:+.3f}")
        if best_cell is None:
            best_cell = (key, cn, cw / cn, cev_avg)

    # The largest cell must have at least 10 resolved rows and positive EV.
    cell_ok = bool(best_cell and best_cell[1] >= 10 and best_cell[3] > 0)
    passed = (ev_mean > 0) and (brier < BRIER_GATE) and cell_ok
    emit("")
    if passed:
        emit("VERDICT: GATE PASSED - pivot shows positive EV after fees, Brier "
             f"< {BRIER_GATE}, and holds in the highest-volume cell "
             f"{best_cell[0]} (n={best_cell[1]}, EV=${best_cell[3]:+.3f}). "
             "RECOMMEND: propose a SMALL live pilot to the user. Do NOT auto-enable.")
    else:
        why = []
        if ev_mean <= 0:
            why.append(f"EV/trade ${ev_mean:+.3f} not > 0")
        if brier >= BRIER_GATE:
            why.append(f"Brier {brier:.3f} not < {BRIER_GATE}")
        if not cell_ok:
            why.append("does not hold in the highest-volume cell")
        emit("VERDICT: GATE FAILED - " + "; ".join(why) + ". "
             "RECOMMEND: iterate SHADOW_GAP_K / SHADOW_MIN_BRACKET_WIDTH, or retire. "
             "Do NOT enable live trading.")
    emit("")
    emit("(Auto-trading untouched. This script never writes auto_config.)")
    _write_and_notify(report_path, out, "PASS" if passed else "FAIL")
161
 
162
def _write_and_notify(path, lines, status):
163
    body = "\n".join(lines)
164
    try:
165
        with open(path, "w") as f:
166
            f.write(body + "\n")
167
    except Exception:
168
        pass
169
 
170
    try:
171
        import main
172
        hook = getattr(main, "DISCORD_WEBHOOK", None) or os.getenv("DISCORD_WEBHOOK")
173
        if hook:
174
            import urllib.request
175
            tag = {"PASS": "✅", "FAIL": "❌", "INSUFFICIENT": "⏳"}.get(status, "ℹ️")
176
            msg = f"{tag} **Hermes C4 eval** ({status})\n```\n{body[-1500:]}\n```"
177
            req = urllib.request.Request(
178
                hook, data=json.dumps({"content": msg}).encode(),
179
                headers={"Content-Type": "application/json"})
180
            urllib.request.urlopen(req, timeout=10)
181
    except Exception:
182
        pass
183
 
184
if __name__ == "__main__":
    # Script entry point: run the evaluation once. Any uncaught error is
    # printed as a traceback and turned into exit status 1.
    try:
        main_eval()
    except Exception:
        traceback.print_exc()
        raise SystemExit(1)