| 1 | """Hermes root-cause analysis: empirical bracket-hit model vs Gaussian. |
| 2 | |
| 3 | Reads the joined historical dataset, reconstructs (NWS forecast, bracket bounds, |
| 4 | hit/miss, market price), and: |
| 5 | 1. Examines the distance->hit relationship vs what the Gaussian predicts |
| 6 | 2. Builds an empirical P(hit | |forecast - bracket_mid|) lookup (Laplace-smoothed) |
| 7 | 3. Walk-forward backtests empirical vs Gaussian: Brier, WR, EV, total P&L |
| 8 | No external deps (stdlib only). |
| 9 | """ |
| 10 | import csv, math, os, re, statistics |
| 11 | from collections import defaultdict |
| 12 | |
| 13 | PSV = os.environ.get("HERMES_DATASET", "data/sample_trades.psv") |
| 14 | COLS = ["id","ticker","market_title","nws_forecast","side","edge","ensemble_probability", |
| 15 | "pnl","opened_at","cost","count","avg_price","nws_probability","market_price", |
| 16 | "grok_probability","actual_outcome","correct","market_type"] |
| 17 | |
| 18 | BRACKET_RE = re.compile(r"be\s+\**\s*(-?\d+)\s*-\s*(-?\d+)\s*°") |
| 19 | DEFAULT_MAE_FALLBACK = 3.0 |
| 20 | |
| 21 | def f(x): |
| 22 | try: return float(x) |
| 23 | except (TypeError, ValueError): return None |
| 24 | |
| 25 | def load(): |
| 26 | rows = [] |
| 27 | with open(PSV) as fh: |
| 28 | for line in fh: |
| 29 | parts = line.rstrip("\n").split("|") |
| 30 | if len(parts) != len(COLS): |
| 31 | continue |
| 32 | r = dict(zip(COLS, parts)) |
| 33 | m = BRACKET_RE.search(r["market_title"]) |
| 34 | if not m: |
| 35 | continue |
| 36 | lo, hi = int(m.group(1)), int(m.group(2)) |
| 37 | if lo > hi: lo, hi = hi, lo |
| 38 | r["lo"], r["hi"] = lo, hi |
| 39 | r["mid"] = (lo + hi) / 2.0 |
| 40 | r["width"] = hi - lo |
| 41 | r["nws"] = f(r["nws_forecast"]) |
| 42 | r["hit"] = 1 if r["actual_outcome"].strip().lower() == "yes" else 0 |
| 43 | r["mkt_price"] = f(r["avg_price"]) |
| 44 | r["mkt_p_hit"] = (1.0 - r["mkt_price"]) if r["mkt_price"] is not None else None |
| 45 | r["gauss_p_hit"] = f(r["nws_probability"]) |
| 46 | r["pnl"] = f(r["pnl"]); r["cost"] = f(r["cost"]); r["cnt"] = f(r["count"]) |
| 47 | rows.append(r) |
| 48 | return rows |
| 49 | |
| 50 | def gaussian_p_hit(nws, lo, hi, mae=DEFAULT_MAE_FALLBACK): |
| 51 | """Reproduce nws_implied_probability bracket branch.""" |
| 52 | sigma = mae * math.sqrt(math.pi / 2.0) |
| 53 | z_lo = (lo - nws) / sigma |
| 54 | z_hi = (hi - nws) / sigma |
| 55 | cdf = lambda z: 0.5 * (1.0 + math.erf(z / math.sqrt(2))) |
| 56 | return max(0.0, cdf(z_hi) - cdf(z_lo)) |
| 57 | |
| 58 | def absd_bin(ad): |
| 59 | |
| 60 | for edge in (0.5, 1.5, 2.5, 3.5, 5.0, 8.0): |
| 61 | if ad < edge: return edge |
| 62 | return 99.0 |
| 63 | |
| 64 | def build_empirical(train): |
| 65 | """P(hit | |d| bin) with Laplace smoothing toward the global base rate.""" |
| 66 | by_bin = defaultdict(lambda: [0,0]) |
| 67 | g_hits = g_n = 0 |
| 68 | for r in train: |
| 69 | if r["nws"] is None: continue |
| 70 | b = absd_bin(abs(r["nws"] - r["mid"])) |
| 71 | by_bin[b][0] += r["hit"]; by_bin[b][1] += 1 |
| 72 | g_hits += r["hit"]; g_n += 1 |
| 73 | base = (g_hits / g_n) if g_n else 0.15 |
| 74 | ALPHA = 4.0 |
| 75 | table = {} |
| 76 | for b,(h,n) in by_bin.items(): |
| 77 | table[b] = (h + ALPHA*base) / (n + ALPHA) |
| 78 | return table, base |
| 79 | |
| 80 | def emp_p_hit(table, base, nws, mid): |
| 81 | if nws is None: return base |
| 82 | return table.get(absd_bin(abs(nws - mid)), base) |
| 83 | |
| 84 | def brier(pred, outcome): |
| 85 | return (pred - outcome) ** 2 |
| 86 | |
| 87 | def walk_forward(rows, min_edge): |
| 88 | """Chronological. For trade i, train empirical on 0..i-1 only. |
| 89 | Simulate: bet NO if p_hit <= mkt_p_hit - min_edge; YES if p_hit >= mkt_p_hit + min_edge.""" |
| 90 | res = {"gauss": dict(n=0,wins=0,pnl=0.0,brier=[]), |
| 91 | "emp": dict(n=0,wins=0,pnl=0.0,brier=[]), |
| 92 | "actual_all_brier_g":[], "actual_all_brier_e":[]} |
| 93 | usable = [r for r in rows if r["nws"] is not None and r["mkt_p_hit"] is not None |
| 94 | and r["pnl"] is not None and r["cnt"]] |
| 95 | for i, r in enumerate(usable): |
| 96 | train = usable[:i] |
| 97 | if len(train) < 15: |
| 98 | continue |
| 99 | table, base = build_empirical(train) |
| 100 | pe = emp_p_hit(table, base, r["nws"], r["mid"]) |
| 101 | pg = gaussian_p_hit(r["nws"], r["lo"], r["hi"]) |
| 102 | mp = r["mkt_p_hit"] |
| 103 | |
| 104 | res["actual_all_brier_g"].append(brier(pg, r["hit"])) |
| 105 | res["actual_all_brier_e"].append(brier(pe, r["hit"])) |
| 106 | per_contract_no_win = (1.0 - r["mkt_price"]) |
| 107 | for tag, p in (("gauss", pg), ("emp", pe)): |
| 108 | d = res[tag] |
| 109 | if p <= mp - min_edge: |
| 110 | d["n"] += 1 |
| 111 | won = (r["hit"] == 0) |
| 112 | d["pnl"] += (r["cnt"]*per_contract_no_win) if won else (-r["cost"]) |
| 113 | d["wins"] += 1 if won else 0 |
| 114 | d["brier"].append(brier(p, r["hit"])) |
| 115 | elif p >= mp + min_edge: |
| 116 | d["n"] += 1 |
| 117 | won = (r["hit"] == 1) |
| 118 | yes_price = 1.0 - r["mkt_price"] |
| 119 | d["pnl"] += (r["cnt"]*(1.0-yes_price)) if won else (-r["cnt"]*yes_price) |
| 120 | d["wins"] += 1 if won else 0 |
| 121 | d["brier"].append(brier(p, r["hit"])) |
| 122 | return res, usable |
| 123 | |
| 124 | def main(): |
| 125 | rows = load() |
| 126 | print(f"Loaded {len(rows)} bracket rows; " |
| 127 | f"{sum(1 for r in rows if r['nws'] is not None)} have NWS forecast.\n") |
| 128 | |
| 129 | print("=== Empirical P(hit) by |NWS - bracket_mid|, vs Gaussian prediction ===") |
| 130 | buckets = defaultdict(lambda:[0,0,[]]) |
| 131 | for r in rows: |
| 132 | if r["nws"] is None: continue |
| 133 | b = absd_bin(abs(r["nws"]-r["mid"])) |
| 134 | buckets[b][0]+=r["hit"]; buckets[b][1]+=1 |
| 135 | buckets[b][2].append(gaussian_p_hit(r["nws"],r["lo"],r["hi"])) |
| 136 | print(f"{'|d|<':>6} {'n':>4} {'hits':>4} {'emp_P(hit)':>10} {'gauss_P(hit)':>12} gap") |
| 137 | for b in sorted(buckets): |
| 138 | h,n,gs = buckets[b] |
| 139 | emp = h/n if n else 0 |
| 140 | g = statistics.mean(gs) if gs else 0 |
| 141 | print(f"{b:>6} {n:>4} {h:>4} {emp:>10.3f} {g:>12.3f} {emp-g:+.3f}") |
| 142 | |
| 143 | bg = [brier(gaussian_p_hit(r["nws"],r["lo"],r["hi"]), r["hit"]) |
| 144 | for r in rows if r["nws"] is not None] |
| 145 | print(f"\nGaussian Brier (all {len(bg)} w/ NWS): {statistics.mean(bg):.4f} " |
| 146 | f"(0.25=coinflip, lower=better)") |
| 147 | |
| 148 | for thr in (0.45, 0.35, 0.25, 0.15): |
| 149 | res, usable = walk_forward(rows, thr) |
| 150 | print(f"\n=== WALK-FORWARD @ min_edge={thr} (usable n={len(usable)}, " |
| 151 | f"warmup 15) ===") |
| 152 | for tag in ("gauss","emp"): |
| 153 | d = res[tag] |
| 154 | n = d["n"]; wr = (d["wins"]/n*100) if n else 0 |
| 155 | ev = (d["pnl"]/n) if n else 0 |
| 156 | bm = statistics.mean(d["brier"]) if d["brier"] else float("nan") |
| 157 | print(f" {tag:5s}: trades={n:3d} WR={wr:5.1f}% " |
| 158 | f"P&L=${d['pnl']:+7.2f} EV/trade=${ev:+.3f} Brier(traded)={bm:.4f}") |
| 159 | if res["actual_all_brier_g"]: |
| 160 | print(f" calibration Brier on ALL resolved (walk-fwd): " |
| 161 | f"gauss={statistics.mean(res['actual_all_brier_g']):.4f} " |
| 162 | f"emp={statistics.mean(res['actual_all_brier_e']):.4f}") |
| 163 | |
| 164 | if __name__ == "__main__": |
| 165 | main() |