Zion Boggan
repos/Prediction Market Bot Postmortem/eval/empirical_analysis.py
zionboggan.com ↗
165 lines · python
History for this file →
1
"""Hermes root-cause analysis: empirical bracket-hit model vs Gaussian.
2
 
3
Reads the joined historical dataset, reconstructs (NWS forecast, bracket bounds,
4
hit/miss, market price), and:
5
  1. Examines the distance->hit relationship vs what the Gaussian predicts
6
  2. Builds an empirical P(hit | |forecast - bracket_mid|) lookup (Laplace-smoothed)
7
  3. Walk-forward backtests empirical vs Gaussian: Brier, WR, EV, total P&L
8
No external deps (stdlib only).
9
"""
10
import csv, math, os, re, statistics
11
from collections import defaultdict
12
 
13
PSV = os.environ.get("HERMES_DATASET", "data/sample_trades.psv")
14
COLS = ["id","ticker","market_title","nws_forecast","side","edge","ensemble_probability",
15
        "pnl","opened_at","cost","count","avg_price","nws_probability","market_price",
16
        "grok_probability","actual_outcome","correct","market_type"]
17
 
18
BRACKET_RE = re.compile(r"be\s+\**\s*(-?\d+)\s*-\s*(-?\d+)\s*°")
19
DEFAULT_MAE_FALLBACK = 3.0
20
 
21
def f(x):
22
    try: return float(x)
23
    except (TypeError, ValueError): return None
24
 
25
def load():
26
    rows = []
27
    with open(PSV) as fh:
28
        for line in fh:
29
            parts = line.rstrip("\n").split("|")
30
            if len(parts) != len(COLS):
31
                continue
32
            r = dict(zip(COLS, parts))
33
            m = BRACKET_RE.search(r["market_title"])
34
            if not m:
35
                continue
36
            lo, hi = int(m.group(1)), int(m.group(2))
37
            if lo > hi: lo, hi = hi, lo
38
            r["lo"], r["hi"] = lo, hi
39
            r["mid"] = (lo + hi) / 2.0
40
            r["width"] = hi - lo
41
            r["nws"] = f(r["nws_forecast"])
42
            r["hit"] = 1 if r["actual_outcome"].strip().lower() == "yes" else 0
43
            r["mkt_price"] = f(r["avg_price"])
44
            r["mkt_p_hit"] = (1.0 - r["mkt_price"]) if r["mkt_price"] is not None else None
45
            r["gauss_p_hit"] = f(r["nws_probability"])
46
            r["pnl"] = f(r["pnl"]); r["cost"] = f(r["cost"]); r["cnt"] = f(r["count"])
47
            rows.append(r)
48
    return rows
49
 
50
def gaussian_p_hit(nws, lo, hi, mae=DEFAULT_MAE_FALLBACK):
51
    """Reproduce nws_implied_probability bracket branch."""
52
    sigma = mae * math.sqrt(math.pi / 2.0)
53
    z_lo = (lo - nws) / sigma
54
    z_hi = (hi - nws) / sigma
55
    cdf = lambda z: 0.5 * (1.0 + math.erf(z / math.sqrt(2)))
56
    return max(0.0, cdf(z_hi) - cdf(z_lo))
57
 
58
def absd_bin(ad):
59
 
60
    for edge in (0.5, 1.5, 2.5, 3.5, 5.0, 8.0):
61
        if ad < edge: return edge
62
    return 99.0
63
 
64
def build_empirical(train):
65
    """P(hit | |d| bin) with Laplace smoothing toward the global base rate."""
66
    by_bin = defaultdict(lambda: [0,0])
67
    g_hits = g_n = 0
68
    for r in train:
69
        if r["nws"] is None: continue
70
        b = absd_bin(abs(r["nws"] - r["mid"]))
71
        by_bin[b][0] += r["hit"]; by_bin[b][1] += 1
72
        g_hits += r["hit"]; g_n += 1
73
    base = (g_hits / g_n) if g_n else 0.15
74
    ALPHA = 4.0
75
    table = {}
76
    for b,(h,n) in by_bin.items():
77
        table[b] = (h + ALPHA*base) / (n + ALPHA)
78
    return table, base
79
 
80
def emp_p_hit(table, base, nws, mid):
81
    if nws is None: return base
82
    return table.get(absd_bin(abs(nws - mid)), base)
83
 
84
def brier(pred, outcome):
85
    return (pred - outcome) ** 2
86
 
87
def walk_forward(rows, min_edge):
88
    """Chronological. For trade i, train empirical on 0..i-1 only.
89
    Simulate: bet NO if p_hit <= mkt_p_hit - min_edge; YES if p_hit >= mkt_p_hit + min_edge."""
90
    res = {"gauss": dict(n=0,wins=0,pnl=0.0,brier=[]),
91
           "emp":   dict(n=0,wins=0,pnl=0.0,brier=[]),
92
           "actual_all_brier_g":[], "actual_all_brier_e":[]}
93
    usable = [r for r in rows if r["nws"] is not None and r["mkt_p_hit"] is not None
94
              and r["pnl"] is not None and r["cnt"]]
95
    for i, r in enumerate(usable):
96
        train = usable[:i]
97
        if len(train) < 15:
98
            continue
99
        table, base = build_empirical(train)
100
        pe = emp_p_hit(table, base, r["nws"], r["mid"])
101
        pg = gaussian_p_hit(r["nws"], r["lo"], r["hi"])
102
        mp = r["mkt_p_hit"]
103
 
104
        res["actual_all_brier_g"].append(brier(pg, r["hit"]))
105
        res["actual_all_brier_e"].append(brier(pe, r["hit"]))
106
        per_contract_no_win = (1.0 - r["mkt_price"])
107
        for tag, p in (("gauss", pg), ("emp", pe)):
108
            d = res[tag]
109
            if p <= mp - min_edge:
110
                d["n"] += 1
111
                won = (r["hit"] == 0)
112
                d["pnl"] += (r["cnt"]*per_contract_no_win) if won else (-r["cost"])
113
                d["wins"] += 1 if won else 0
114
                d["brier"].append(brier(p, r["hit"]))
115
            elif p >= mp + min_edge:
116
                d["n"] += 1
117
                won = (r["hit"] == 1)
118
                yes_price = 1.0 - r["mkt_price"]
119
                d["pnl"] += (r["cnt"]*(1.0-yes_price)) if won else (-r["cnt"]*yes_price)
120
                d["wins"] += 1 if won else 0
121
                d["brier"].append(brier(p, r["hit"]))
122
    return res, usable
123
 
124
def main():
125
    rows = load()
126
    print(f"Loaded {len(rows)} bracket rows; "
127
          f"{sum(1 for r in rows if r['nws'] is not None)} have NWS forecast.\n")
128
 
129
    print("=== Empirical P(hit) by |NWS - bracket_mid|, vs Gaussian prediction ===")
130
    buckets = defaultdict(lambda:[0,0,[]])
131
    for r in rows:
132
        if r["nws"] is None: continue
133
        b = absd_bin(abs(r["nws"]-r["mid"]))
134
        buckets[b][0]+=r["hit"]; buckets[b][1]+=1
135
        buckets[b][2].append(gaussian_p_hit(r["nws"],r["lo"],r["hi"]))
136
    print(f"{'|d|<':>6} {'n':>4} {'hits':>4} {'emp_P(hit)':>10} {'gauss_P(hit)':>12}  gap")
137
    for b in sorted(buckets):
138
        h,n,gs = buckets[b]
139
        emp = h/n if n else 0
140
        g = statistics.mean(gs) if gs else 0
141
        print(f"{b:>6} {n:>4} {h:>4} {emp:>10.3f} {g:>12.3f}  {emp-g:+.3f}")
142
 
143
    bg = [brier(gaussian_p_hit(r["nws"],r["lo"],r["hi"]), r["hit"])
144
          for r in rows if r["nws"] is not None]
145
    print(f"\nGaussian Brier (all {len(bg)} w/ NWS): {statistics.mean(bg):.4f}  "
146
          f"(0.25=coinflip, lower=better)")
147
 
148
    for thr in (0.45, 0.35, 0.25, 0.15):
149
        res, usable = walk_forward(rows, thr)
150
        print(f"\n=== WALK-FORWARD @ min_edge={thr}  (usable n={len(usable)}, "
151
              f"warmup 15) ===")
152
        for tag in ("gauss","emp"):
153
            d = res[tag]
154
            n = d["n"]; wr = (d["wins"]/n*100) if n else 0
155
            ev = (d["pnl"]/n) if n else 0
156
            bm = statistics.mean(d["brier"]) if d["brier"] else float("nan")
157
            print(f"  {tag:5s}: trades={n:3d} WR={wr:5.1f}% "
158
                  f"P&L=${d['pnl']:+7.2f} EV/trade=${ev:+.3f} Brier(traded)={bm:.4f}")
159
        if res["actual_all_brier_g"]:
160
            print(f"  calibration Brier on ALL resolved (walk-fwd): "
161
                  f"gauss={statistics.mean(res['actual_all_brier_g']):.4f}  "
162
                  f"emp={statistics.mean(res['actual_all_brier_e']):.4f}")
163
 
164
if __name__ == "__main__":
165
    main()