Zion Boggan
repos/Security Portfolio/cti-detection-automation/index.html
zionboggan.com ↗
385 lines · html
History for this file →
1
<!doctype html>
2
<html lang="en">
3
<head>
4
<meta charset="utf-8">
5
<meta name="viewport" content="width=device-width, initial-scale=1.0">
6
<title>CTI Detection Automation | Zion Boggan</title>
7
<meta name="description" content="Pulls indicators from five live threat-intel feeds, dedupes across them, extracts the MITRE techniques, generates Wazuh CDB lists and a tagged XML ruleset, then emails an analyst a signed, single-use review link before anything reaches the SIEM.">
8
<link rel="icon" href="data:image/svg+xml,%3Csvg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 32 32'%3E%3Crect width='32' height='32' rx='6' fill='%230c0e12'/%3E%3Ctext x='16' y='22' font-family='monospace' font-size='15' fill='%236cc7b8' text-anchor='middle'%3Ezb%3C/text%3E%3C/svg%3E">
9
<style>
10
  :root{
11
    --bg:#0c0e12; --bg2:#0f1217; --panel:#14181f; --panel2:#171c24;
12
    --line:#222936; --line2:#2c3543;
13
    --ink:#e8eaed; --soft:#c3cad4; --muted:#8a94a3; --faint:#5d6675;
14
    --accent:#6cc7b8; --accent-dim:#274b47;
15
    --maxw:1020px;
16
  }
17
  *{box-sizing:border-box;}
18
  /* Smooth anchor scrolling is animation; enable it only when the user has not requested reduced motion. */
  @media (prefers-reduced-motion:no-preference){html{scroll-behavior:smooth;}}
19
  body{margin:0;background:var(--bg);color:var(--ink);
20
    font-family:-apple-system,BlinkMacSystemFont,"Segoe UI",Roboto,Helvetica,Arial,sans-serif;
21
    font-size:16px;line-height:1.65;-webkit-font-smoothing:antialiased;}
22
  .mono{font-family:ui-monospace,SFMono-Regular,"SF Mono",Menlo,Consolas,monospace;}
23
  a{color:var(--accent);text-decoration:none;}
24
  a:hover{color:#8fe0d2;}
25
  .wrap{max-width:var(--maxw);margin:0 auto;padding:0 24px;}
26
 
27
  /* nav */
28
  nav{position:sticky;top:0;z-index:20;background:rgba(12,14,18,.82);
29
    backdrop-filter:blur(10px);border-bottom:1px solid var(--line);}
30
  nav .wrap{display:flex;align-items:center;justify-content:space-between;height:58px;}
31
  nav .brand{font-weight:600;letter-spacing:.2px;}
32
  nav .brand .dot{color:var(--accent);}
33
  nav .links{display:flex;gap:26px;font-size:13.5px;}
34
  nav .links a{color:var(--muted);}
35
  nav .links a:hover{color:var(--ink);}
36
  @media(max-width:680px){nav .links{display:none;}}
37
 
38
  /* hero */
39
  header.hero{padding:74px 0 54px;border-bottom:1px solid var(--line);
40
    background:radial-gradient(900px 380px at 78% -10%, #11201e 0%, transparent 60%);}
41
  .avail{font-size:12.5px;letter-spacing:1.5px;text-transform:uppercase;color:var(--accent);
42
    display:flex;align-items:center;gap:9px;margin-bottom:20px;}
43
  .avail .pulse{width:7px;height:7px;border-radius:50%;background:var(--accent);
44
    box-shadow:0 0 0 0 rgba(108,199,184,.5);animation:p 2.4s infinite;}
45
  @keyframes p{0%{box-shadow:0 0 0 0 rgba(108,199,184,.45)}70%{box-shadow:0 0 0 8px rgba(108,199,184,0)}100%{box-shadow:0 0 0 0 rgba(108,199,184,0)}}
46
  h1{font-size:clamp(34px,6vw,52px);line-height:1.05;margin:0 0 8px;letter-spacing:-1px;font-weight:680;}
47
  .hero .sub{font-size:clamp(16px,2.4vw,20px);color:var(--soft);margin:0 0 24px;font-weight:500;}
48
  .hero .lede{max-width:660px;color:var(--soft);font-size:17px;margin:0 0 28px;}
49
  .hero .lede b{color:var(--ink);font-weight:600;}
50
  .cta{display:flex;flex-wrap:wrap;gap:12px;align-items:center;}
51
  .btn{display:inline-flex;align-items:center;gap:8px;padding:10px 18px;border-radius:8px;
52
    font-size:14.5px;font-weight:550;border:1px solid var(--line2);color:var(--ink);background:var(--panel);}
53
  .btn:hover{border-color:var(--accent-dim);background:var(--panel2);color:var(--ink);}
54
  .btn.primary{background:var(--accent);color:#06231f;border-color:var(--accent);font-weight:650;}
55
  .btn.primary:hover{background:#8fe0d2;color:#06231f;}
56
  .meta{margin-top:26px;display:flex;flex-wrap:wrap;gap:8px 22px;font-size:13px;color:var(--muted);}
57
  .meta .mono{color:var(--faint);}
58
 
59
  /* sections */
60
  section{padding:64px 0;border-bottom:1px solid var(--line);}
61
  .shead{display:flex;align-items:baseline;gap:14px;margin-bottom:30px;}
62
  .shead .idx{font-size:13px;color:var(--accent);letter-spacing:1px;}
63
  .shead h2{font-size:14px;letter-spacing:2px;text-transform:uppercase;color:var(--muted);margin:0;font-weight:600;}
64
  .shead .rule{flex:1;height:1px;background:var(--line);}
65
 
66
  /* flagship */
67
  .flag{background:linear-gradient(180deg,var(--panel) 0%,var(--bg2) 100%);
68
    border:1px solid var(--line2);border-radius:14px;overflow:hidden;}
69
  .flag .top{padding:30px 32px 8px;}
70
  .flag .tag{font-size:12px;letter-spacing:1.5px;text-transform:uppercase;color:var(--accent);margin-bottom:12px;}
71
  .flag h3{font-size:27px;margin:0 0 6px;letter-spacing:-.4px;}
72
  .flag h3 .v{font-size:13px;color:var(--muted);font-weight:500;margin-left:8px;letter-spacing:0;}
73
  .flag .grid{display:grid;grid-template-columns:1.25fr 1fr;gap:30px;padding:14px 32px 30px;}
74
  .flag p{color:var(--soft);margin:0 0 16px;}
75
  .flag .stats{display:grid;grid-template-columns:1fr 1fr;gap:12px;margin-top:6px;}
76
  .stat{background:var(--bg);border:1px solid var(--line);border-radius:9px;padding:13px 15px;}
77
  .stat .n{font-size:21px;font-weight:680;color:var(--ink);}
78
  .stat .k{font-size:12px;color:var(--muted);margin-top:2px;}
79
  .spec{background:var(--bg);border:1px solid var(--line);border-radius:10px;padding:18px 18px;}
80
  .spec .sk{font-size:11px;letter-spacing:1.5px;text-transform:uppercase;color:var(--faint);margin-bottom:10px;}
81
  .spec ul{margin:0;padding:0;list-style:none;font-size:13.5px;}
82
  .spec li{padding:6px 0;border-top:1px solid var(--line);color:var(--soft);display:flex;justify-content:space-between;gap:14px;}
83
  .spec li:first-child{border-top:none;}
84
  .spec li span{color:var(--muted);}
85
  .flag .foot{padding:0 32px 28px;display:flex;gap:18px;flex-wrap:wrap;font-size:14px;}
86
  @media(max-width:720px){.flag .grid{grid-template-columns:1fr;}}
87
 
88
  /* lab cards */
89
  .cards{display:grid;grid-template-columns:1fr 1fr;gap:20px;}
90
  @media(max-width:680px){.cards{grid-template-columns:1fr;}}
91
  .card{border:1px solid var(--line);border-radius:12px;overflow:hidden;background:var(--panel);
92
    display:flex;flex-direction:column;transition:border-color .15s,transform .15s;}
93
  .card:hover{border-color:var(--accent-dim);transform:translateY(-2px);}
94
  .card .thumb{height:172px;overflow:hidden;border-bottom:1px solid var(--line);background:#fff;}
95
  .card .thumb img{width:100%;height:100%;object-fit:cover;object-position:top left;display:block;}
96
  .card .body{padding:18px 20px 20px;display:flex;flex-direction:column;flex:1;}
97
  .card h3{margin:0 0 9px;font-size:17px;}
98
  .card p{margin:0 0 14px;font-size:14px;color:var(--soft);flex:1;}
99
  .tags{display:flex;flex-wrap:wrap;gap:6px;margin-bottom:14px;}
100
  .tags span{font-size:11.5px;color:var(--muted);background:var(--bg);border:1px solid var(--line);
101
    border-radius:5px;padding:3px 8px;}
102
  .card .lnk{font-size:13.5px;font-family:ui-monospace,Menlo,monospace;}
103
  /* \2192 is the rightwards arrow; written as a CSS escape so it cannot be corrupted by a charset mismatch again. */
  .card .lnk::after{content:" \2192";}
104
 
105
  /* research */
106
  .rlede{color:var(--soft);max-width:680px;margin:-6px 0 26px;}
107
  .research{display:flex;flex-direction:column;gap:0;border:1px solid var(--line);border-radius:12px;overflow:hidden;}
108
  .ritem{display:grid;grid-template-columns:120px 1fr auto;gap:18px;align-items:center;
109
    padding:18px 22px;border-top:1px solid var(--line);}
110
  .ritem:first-child{border-top:none;}
111
  .ritem:hover{background:var(--panel);}
112
  .ritem .cls{font-size:11px;letter-spacing:.5px;text-transform:uppercase;color:var(--accent);}
113
  .ritem h3{margin:0 0 3px;font-size:16px;}
114
  .ritem p{margin:0;font-size:13.5px;color:var(--muted);}
115
  .ritem .go{font-family:ui-monospace,Menlo,monospace;font-size:13px;white-space:nowrap;}
116
  @media(max-width:680px){.ritem{grid-template-columns:1fr;gap:6px;}.ritem .go{margin-top:4px;}}
117
  .progs{margin-top:22px;}
118
  .progs .sk{font-size:11px;letter-spacing:1.5px;text-transform:uppercase;color:var(--faint);margin-bottom:11px;}
119
  .progs .row{display:flex;flex-wrap:wrap;gap:7px;}
120
  .progs .row span{font-size:12.5px;color:var(--soft);background:var(--panel);border:1px solid var(--line);
121
    border-radius:6px;padding:4px 10px;}
122
 
123
  /* credentials */
124
  .cred{display:grid;grid-template-columns:1.1fr 1fr;gap:28px;}
125
  @media(max-width:680px){.cred{grid-template-columns:1fr;}}
126
  .cred p{color:var(--soft);margin:0 0 14px;}
127
  .cred .role{font-size:14px;color:var(--muted);}
128
  .cred .role b{color:var(--ink);font-weight:600;}
129
  .certs{list-style:none;margin:0;padding:0;}
130
  .certs li{padding:9px 0;border-top:1px solid var(--line);font-size:14px;color:var(--soft);
131
    display:flex;gap:10px;align-items:baseline;}
132
  .certs li:first-child{border-top:none;}
133
  .certs li .c{color:var(--accent);font-family:ui-monospace,Menlo,monospace;font-size:12px;}
134
 
135
  footer{padding:46px 0 64px;}
136
  footer .row{display:flex;flex-wrap:wrap;justify-content:space-between;gap:18px;align-items:center;}
137
  footer .links a{color:var(--soft);margin-right:20px;font-size:14px;}
138
  footer .note{color:var(--faint);font-size:12.5px;max-width:520px;}
139
 
140
  /* detail pages */
141
  .detail-hero{padding:40px 0 28px;}
142
  .back{display:inline-block;font-size:13px;color:var(--muted);margin-bottom:22px;font-family:ui-monospace,Menlo,monospace;}
143
  .back:hover{color:var(--ink);}
144
  .kicker{font-size:12px;letter-spacing:2px;text-transform:uppercase;color:var(--accent);margin-bottom:13px;font-family:ui-monospace,Menlo,monospace;}
145
  .detail-hero h1{font-size:clamp(28px,5vw,42px);margin:0 0 12px;letter-spacing:-.6px;}
146
  .detail-hero .tagline{font-size:clamp(16px,2.2vw,19px);color:var(--soft);max-width:780px;margin:0 0 18px;}
147
  .facts{display:grid;grid-template-columns:repeat(auto-fit,minmax(148px,1fr));gap:12px;margin-top:24px;}
148
  figure{margin:0;}
149
  .shot{border:1px solid var(--line2);border-radius:12px;overflow:hidden;background:#fff;margin:30px 0 6px;}
150
  .shot img,.shot video{display:block;width:100%;height:auto;}
151
  figcaption{font-size:13px;color:var(--muted);margin:11px 2px 0;}
152
  .content{padding:6px 0 0;}
153
  .content h2{font-size:13px;letter-spacing:2px;text-transform:uppercase;color:var(--muted);margin:44px 0 16px;font-weight:600;border-top:1px solid var(--line);padding-top:30px;}
154
  .content h2.first{border-top:none;padding-top:6px;margin-top:18px;}
155
  .content p{color:var(--soft);margin:0 0 16px;}
156
  .content ul,.content ol{color:var(--soft);margin:0 0 16px;padding-left:22px;}
157
  .content li{margin:6px 0;}
158
  .content strong{color:var(--ink);font-weight:600;}
159
  .content code{font-family:ui-monospace,Menlo,monospace;font-size:13px;background:var(--panel2);border:1px solid var(--line);border-radius:4px;padding:1px 5px;color:var(--soft);}
160
  .content pre{background:var(--bg2);border:1px solid var(--line2);border-radius:10px;padding:15px 18px;overflow-x:auto;margin:0 0 18px;}
161
  .content pre code{background:none;border:none;padding:0;font-size:12.5px;color:var(--soft);line-height:1.62;}
162
  .content table{width:100%;border-collapse:collapse;margin:2px 0 20px;font-size:13.5px;}
163
  .content th{text-align:left;color:var(--muted);font-weight:600;border-bottom:1px solid var(--line2);padding:9px 12px;font-size:11px;letter-spacing:.6px;text-transform:uppercase;}
164
  .content td{color:var(--soft);border-bottom:1px solid var(--line);padding:9px 12px;vertical-align:top;}
165
  .content td code{font-size:12px;}
166
  .gallery{margin-top:8px;}
167
  .repo-line{margin:42px 0 0;color:var(--faint);font-size:12.5px;font-family:ui-monospace,Menlo,monospace;}
168
</style>
169
<link rel="canonical" href="https://zionboggan.com/cti-detection-automation/">
170
<meta name="author" content="Zion Boggan">
171
<meta name="robots" content="index, follow, max-image-preview:large">
172
<meta property="og:type" content="article">
173
<meta property="og:site_name" content="Zion Boggan">
174
<meta property="og:title" content="CTI Detection Automation | Zion Boggan">
175
<meta property="og:description" content="Pulls indicators from five live threat-intel feeds, dedupes across them, extracts the MITRE techniques, generates Wazuh CDB lists and a tagged XML ruleset, then emails an analyst a signed, single-use review link before anything reaches the SIEM.">
176
<meta property="og:url" content="https://zionboggan.com/cti-detection-automation/">
177
<meta property="og:image" content="https://zionboggan.com/assets/cti-detection-automation/01-approval-email.png">
178
<meta name="twitter:card" content="summary_large_image">
179
<meta name="twitter:title" content="CTI Detection Automation | Zion Boggan">
180
<meta name="twitter:description" content="Pulls indicators from five live threat-intel feeds, dedupes across them, extracts the MITRE techniques, generates Wazuh CDB lists and a tagged XML ruleset, then emails an analyst a signed, single-use review link before anything reaches the SIEM.">
181
<meta name="twitter:image" content="https://zionboggan.com/assets/cti-detection-automation/01-approval-email.png">
182
<script type="application/ld+json">{"@context":"https://schema.org","@type":"TechArticle","headline":"CTI Detection Automation","description":"Pulls indicators from five live threat-intel feeds, dedupes across them, extracts the MITRE techniques, generates Wazuh CDB lists and a tagged XML ruleset, then emails an analyst a signed, single-use review link before anything reaches the SIEM.","url":"https://zionboggan.com/cti-detection-automation/","image":"https://zionboggan.com/assets/cti-detection-automation/01-approval-email.png","author":{"@type":"Person","name":"Zion Boggan","url":"https://zionboggan.com"},"publisher":{"@type":"Person","name":"Zion Boggan"}}</script>
183
</head>
184
<body>
185
<nav><div class="wrap">
186
  <a class="brand mono" href="/" style="color:var(--ink)">zion_boggan<span class="dot">.</span></a>
187
  <span class="links">
188
    <a href="/#oversight">Oversight</a>
189
    <a href="/#labs">Labs</a>
190
    <a href="/#research">Research</a>
191
    <a href="/#background">Background</a>
192
    <a href="/">Home</a>
193
  </span>
194
</div></nav>
195
<header class="hero detail-hero"><div class="wrap">
196
  <a class="back" href="/#labs">&larr; All work</a>
197
  <div class="kicker">THREAT INTEL</div>
198
  <h1>CTI Detection Automation</h1>
199
  <p class="tagline">Pulls indicators from five live threat-intel feeds, dedupes across them, extracts the MITRE techniques, generates Wazuh CDB lists and a tagged XML ruleset, then emails an analyst a signed, single-use review link before anything reaches the SIEM.</p>
200
  <div class="tags"><span>Python</span><span>ThreatFox / OTX / URLhaus</span><span>Wazuh CDB</span><span>ATT&amp;CK</span><span>Flask</span><span>itsdangerous</span><span>SMTP</span></div>
201
  <div class="facts"><div class="stat"><div class="n">5</div><div class="k">live feeds connected</div></div><div class="stat"><div class="n">30</div><div class="k">tests passing</div></div><div class="stat"><div class="n">5</div><div class="k">Wazuh CDB list types</div></div><div class="stat"><div class="n">27</div><div class="k">ATT&amp;CK techniques in catalog</div></div><div class="stat"><div class="n">1</div><div class="k">signed human gate before deploy</div></div></div>
202
  <div class="cta" style="margin-top:24px"></div>
203
</div></header>
204
<section><div class="wrap">
205
  <!-- figcaption is only valid as a child of figure; the .shot frame lives on an inner div so the caption still renders below the bordered image. -->
  <figure><div class="shot"><img loading="lazy" src="/assets/cti-detection-automation/01-approval-email.png" alt="The approval email the analyst receives: bundle ID, indicator counts by type, the diff against the last approved bundle, the top ATT&amp;CK techniques, and the signed review link."></div><figcaption>The approval email the analyst receives: bundle ID, indicator counts by type, the diff against the last approved bundle, the top ATT&amp;CK techniques, and the signed review link.</figcaption></figure>
206
  <div class="content">
207
  <h2>The pipeline</h2>
208
<p>Each run flows through a fixed set of stages, every one of which is unit-tested in isolation. <code>build_bundle()</code> is the spine:</p><pre><code>def build_bundle(config: dict) -> RuleBundle:
209
    raw = collect_indicators(config)
210
    merged = deduplicate(raw)
211
    kept = filter_by_confidence(merged, config["min_confidence"])
212
    techniques = extract_techniques(kept)
213
    cdb_lists = rules.build_cdb_lists(kept)
214
    rules_xml = rules.build_rules_xml(kept, base_id=config["rules"]["base_id"])
215
    bundle_id = datetime.now(timezone.utc).strftime("cti-%Y%m%d-%H%M%S")
216
    return RuleBundle(
217
        bundle_id=bundle_id,
218
        generated_at=RuleBundle.now_iso(),
219
        indicators=kept,
220
        techniques=techniques,
221
        cdb_lists=cdb_lists,
222
        rules_xml=rules_xml,
223
    )</code></pre><p>A real run over the bundled fixtures produces a 19-indicator bundle: 5 IPs, 6 domains, 6 URLs, and 2 hashes, mapping to 15 distinct ATT&amp;CK techniques across the five sources.</p>
224
<h2>Feed connectors</h2>
225
<!-- Column headers carry scope="col" so assistive technology associates each data cell with its header. -->
<p>Five public sources, each a connector subclassing one abstract <code>Feed</code> with a pure <code>parse()</code> split cleanly from its network <code>fetch_raw()</code>, so parsing is tested against fixtures while the HTTP path stays thin:</p><table><thead><tr><th scope="col">Feed</th><th scope="col">Provides</th><th scope="col">Auth</th></tr></thead><tbody><tr><td>ThreatFox</td><td>IPs, domains, URLs, hashes, malware family</td><td>free key (POST)</td></tr><tr><td>Feodo Tracker</td><td>botnet C2 IPs</td><td>keyless</td></tr><tr><td>URLhaus</td><td>malware-distribution URLs + host domains</td><td>keyless</td></tr><tr><td>AlienVault OTX</td><td>pulse indicators carrying ATT&amp;CK IDs</td><td>free key</td></tr><tr><td>OpenPhish</td><td>phishing URLs + host domains</td><td>keyless</td></tr></tbody></table><p>The ThreatFox connector is representative: it normalizes the API's IOC types, strips ports off <code>ip:port</code> values, and derives techniques from both the malware family and the threat type.</p><pre><code>def parse(self, raw: str) -> list[Indicator]:
226
    payload = load_json(raw)
227
    if payload.get("query_status") != "ok":
228
        return []
229
    indicators: list[Indicator] = []
230
    for entry in payload.get("data", []):
231
        ioc_type = TYPE_MAP.get(entry.get("ioc_type"))
232
        if ioc_type is None:
233
            continue
234
        value = entry.get("ioc", "")
235
        if ioc_type == "ip" and ":" in value:
236
            value = value.split(":", 1)[0]
237
        malware = entry.get("malware_printable") or entry.get("malware")
238
        threat_type = entry.get("threat_type", "unknown")
239
        techniques = mitre.techniques_for_malware(malware)
240
        techniques += mitre.techniques_for_threat_type(threat_type)</code></pre><p>URLhaus and OpenPhish additionally split each URL's host into a separate <code>domain</code> indicator, which is where the cross-feed dedup earns its keep.</p>
241
<h2>Dedup and normalization</h2>
242
<p>Every connector emits the same <code>Indicator</code> dataclass, so downstream code never special-cases a source. Deduplication keys on <code>(type, value.lower())</code>, so the same IP seen in ThreatFox, Feodo, and OTX collapses to one indicator carrying the union of all three sources, the maximum confidence, and the union of techniques and tags:</p><pre><code>def deduplicate(indicators: list[Indicator]) -> list[Indicator]:
243
    merged: dict[tuple[str, str], Indicator] = {}
244
    for indicator in indicators:
245
        key = indicator.key()
246
        existing = merged.get(key)
247
        if existing is None:
248
            merged[key] = Indicator(...)
249
            continue
250
        existing.confidence = max(existing.confidence, indicator.confidence)
251
        existing.techniques = sorted(set(existing.techniques) | set(indicator.techniques))
252
        existing.tags = sorted(set(existing.tags) | set(indicator.tags))
253
        existing.malware = existing.malware or indicator.malware
254
        existing.reference = existing.reference or indicator.reference
255
        existing.first_seen = existing.first_seen or indicator.first_seen
256
        if indicator.source not in existing.source.split(","):
257
            existing.source = ",".join(sorted(set(existing.source.split(",") + [indicator.source])))
258
    return list(merged.values())</code></pre><p>A merge test pins the behavior exactly:</p><pre><code>def test_merges_same_indicator_across_sources():
259
    merged = deduplicate([
260
        make("45.137.21.9", "threatfox", 100, ["T1071.001"], "Cobalt Strike"),
261
        make("45.137.21.9", "feodo", 90, ["T1573"]),
262
        make("45.137.21.9", "otx", 60, ["T1059.001"]),
263
    ])
264
    assert len(merged) == 1
265
    indicator = merged[0]
266
    assert indicator.confidence == 100
267
    assert set(indicator.techniques) == {"T1071.001", "T1573", "T1059.001"}
268
    assert set(indicator.source.split(",")) == {"threatfox", "feodo", "otx"}</code></pre><p>After dedup, <code>filter_by_confidence()</code> drops anything under the configured threshold before any techniques or rules are derived.</p>
269
<h2>MITRE TTP extraction</h2>
270
<p>Indicators arrive with techniques attached three ways: OTX pulses carry ATT&amp;CK IDs directly; feeds without IDs have them inferred from the malware family and from the threat type. The mapping tables are hand-built (27 techniques in the catalog) and keyed to malware families and threat types:</p><pre><code>MALWARE_TECHNIQUES = {
271
    "cobaltstrike": ["T1071.001", "T1059.001", "T1055"],
272
    "agenttesla": ["T1056.001", "T1555", "T1041"],
273
    "emotet": ["T1566.001", "T1071.001", "T1105"],
274
    "qakbot": ["T1566.001", "T1055", "T1071.001"],
275
    ...
276
}
277
 
278
THREAT_TYPE_TECHNIQUES = {
279
    "phishing": ["T1566.002", "T1204.001"],
280
    "botnet_cc": ["T1071.001", "T1573"],
281
    "malware_download": ["T1105", "T1204.002"],
282
    "ransomware": ["T1486", "T1071.001"],
283
    "leaked_credentials": ["T1589.001"],
284
}</code></pre><p>Family lookup is fuzzy: it lowercases the family and strips spaces, hyphens and underscores, so <code>Cobalt Strike</code>, <code>cobalt-strike</code> and <code>CobaltStrike</code> all hit the same row:</p><pre><code>def techniques_for_malware(malware: str | None) -> list[str]:
285
    if not malware:
286
        return []
287
    key = malware.lower().replace(" ", "").replace("-", "").replace("_", "")
288
    for name, techniques in MALWARE_TECHNIQUES.items():
289
        if name in key:
290
            return list(techniques)
291
    return []</code></pre><p>Once collected, <code>extract_techniques()</code> rolls every indicator's techniques into a per-technique record with its indicator count and contributing sources, sorted by prevalence, and renders a coverage table. From the live fixture run, the top of the report reads:</p><pre><code>| Technique | Tactic | Name | Indicators | Sources |
292
|---|---|---|---|---|
293
| T1204.001 | execution | User Execution: Malicious Link | 10 | openphish, threatfox, urlhaus |
294
| T1105 | command-and-control | Ingress Tool Transfer | 9 | feodo, openphish, otx, threatfox, urlhaus |
295
| T1566.002 | initial-access | Phishing: Spearphishing Link | 8 | openphish, urlhaus |
296
| T1071.001 | command-and-control | Application Layer Protocol: Web Protocols | 6 | feodo, otx, threatfox |</code></pre>
297
<h2>Generated Wazuh rules</h2>
298
<p>Indicators are written as five Wazuh CDB lists, one per indicator class, each keyed on the value with a malware-family or threat-type label, sorted and deduplicated. A real <code>cti-malicious-ip</code> list from the active bundle:</p><pre><code>185.220.101.45:Dridex
299
193.149.176.12:AsyncRAT
300
194.36.191.55:QakBot
301
45.137.21.9:Cobalt Strike
302
91.211.88.34:Emotet</code></pre><p>Labels are sanitized (colons and newlines stripped, clamped to 48 chars), so a value like <code>weird:type</code> can never break the <code>value:label</code> CDB format. Alongside the lists, <code>build_rules_xml()</code> emits list-lookup rules: outbound and inbound IP matches, a DNS-query match, a URL match, a hash-execution match, and a leaked-credential rule, each tagged with up to four dominant ATT&amp;CK techniques for that bucket. These are the verbatim outbound-IP and DNS rules from the generated <code>local_cti_rules.xml</code>:</p><pre><code>&lt;group name="cti,threat-intel,auto-generated,"&gt;
303
  &lt;rule id="100300" level="12"&gt;
304
    &lt;field name="dstip" type="pcre2"&gt;\S+&lt;/field&gt;
305
    &lt;list field="dstip" lookup="address_match_key"&gt;etc/lists/cti-malicious-ip&lt;/list&gt;
306
    &lt;description&gt;Outbound connection to CTI-flagged IP: $(dstip)&lt;/description&gt;
307
    &lt;mitre&gt;
308
      &lt;id&gt;T1071.001&lt;/id&gt;
309
      &lt;id&gt;T1573&lt;/id&gt;
310
      &lt;id&gt;T1566.001&lt;/id&gt;
311
      &lt;id&gt;T1059.001&lt;/id&gt;
312
    &lt;/mitre&gt;
313
  &lt;/rule&gt;
314
  &lt;rule id="100302" level="12"&gt;
315
    &lt;field name="win.eventdata.queryName" type="pcre2"&gt;\S+&lt;/field&gt;
316
    &lt;list field="win.eventdata.queryName" lookup="match_key"&gt;etc/lists/cti-malicious-domain&lt;/list&gt;
317
    &lt;description&gt;DNS query for CTI-flagged domain: $(win.eventdata.queryName)&lt;/description&gt;
318
    &lt;mitre&gt;
319
      &lt;id&gt;T1204.001&lt;/id&gt;
320
      &lt;id&gt;T1566.002&lt;/id&gt;
321
    &lt;/mitre&gt;
322
  &lt;/rule&gt;
323
&lt;/group&gt;</code></pre><p>The lookups reference <code>etc/lists/</code>, and the hash rule fires at level 13, so the output drops directly into a Wazuh manager with no hand-editing. A test parses the generated XML with <code>ElementTree</code> to prove it is well-formed and that the <code>&lt;mitre&gt;</code> tags survived.</p>
324
<h2>The signed approval gate</h2>
325
<p>The pipeline never deploys on its own. The review link carries an <code>itsdangerous</code> URL-safe timed token, signed with a server secret under a fixed salt, so it cannot be forged and stops working once the TTL elapses:</p><pre><code>def serializer(secret: str) -> URLSafeTimedSerializer:
326
    return URLSafeTimedSerializer(secret, salt="cti-rule-approval")
327
 
328
def make_token(secret: str, bundle_id: str) -> str:
329
    return serializer(secret).dumps({"bundle_id": bundle_id})
330
 
331
def verify_token(secret: str, token: str, max_age: int) -> str | None:
332
    try:
333
        data = serializer(secret).loads(token, max_age=max_age)
334
    except (BadSignature, SignatureExpired):
335
        return None
336
    return data.get("bundle_id")</code></pre><p>A small Flask console serves the gate. Every state-changing route verifies the token first and <code>abort(403)</code>s on a bad or expired one; approval promotes the candidate to active and, if a Wazuh path is set, deploys to the manager:</p><pre><code>@app.post("/approve/&lt;token&gt;")
337
def approve(token):
338
    bundle_id = approval.verify_token(secret, token, ttl)
339
    if not bundle_id:
340
        abort(403)
341
    active_dir = config.get("wazuh_etc_dir")
342
    result = pipeline.promote(
343
        bundle_id, output_dir, Path(active_dir) if active_dir else None
344
    )
345
    return render_template("result.html", action="approved", result=result)</code></pre><p>Reject writes a <code>REJECTED</code> marker carrying the analyst's reason; the dashboard then lists every candidate bundle as pending, approved, or rejected. Promotion records the approved key set in <code>state.json</code>, which is exactly what the next run diffs against.</p>
346
<h2>Tests</h2>
347
<p>30 tests across 7 files cover every stage end-to-end: feed parsing against fixtures, cross-source dedup, confidence filtering, TTP extraction, CDB and XML generation, token signing and expiry, email rendering, and the full Flask approve/reject flow. The web tests run the real pipeline and drive the routes with Flask's test client:</p><pre><code>def test_review_requires_valid_token(app_and_bundle):
348
    app, _ = app_and_bundle
349
    client = app.test_client()
350
    assert client.get("/review/garbage").status_code == 403
351
 
352
 
353
def test_approve_promotes(app_and_bundle):
354
    app, result = app_and_bundle
355
    token = make_token("test-secret", result["bundle_id"])
356
    client = app.test_client()
357
    resp = client.post(f"/approve/{token}")
358
    assert resp.status_code == 200
359
    assert b"approved" in resp.data
360
    follow = client.get(f"/review/{token}")
361
    assert b"already been approved" in follow.data</code></pre><p>Token security is pinned directly: a token signed with one secret will not verify under another, and a zero-max-age token is rejected after a one-second sleep:</p><pre><code>def test_token_rejects_wrong_secret():
362
    token = make_token("secret", "bundle")
363
    assert verify_token("other", token, 60) is None
364
 
365
 
366
def test_token_expires():
367
    token = make_token("secret", "bundle")
368
    time.sleep(1)
369
    assert verify_token("secret", token, 0) is None</code></pre>
370
  </div>
371
  <!-- Each caption is nested inside its own figure (figcaption is invalid as a sibling); the .shot frame is an inner div so captions still render below the bordered media. -->
  <div class="gallery">
    <figure>
      <div class="shot"><img loading="lazy" src="/assets/cti-detection-automation/02-review-page.png" alt="The single-use signed review page: the candidate diff, indicators broken out by type, the five generated CDB lists, the ATT&amp;CK coverage table, and the approve-and-deploy and reject actions."></div>
      <figcaption>The single-use signed review page: the candidate diff, indicators broken out by type, the five generated CDB lists, the ATT&amp;CK coverage table, and the approve-and-deploy and reject actions.</figcaption>
    </figure>
    <figure>
      <div class="shot"><img loading="lazy" src="/assets/cti-detection-automation/03-dashboard.png" alt="The bundle dashboard listing every candidate with its indicator and technique counts and its pending, approved, or rejected status."></div>
      <figcaption>The bundle dashboard listing every candidate with its indicator and technique counts and its pending, approved, or rejected status.</figcaption>
    </figure>
    <figure>
      <div class="shot"><video controls preload="metadata" src="/assets/cti-detection-automation/cti-approval-walkthrough.mp4"></video></div>
      <figcaption>Full walkthrough: a pipeline run, the approval email, the signed review page, and promotion to the active bundle (video).</figcaption>
    </figure>
  </div>
372
  <p class="repo-line">Repository &middot; github.com/zionboggan/cti-detection-automation</p>
373
</div></section>
374
<footer><div class="wrap row">
375
  <div class="links">
376
    <a href="/">Portfolio</a>
377
    <a href="https://www.linkedin.com/in/zion-boggan">LinkedIn</a>
378
    <a href="https://oversightprotocol.dev/">Oversight</a>
379
    <a href="mailto:zionboggan0@gmail.com">Email</a>
380
  </div>
381
  <div class="note">Built and deployed on a self-hosted Proxmox homelab. This page mirrors the
382
  project's documentation and results so the work is fully viewable here.</div>
383
</div></footer>
384
</body>
385
</html>