| 1 | <!doctype html> |
| 2 | <html lang="en"> |
| 3 | <head> |
| 4 | <meta charset="utf-8"> |
| 5 | <meta name="viewport" content="width=device-width, initial-scale=1.0"> |
| 6 | <title>CTI Detection Automation | Zion Boggan</title> |
| 7 | <meta name="description" content="Pulls indicators from five live threat-intel feeds, dedupes across them, extracts the MITRE techniques, generates Wazuh CDB lists and a tagged XML ruleset, then emails an analyst a signed, single-use review link before anything reaches the SIEM."> |
| 8 | <link rel="icon" href="data:image/svg+xml,%3Csvg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 32 32'%3E%3Crect width='32' height='32' rx='6' fill='%230c0e12'/%3E%3Ctext x='16' y='22' font-family='monospace' font-size='15' fill='%236cc7b8' text-anchor='middle'%3Ezb%3C/text%3E%3C/svg%3E"> |
| 9 | <style> |
| 10 | :root{ |
| 11 | --bg:#0c0e12; --bg2:#0f1217; --panel:#14181f; --panel2:#171c24; |
| 12 | --line:#222936; --line2:#2c3543; |
| 13 | --ink:#e8eaed; --soft:#c3cad4; --muted:#8a94a3; --faint:#5d6675; |
| 14 | --accent:#6cc7b8; --accent-dim:#274b47; |
| 15 | --maxw:1020px; |
| 16 | } |
| 17 | *{box-sizing:border-box;} |
| 18 | html{scroll-behavior:smooth;} |
| 19 | body{margin:0;background:var(--bg);color:var(--ink); |
| 20 | font-family:-apple-system,BlinkMacSystemFont,"Segoe UI",Roboto,Helvetica,Arial,sans-serif; |
| 21 | font-size:16px;line-height:1.65;-webkit-font-smoothing:antialiased;} |
| 22 | .mono{font-family:ui-monospace,SFMono-Regular,"SF Mono",Menlo,Consolas,monospace;} |
| 23 | a{color:var(--accent);text-decoration:none;} |
| 24 | a:hover{color:#8fe0d2;} |
| 25 | .wrap{max-width:var(--maxw);margin:0 auto;padding:0 24px;} |
| 26 | |
| 27 | /* nav */ |
| 28 | nav{position:sticky;top:0;z-index:20;background:rgba(12,14,18,.82); |
| 29 | backdrop-filter:blur(10px);border-bottom:1px solid var(--line);} |
| 30 | nav .wrap{display:flex;align-items:center;justify-content:space-between;height:58px;} |
| 31 | nav .brand{font-weight:600;letter-spacing:.2px;} |
| 32 | nav .brand .dot{color:var(--accent);} |
| 33 | nav .links{display:flex;gap:26px;font-size:13.5px;} |
| 34 | nav .links a{color:var(--muted);} |
| 35 | nav .links a:hover{color:var(--ink);} |
| 36 | @media(max-width:680px){nav .links{display:none;}} |
| 37 | |
| 38 | /* hero */ |
| 39 | header.hero{padding:74px 0 54px;border-bottom:1px solid var(--line); |
| 40 | background:radial-gradient(900px 380px at 78% -10%, #11201e 0%, transparent 60%);} |
| 41 | .avail{font-size:12.5px;letter-spacing:1.5px;text-transform:uppercase;color:var(--accent); |
| 42 | display:flex;align-items:center;gap:9px;margin-bottom:20px;} |
| 43 | .avail .pulse{width:7px;height:7px;border-radius:50%;background:var(--accent); |
| 44 | box-shadow:0 0 0 0 rgba(108,199,184,.5);animation:p 2.4s infinite;} |
| 45 | @keyframes p{0%{box-shadow:0 0 0 0 rgba(108,199,184,.45)}70%{box-shadow:0 0 0 8px rgba(108,199,184,0)}100%{box-shadow:0 0 0 0 rgba(108,199,184,0)}} |
| 46 | h1{font-size:clamp(34px,6vw,52px);line-height:1.05;margin:0 0 8px;letter-spacing:-1px;font-weight:680;} |
| 47 | .hero .sub{font-size:clamp(16px,2.4vw,20px);color:var(--soft);margin:0 0 24px;font-weight:500;} |
| 48 | .hero .lede{max-width:660px;color:var(--soft);font-size:17px;margin:0 0 28px;} |
| 49 | .hero .lede b{color:var(--ink);font-weight:600;} |
| 50 | .cta{display:flex;flex-wrap:wrap;gap:12px;align-items:center;} |
| 51 | .btn{display:inline-flex;align-items:center;gap:8px;padding:10px 18px;border-radius:8px; |
| 52 | font-size:14.5px;font-weight:550;border:1px solid var(--line2);color:var(--ink);background:var(--panel);} |
| 53 | .btn:hover{border-color:var(--accent-dim);background:var(--panel2);color:var(--ink);} |
| 54 | .btn.primary{background:var(--accent);color:#06231f;border-color:var(--accent);font-weight:650;} |
| 55 | .btn.primary:hover{background:#8fe0d2;color:#06231f;} |
| 56 | .meta{margin-top:26px;display:flex;flex-wrap:wrap;gap:8px 22px;font-size:13px;color:var(--muted);} |
| 57 | .meta .mono{color:var(--faint);} |
| 58 | |
| 59 | /* sections */ |
| 60 | section{padding:64px 0;border-bottom:1px solid var(--line);} |
| 61 | .shead{display:flex;align-items:baseline;gap:14px;margin-bottom:30px;} |
| 62 | .shead .idx{font-size:13px;color:var(--accent);letter-spacing:1px;} |
| 63 | .shead h2{font-size:14px;letter-spacing:2px;text-transform:uppercase;color:var(--muted);margin:0;font-weight:600;} |
| 64 | .shead .rule{flex:1;height:1px;background:var(--line);} |
| 65 | |
| 66 | /* flagship */ |
| 67 | .flag{background:linear-gradient(180deg,var(--panel) 0%,var(--bg2) 100%); |
| 68 | border:1px solid var(--line2);border-radius:14px;overflow:hidden;} |
| 69 | .flag .top{padding:30px 32px 8px;} |
| 70 | .flag .tag{font-size:12px;letter-spacing:1.5px;text-transform:uppercase;color:var(--accent);margin-bottom:12px;} |
| 71 | .flag h3{font-size:27px;margin:0 0 6px;letter-spacing:-.4px;} |
| 72 | .flag h3 .v{font-size:13px;color:var(--muted);font-weight:500;margin-left:8px;letter-spacing:0;} |
| 73 | .flag .grid{display:grid;grid-template-columns:1.25fr 1fr;gap:30px;padding:14px 32px 30px;} |
| 74 | .flag p{color:var(--soft);margin:0 0 16px;} |
| 75 | .flag .stats{display:grid;grid-template-columns:1fr 1fr;gap:12px;margin-top:6px;} |
| 76 | .stat{background:var(--bg);border:1px solid var(--line);border-radius:9px;padding:13px 15px;} |
| 77 | .stat .n{font-size:21px;font-weight:680;color:var(--ink);} |
| 78 | .stat .k{font-size:12px;color:var(--muted);margin-top:2px;} |
| 79 | .spec{background:var(--bg);border:1px solid var(--line);border-radius:10px;padding:18px 18px;} |
| 80 | .spec .sk{font-size:11px;letter-spacing:1.5px;text-transform:uppercase;color:var(--faint);margin-bottom:10px;} |
| 81 | .spec ul{margin:0;padding:0;list-style:none;font-size:13.5px;} |
| 82 | .spec li{padding:6px 0;border-top:1px solid var(--line);color:var(--soft);display:flex;justify-content:space-between;gap:14px;} |
| 83 | .spec li:first-child{border-top:none;} |
| 84 | .spec li span{color:var(--muted);} |
| 85 | .flag .foot{padding:0 32px 28px;display:flex;gap:18px;flex-wrap:wrap;font-size:14px;} |
| 86 | @media(max-width:720px){.flag .grid{grid-template-columns:1fr;}} |
| 87 | |
| 88 | /* lab cards */ |
| 89 | .cards{display:grid;grid-template-columns:1fr 1fr;gap:20px;} |
| 90 | @media(max-width:680px){.cards{grid-template-columns:1fr;}} |
| 91 | .card{border:1px solid var(--line);border-radius:12px;overflow:hidden;background:var(--panel); |
| 92 | display:flex;flex-direction:column;transition:border-color .15s,transform .15s;} |
| 93 | .card:hover{border-color:var(--accent-dim);transform:translateY(-2px);} |
| 94 | .card .thumb{height:172px;overflow:hidden;border-bottom:1px solid var(--line);background:#fff;} |
| 95 | .card .thumb img{width:100%;height:100%;object-fit:cover;object-position:top left;display:block;} |
| 96 | .card .body{padding:18px 20px 20px;display:flex;flex-direction:column;flex:1;} |
| 97 | .card h3{margin:0 0 9px;font-size:17px;} |
| 98 | .card p{margin:0 0 14px;font-size:14px;color:var(--soft);flex:1;} |
| 99 | .tags{display:flex;flex-wrap:wrap;gap:6px;margin-bottom:14px;} |
| 100 | .tags span{font-size:11.5px;color:var(--muted);background:var(--bg);border:1px solid var(--line); |
| 101 | border-radius:5px;padding:3px 8px;} |
| 102 | .card .lnk{font-size:13.5px;font-family:ui-monospace,Menlo,monospace;} |
| 103 | .card .lnk::after{content:" →";} |
| 104 | |
| 105 | /* research */ |
| 106 | .rlede{color:var(--soft);max-width:680px;margin:-6px 0 26px;} |
| 107 | .research{display:flex;flex-direction:column;gap:0;border:1px solid var(--line);border-radius:12px;overflow:hidden;} |
| 108 | .ritem{display:grid;grid-template-columns:120px 1fr auto;gap:18px;align-items:center; |
| 109 | padding:18px 22px;border-top:1px solid var(--line);} |
| 110 | .ritem:first-child{border-top:none;} |
| 111 | .ritem:hover{background:var(--panel);} |
| 112 | .ritem .cls{font-size:11px;letter-spacing:.5px;text-transform:uppercase;color:var(--accent);} |
| 113 | .ritem h3{margin:0 0 3px;font-size:16px;} |
| 114 | .ritem p{margin:0;font-size:13.5px;color:var(--muted);} |
| 115 | .ritem .go{font-family:ui-monospace,Menlo,monospace;font-size:13px;white-space:nowrap;} |
| 116 | @media(max-width:680px){.ritem{grid-template-columns:1fr;gap:6px;}.ritem .go{margin-top:4px;}} |
| 117 | .progs{margin-top:22px;} |
| 118 | .progs .sk{font-size:11px;letter-spacing:1.5px;text-transform:uppercase;color:var(--faint);margin-bottom:11px;} |
| 119 | .progs .row{display:flex;flex-wrap:wrap;gap:7px;} |
| 120 | .progs .row span{font-size:12.5px;color:var(--soft);background:var(--panel);border:1px solid var(--line); |
| 121 | border-radius:6px;padding:4px 10px;} |
| 122 | |
| 123 | /* credentials */ |
| 124 | .cred{display:grid;grid-template-columns:1.1fr 1fr;gap:28px;} |
| 125 | @media(max-width:680px){.cred{grid-template-columns:1fr;}} |
| 126 | .cred p{color:var(--soft);margin:0 0 14px;} |
| 127 | .cred .role{font-size:14px;color:var(--muted);} |
| 128 | .cred .role b{color:var(--ink);font-weight:600;} |
| 129 | .certs{list-style:none;margin:0;padding:0;} |
| 130 | .certs li{padding:9px 0;border-top:1px solid var(--line);font-size:14px;color:var(--soft); |
| 131 | display:flex;gap:10px;align-items:baseline;} |
| 132 | .certs li:first-child{border-top:none;} |
| 133 | .certs li .c{color:var(--accent);font-family:ui-monospace,Menlo,monospace;font-size:12px;} |
| 134 | |
| 135 | footer{padding:46px 0 64px;} |
| 136 | footer .row{display:flex;flex-wrap:wrap;justify-content:space-between;gap:18px;align-items:center;} |
| 137 | footer .links a{color:var(--soft);margin-right:20px;font-size:14px;} |
| 138 | footer .note{color:var(--faint);font-size:12.5px;max-width:520px;} |
| 139 | |
| 140 | /* detail pages */ |
| 141 | .detail-hero{padding:40px 0 28px;} |
| 142 | .back{display:inline-block;font-size:13px;color:var(--muted);margin-bottom:22px;font-family:ui-monospace,Menlo,monospace;} |
| 143 | .back:hover{color:var(--ink);} |
| 144 | .kicker{font-size:12px;letter-spacing:2px;text-transform:uppercase;color:var(--accent);margin-bottom:13px;font-family:ui-monospace,Menlo,monospace;} |
| 145 | .detail-hero h1{font-size:clamp(28px,5vw,42px);margin:0 0 12px;letter-spacing:-.6px;} |
| 146 | .detail-hero .tagline{font-size:clamp(16px,2.2vw,19px);color:var(--soft);max-width:780px;margin:0 0 18px;} |
| 147 | .facts{display:grid;grid-template-columns:repeat(auto-fit,minmax(148px,1fr));gap:12px;margin-top:24px;} |
| 148 | figure{margin:0;} |
| 149 | .shot{border:1px solid var(--line2);border-radius:12px;overflow:hidden;background:#fff;margin:30px 0 6px;} |
| 150 | .shot img,.shot video{display:block;width:100%;height:auto;} |
| 151 | figcaption{font-size:13px;color:var(--muted);margin:11px 2px 0;} |
| 152 | .content{padding:6px 0 0;} |
| 153 | .content h2{font-size:13px;letter-spacing:2px;text-transform:uppercase;color:var(--muted);margin:44px 0 16px;font-weight:600;border-top:1px solid var(--line);padding-top:30px;} |
| 154 | .content h2.first{border-top:none;padding-top:6px;margin-top:18px;} |
| 155 | .content p{color:var(--soft);margin:0 0 16px;} |
| 156 | .content ul,.content ol{color:var(--soft);margin:0 0 16px;padding-left:22px;} |
| 157 | .content li{margin:6px 0;} |
| 158 | .content strong{color:var(--ink);font-weight:600;} |
| 159 | .content code{font-family:ui-monospace,Menlo,monospace;font-size:13px;background:var(--panel2);border:1px solid var(--line);border-radius:4px;padding:1px 5px;color:var(--soft);} |
| 160 | .content pre{background:var(--bg2);border:1px solid var(--line2);border-radius:10px;padding:15px 18px;overflow-x:auto;margin:0 0 18px;} |
| 161 | .content pre code{background:none;border:none;padding:0;font-size:12.5px;color:var(--soft);line-height:1.62;} |
| 162 | .content table{width:100%;border-collapse:collapse;margin:2px 0 20px;font-size:13.5px;} |
| 163 | .content th{text-align:left;color:var(--muted);font-weight:600;border-bottom:1px solid var(--line2);padding:9px 12px;font-size:11px;letter-spacing:.6px;text-transform:uppercase;} |
| 164 | .content td{color:var(--soft);border-bottom:1px solid var(--line);padding:9px 12px;vertical-align:top;} |
| 165 | .content td code{font-size:12px;} |
| 166 | .gallery{margin-top:8px;} |
| 167 | .repo-line{margin:42px 0 0;color:var(--faint);font-size:12.5px;font-family:ui-monospace,Menlo,monospace;} |
| 168 | </style> |
| 169 | <link rel="canonical" href="https://zionboggan.com/cti-detection-automation/"> |
| 170 | <meta name="author" content="Zion Boggan"> |
| 171 | <meta name="robots" content="index, follow, max-image-preview:large"> |
| 172 | <meta property="og:type" content="article"> |
| 173 | <meta property="og:site_name" content="Zion Boggan"> |
| 174 | <meta property="og:title" content="CTI Detection Automation | Zion Boggan"> |
| 175 | <meta property="og:description" content="Pulls indicators from five live threat-intel feeds, dedupes across them, extracts the MITRE techniques, generates Wazuh CDB lists and a tagged XML ruleset, then emails an analyst a signed, single-use review link before anything reaches the SIEM."> |
| 176 | <meta property="og:url" content="https://zionboggan.com/cti-detection-automation/"> |
| 177 | <meta property="og:image" content="https://zionboggan.com/assets/cti-detection-automation/01-approval-email.png"> |
| 178 | <meta name="twitter:card" content="summary_large_image"> |
| 179 | <meta name="twitter:title" content="CTI Detection Automation | Zion Boggan"> |
| 180 | <meta name="twitter:description" content="Pulls indicators from five live threat-intel feeds, dedupes across them, extracts the MITRE techniques, generates Wazuh CDB lists and a tagged XML ruleset, then emails an analyst a signed, single-use review link before anything reaches the SIEM."> |
| 181 | <meta name="twitter:image" content="https://zionboggan.com/assets/cti-detection-automation/01-approval-email.png"> |
| 182 | <script type="application/ld+json">{"@context":"https://schema.org","@type":"TechArticle","headline":"CTI Detection Automation","description":"Pulls indicators from five live threat-intel feeds, dedupes across them, extracts the MITRE techniques, generates Wazuh CDB lists and a tagged XML ruleset, then emails an analyst a signed, single-use review link before anything reaches the SIEM.","url":"https://zionboggan.com/cti-detection-automation/","image":"https://zionboggan.com/assets/cti-detection-automation/01-approval-email.png","author":{"@type":"Person","name":"Zion Boggan","url":"https://zionboggan.com"},"publisher":{"@type":"Person","name":"Zion Boggan"}}</script> |
| 183 | </head> |
| 184 | <body> |
| 185 | <nav><div class="wrap"> |
| 186 | <a class="brand mono" href="/" style="color:var(--ink)">zion_boggan<span class="dot">.</span></a> |
| 187 | <span class="links"> |
| 188 | <a href="/#oversight">Oversight</a> |
| 189 | <a href="/#labs">Labs</a> |
| 190 | <a href="/#research">Research</a> |
| 191 | <a href="/#background">Background</a> |
| 192 | <a href="/">Home</a> |
| 193 | </span> |
| 194 | </div></nav> |
| 195 | <header class="hero detail-hero"><div class="wrap"> |
| 196 | <a class="back" href="/#labs">← All work</a> |
| 197 | <div class="kicker">THREAT INTEL</div> |
| 198 | <h1>CTI Detection Automation</h1> |
| 199 | <p class="tagline">Pulls indicators from five live threat-intel feeds, dedupes across them, extracts the MITRE techniques, generates Wazuh CDB lists and a tagged XML ruleset, then emails an analyst a signed, single-use review link before anything reaches the SIEM.</p> |
| 200 | <div class="tags"><span>Python</span><span>ThreatFox / OTX / URLhaus</span><span>Wazuh CDB</span><span>ATT&CK</span><span>Flask</span><span>itsdangerous</span><span>SMTP</span></div> |
| 201 | <div class="facts"><div class="stat"><div class="n">5</div><div class="k">live feeds connected</div></div><div class="stat"><div class="n">30</div><div class="k">tests passing</div></div><div class="stat"><div class="n">5</div><div class="k">Wazuh CDB list types</div></div><div class="stat"><div class="n">27</div><div class="k">ATT&CK techniques in catalog</div></div><div class="stat"><div class="n">1</div><div class="k">signed human gate before deploy</div></div></div> |
| 202 | <div class="cta" style="margin-top:24px"></div> |
| 203 | </div></header> |
| 204 | <section><div class="wrap"> |
| 205 | <figure><div class="shot"><img loading="lazy" src="/assets/cti-detection-automation/01-approval-email.png" alt="The approval email the analyst receives: bundle ID, indicator counts by type, the diff against the last approved bundle, the top ATT&CK techniques, and the signed review link."></div><figcaption>The approval email the analyst receives: bundle ID, indicator counts by type, the diff against the last approved bundle, the top ATT&CK techniques, and the signed review link.</figcaption></figure> |
| 206 | <div class="content"> |
| 207 | <h2>The pipeline</h2> |
| 208 | <p>Each run flows through a fixed set of stages, every one of which is unit-tested in isolation. <code>build_bundle()</code> is the spine:</p><pre><code>def build_bundle(config: dict) -> RuleBundle: |
| 209 | raw = collect_indicators(config) |
| 210 | merged = deduplicate(raw) |
| 211 | kept = filter_by_confidence(merged, config["min_confidence"]) |
| 212 | techniques = extract_techniques(kept) |
| 213 | cdb_lists = rules.build_cdb_lists(kept) |
| 214 | rules_xml = rules.build_rules_xml(kept, base_id=config["rules"]["base_id"]) |
| 215 | bundle_id = datetime.now(timezone.utc).strftime("cti-%Y%m%d-%H%M%S") |
| 216 | return RuleBundle( |
| 217 | bundle_id=bundle_id, |
| 218 | generated_at=RuleBundle.now_iso(), |
| 219 | indicators=kept, |
| 220 | techniques=techniques, |
| 221 | cdb_lists=cdb_lists, |
| 222 | rules_xml=rules_xml, |
| 223 | )</code></pre><p>A real run over the bundled fixtures produces a 19-indicator bundle: 5 IPs, 6 domains, 6 URLs, and 2 hashes, mapping to 15 distinct ATT&CK techniques across the five sources.</p> |
| 224 | <h2>Feed connectors</h2> |
| 225 | <p>Five public sources, each a connector subclassing one abstract <code>Feed</code> with a pure <code>parse()</code> split cleanly from its network <code>fetch_raw()</code>, so parsing is tested against fixtures while the HTTP path stays thin:</p><table><thead><tr><th>Feed</th><th>Provides</th><th>Auth</th></tr></thead><tbody><tr><td>ThreatFox</td><td>IPs, domains, URLs, hashes, malware family</td><td>free key (POST)</td></tr><tr><td>Feodo Tracker</td><td>botnet C2 IPs</td><td>keyless</td></tr><tr><td>URLhaus</td><td>malware-distribution URLs + host domains</td><td>keyless</td></tr><tr><td>AlienVault OTX</td><td>pulse indicators carrying ATT&CK IDs</td><td>free key</td></tr><tr><td>OpenPhish</td><td>phishing URLs + host domains</td><td>keyless</td></tr></tbody></table><p>The ThreatFox connector is representative: it normalizes the API's IOC types, strips ports off <code>ip:port</code> values, and derives techniques from both the malware family and the threat type.</p><pre><code>def parse(self, raw: str) -> list[Indicator]: |
| 226 | payload = load_json(raw) |
| 227 | if payload.get("query_status") != "ok": |
| 228 | return [] |
| 229 | indicators: list[Indicator] = [] |
| 230 | for entry in payload.get("data", []): |
| 231 | ioc_type = TYPE_MAP.get(entry.get("ioc_type")) |
| 232 | if ioc_type is None: |
| 233 | continue |
| 234 | value = entry.get("ioc", "") |
| 235 | if ioc_type == "ip" and ":" in value: |
| 236 | value = value.split(":", 1)[0] |
| 237 | malware = entry.get("malware_printable") or entry.get("malware") |
| 238 | threat_type = entry.get("threat_type", "unknown") |
| 239 | techniques = mitre.techniques_for_malware(malware) |
| 240 | techniques += mitre.techniques_for_threat_type(threat_type)</code></pre><p>URLhaus and OpenPhish additionally split each URL's host into a separate <code>domain</code> indicator, which is where the cross-feed dedup earns its keep.</p> |
| 241 | <h2>Dedup and normalization</h2> |
| 242 | <p>Every connector emits the same <code>Indicator</code> dataclass, so downstream code never special-cases a source. Deduplication keys on <code>(type, value.lower())</code>, so the same IP seen in ThreatFox, Feodo, and OTX collapses to one indicator carrying the union of all three sources, the maximum confidence, and the union of techniques and tags:</p><pre><code>def deduplicate(indicators: list[Indicator]) -> list[Indicator]: |
| 243 | merged: dict[tuple[str, str], Indicator] = {} |
| 244 | for indicator in indicators: |
| 245 | key = indicator.key() |
| 246 | existing = merged.get(key) |
| 247 | if existing is None: |
| 248 | merged[key] = Indicator(...) |
| 249 | continue |
| 250 | existing.confidence = max(existing.confidence, indicator.confidence) |
| 251 | existing.techniques = sorted(set(existing.techniques) | set(indicator.techniques)) |
| 252 | existing.tags = sorted(set(existing.tags) | set(indicator.tags)) |
| 253 | existing.malware = existing.malware or indicator.malware |
| 254 | existing.reference = existing.reference or indicator.reference |
| 255 | existing.first_seen = existing.first_seen or indicator.first_seen |
| 256 | if indicator.source not in existing.source.split(","): |
| 257 | existing.source = ",".join(sorted(set(existing.source.split(",") + [indicator.source]))) |
| 258 | return list(merged.values())</code></pre><p>A merge test pins the behavior exactly:</p><pre><code>def test_merges_same_indicator_across_sources(): |
| 259 | merged = deduplicate([ |
| 260 | make("45.137.21.9", "threatfox", 100, ["T1071.001"], "Cobalt Strike"), |
| 261 | make("45.137.21.9", "feodo", 90, ["T1573"]), |
| 262 | make("45.137.21.9", "otx", 60, ["T1059.001"]), |
| 263 | ]) |
| 264 | assert len(merged) == 1 |
| 265 | indicator = merged[0] |
| 266 | assert indicator.confidence == 100 |
| 267 | assert set(indicator.techniques) == {"T1071.001", "T1573", "T1059.001"} |
| 268 | assert set(indicator.source.split(",")) == {"threatfox", "feodo", "otx"}</code></pre><p>After dedup, <code>filter_by_confidence()</code> drops anything under the configured threshold before any techniques or rules are derived.</p> |
| 269 | <h2>MITRE TTP extraction</h2> |
| 270 | <p>Indicators arrive with techniques attached three ways: OTX pulses carry ATT&CK IDs directly; feeds without IDs have them inferred from the malware family and from the threat type. The mapping tables are hand-built, 27 techniques in the catalog, keyed to malware families and threat types:</p><pre><code>MALWARE_TECHNIQUES = { |
| 271 | "cobaltstrike": ["T1071.001", "T1059.001", "T1055"], |
| 272 | "agenttesla": ["T1056.001", "T1555", "T1041"], |
| 273 | "emotet": ["T1566.001", "T1071.001", "T1105"], |
| 274 | "qakbot": ["T1566.001", "T1055", "T1071.001"], |
| 275 | ... |
| 276 | } |
| 277 | |
| 278 | THREAT_TYPE_TECHNIQUES = { |
| 279 | "phishing": ["T1566.002", "T1204.001"], |
| 280 | "botnet_cc": ["T1071.001", "T1573"], |
| 281 | "malware_download": ["T1105", "T1204.002"], |
| 282 | "ransomware": ["T1486", "T1071.001"], |
| 283 | "leaked_credentials": ["T1589.001"], |
| 284 | }</code></pre><p>Family lookup is fuzzy. It lowercases the family and strips spaces, hyphens and underscores, so <code>Cobalt Strike</code>, <code>cobalt-strike</code> and <code>CobaltStrike</code> all hit the same row:</p><pre><code>def techniques_for_malware(malware: str | None) -> list[str]: |
| 285 | if not malware: |
| 286 | return [] |
| 287 | key = malware.lower().replace(" ", "").replace("-", "").replace("_", "") |
| 288 | for name, techniques in MALWARE_TECHNIQUES.items(): |
| 289 | if name in key: |
| 290 | return list(techniques) |
| 291 | return []</code></pre><p>Once collected, <code>extract_techniques()</code> rolls every indicator's techniques into a per-technique record with its indicator count and contributing sources, sorted by prevalence, and renders a coverage table. From the live fixture run, the top of the report reads:</p><pre><code>| Technique | Tactic | Name | Indicators | Sources | |
| 292 | |---|---|---|---|---| |
| 293 | | T1204.001 | execution | User Execution: Malicious Link | 10 | openphish, threatfox, urlhaus | |
| 294 | | T1105 | command-and-control | Ingress Tool Transfer | 9 | feodo, openphish, otx, threatfox, urlhaus | |
| 295 | | T1566.002 | initial-access | Phishing: Spearphishing Link | 8 | openphish, urlhaus | |
| 296 | | T1071.001 | command-and-control | Application Layer Protocol: Web Protocols | 6 | feodo, otx, threatfox |</code></pre> |
| 297 | <h2>Generated Wazuh rules</h2> |
| 298 | <p>Indicators are written as five Wazuh CDB lists, one per indicator class, each keyed on the value with a malware-family or threat-type label, sorted and deduplicated. A real <code>cti-malicious-ip</code> list from the active bundle:</p><pre><code>185.220.101.45:Dridex |
| 299 | 193.149.176.12:AsyncRAT |
| 300 | 194.36.191.55:QakBot |
| 301 | 45.137.21.9:Cobalt Strike |
| 302 | 91.211.88.34:Emotet</code></pre><p>Labels are sanitized (colons and newlines stripped, clamped to 48 chars), so a value like <code>weird:type</code> can never break the <code>value:label</code> CDB format. Alongside the lists, <code>build_rules_xml()</code> emits list-lookup rules: outbound and inbound IP matches, a DNS-query match, a URL match, a hash-execution match, and a leaked-credential rule, each tagged with the four dominant ATT&CK techniques for that bucket. These are the verbatim outbound-IP and DNS rules from the generated <code>local_cti_rules.xml</code>:</p><pre><code>&lt;group name="cti,threat-intel,auto-generated,"&gt; |
| 303 | &lt;rule id="100300" level="12"&gt; |
| 304 | &lt;field name="dstip" type="pcre2"&gt;\S+&lt;/field&gt; |
| 305 | &lt;list field="dstip" lookup="address_match_key"&gt;etc/lists/cti-malicious-ip&lt;/list&gt; |
| 306 | &lt;description&gt;Outbound connection to CTI-flagged IP: $(dstip)&lt;/description&gt; |
| 307 | &lt;mitre&gt; |
| 308 | &lt;id&gt;T1071.001&lt;/id&gt; |
| 309 | &lt;id&gt;T1573&lt;/id&gt; |
| 310 | &lt;id&gt;T1566.001&lt;/id&gt; |
| 311 | &lt;id&gt;T1059.001&lt;/id&gt; |
| 312 | &lt;/mitre&gt; |
| 313 | &lt;/rule&gt; |
| 314 | &lt;rule id="100302" level="12"&gt; |
| 315 | &lt;field name="win.eventdata.queryName" type="pcre2"&gt;\S+&lt;/field&gt; |
| 316 | &lt;list field="win.eventdata.queryName" lookup="match_key"&gt;etc/lists/cti-malicious-domain&lt;/list&gt; |
| 317 | &lt;description&gt;DNS query for CTI-flagged domain: $(win.eventdata.queryName)&lt;/description&gt; |
| 318 | &lt;mitre&gt; |
| 319 | &lt;id&gt;T1204.001&lt;/id&gt; |
| 320 | &lt;id&gt;T1566.002&lt;/id&gt; |
| 321 | &lt;/mitre&gt; |
| 322 | &lt;/rule&gt; |
| 323 | &lt;/group&gt;</code></pre><p>The lookups reference <code>etc/lists/</code>, and the hash rule fires at level 13, so the output drops directly into a Wazuh manager with no hand-editing. A test parses the generated XML with <code>ElementTree</code> to prove it is well-formed and that the <code>&lt;mitre&gt;</code> tags survived.</p> |
| 324 | <h2>The signed approval gate</h2> |
| 325 | <p>The pipeline never deploys on its own. The review link carries an <code>itsdangerous</code> URL-safe timed token, signed with a server secret under a fixed salt, so it cannot be forged and stops working once the TTL elapses:</p><pre><code>def serializer(secret: str) -> URLSafeTimedSerializer: |
| 326 | return URLSafeTimedSerializer(secret, salt="cti-rule-approval") |
| 327 | |
| 328 | def make_token(secret: str, bundle_id: str) -> str: |
| 329 | return serializer(secret).dumps({"bundle_id": bundle_id}) |
| 330 | |
| 331 | def verify_token(secret: str, token: str, max_age: int) -> str | None: |
| 332 | try: |
| 333 | data = serializer(secret).loads(token, max_age=max_age) |
| 334 | except (BadSignature, SignatureExpired): |
| 335 | return None |
| 336 | return data.get("bundle_id")</code></pre><p>A small Flask console serves the gate. Every state-changing route verifies the token first and <code>abort(403)</code>s on a bad or expired one; approval promotes the candidate to active and, if a Wazuh path is set, deploys to the manager:</p><pre><code>@app.post("/approve/&lt;token&gt;") |
| 337 | def approve(token): |
| 338 | bundle_id = approval.verify_token(secret, token, ttl) |
| 339 | if not bundle_id: |
| 340 | abort(403) |
| 341 | active_dir = config.get("wazuh_etc_dir") |
| 342 | result = pipeline.promote( |
| 343 | bundle_id, output_dir, Path(active_dir) if active_dir else None |
| 344 | ) |
| 345 | return render_template("result.html", action="approved", result=result)</code></pre><p>Reject writes a <code>REJECTED</code> marker carrying the analyst's reason; the dashboard then lists every candidate bundle as pending, approved, or rejected. Promotion records the approved key set in <code>state.json</code>, which is exactly what the next run diffs against.</p> |
| 346 | <h2>Tests</h2> |
| 347 | <p>30 tests across 7 files cover every stage end-to-end: feed parsing against fixtures, cross-source dedup, confidence filtering, TTP extraction, CDB and XML generation, token signing and expiry, email rendering, and the full Flask approve/reject flow. The web tests run the real pipeline and drive the routes with Flask's test client:</p><pre><code>def test_review_requires_valid_token(app_and_bundle): |
| 348 | app, _ = app_and_bundle |
| 349 | client = app.test_client() |
| 350 | assert client.get("/review/garbage").status_code == 403 |
| 351 | |
| 352 | |
| 353 | def test_approve_promotes(app_and_bundle): |
| 354 | app, result = app_and_bundle |
| 355 | token = make_token("test-secret", result["bundle_id"]) |
| 356 | client = app.test_client() |
| 357 | resp = client.post(f"/approve/{token}") |
| 358 | assert resp.status_code == 200 |
| 359 | assert b"approved" in resp.data |
| 360 | follow = client.get(f"/review/{token}") |
| 361 | assert b"already been approved" in follow.data</code></pre><p>Token security is pinned directly. A token signed with one secret will not verify under another, and a zero-max-age token is rejected after a one-second sleep:</p><pre><code>def test_token_rejects_wrong_secret(): |
| 362 | token = make_token("secret", "bundle") |
| 363 | assert verify_token("other", token, 60) is None |
| 364 | |
| 365 | |
| 366 | def test_token_expires(): |
| 367 | token = make_token("secret", "bundle") |
| 368 | time.sleep(1) |
| 369 | assert verify_token("secret", token, 0) is None</code></pre> |
| 370 | </div> |
| 371 | <div class="gallery"><figure><div class="shot"><img loading="lazy" src="/assets/cti-detection-automation/02-review-page.png" alt="The single-use signed review page: the candidate diff, indicators broken out by type, the five generated CDB lists, the ATT&CK coverage table, and the approve-and-deploy and reject actions."></div><figcaption>The single-use signed review page: the candidate diff, indicators broken out by type, the five generated CDB lists, the ATT&CK coverage table, and the approve-and-deploy and reject actions.</figcaption></figure><figure><div class="shot"><img loading="lazy" src="/assets/cti-detection-automation/03-dashboard.png" alt="The bundle dashboard listing every candidate with its indicator and technique counts and its pending, approved, or rejected status."></div><figcaption>The bundle dashboard listing every candidate with its indicator and technique counts and its pending, approved, or rejected status.</figcaption></figure><figure><div class="shot"><video controls preload="metadata" src="/assets/cti-detection-automation/cti-approval-walkthrough.mp4"></video></div><figcaption>Full walkthrough: a pipeline run, the approval email, the signed review page, and promotion to the active bundle (video).</figcaption></figure></div> |
| 372 | <p class="repo-line">Repository · github.com/zionboggan/cti-detection-automation</p> |
| 373 | </div></section> |
| 374 | <footer><div class="wrap row"> |
| 375 | <div class="links"> |
| 376 | <a href="/">Portfolio</a> |
| 377 | <a href="https://www.linkedin.com/in/zion-boggan">LinkedIn</a> |
| 378 | <a href="https://oversightprotocol.dev/">Oversight</a> |
| 379 | <a href="mailto:zionboggan0@gmail.com">Email</a> |
| 380 | </div> |
| 381 | <div class="note">Built and deployed on a self-hosted Proxmox homelab. This page mirrors the |
| 382 | project's documentation and results so the work is fully viewable here.</div> |
| 383 | </div></footer> |
| 384 | </body> |
| 385 | </html> |