<!-- cti-detection-automation/index.html -->
<!doctype html>
<html lang="en">
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>CTI Detection Automation | Zion Boggan</title>
<meta name="description" content="Pulls indicators from five live threat-intel feeds, dedupes across them, extracts the MITRE techniques, generates Wazuh CDB lists and a tagged XML ruleset, then emails an analyst a signed, single-use review link before anything reaches the SIEM.">
<link rel="icon" href="data:image/svg+xml,%3Csvg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 32 32'%3E%3Crect width='32' height='32' rx='6' fill='%230c0e12'/%3E%3Ctext x='16' y='22' font-family='monospace' font-size='15' fill='%236cc7b8' text-anchor='middle'%3Ezb%3C/text%3E%3C/svg%3E">
<style>
  /* design tokens: surface colors, hairline colors, text tones, accent, and the page max width */
  :root{
    --bg:#0c0e12; --bg2:#0f1217; --panel:#14181f; --panel2:#171c24;
    --line:#222936; --line2:#2c3543;
    --ink:#e8eaed; --soft:#c3cad4; --muted:#8a94a3; --faint:#5d6675;
    --accent:#6cc7b8; --accent-dim:#274b47;
    --maxw:1020px;
  }
  /* base reset and typography */
  *{box-sizing:border-box;}
  /* animate anchor jumps only for users who have not requested reduced motion */
  @media (prefers-reduced-motion:no-preference){html{scroll-behavior:smooth;}}
  body{margin:0;background:var(--bg);color:var(--ink);
    font-family:-apple-system,BlinkMacSystemFont,"Segoe UI",Roboto,Helvetica,Arial,sans-serif;
    font-size:16px;line-height:1.65;-webkit-font-smoothing:antialiased;}
  .mono{font-family:ui-monospace,SFMono-Regular,"SF Mono",Menlo,Consolas,monospace;}
  a{color:var(--accent);text-decoration:none;}
  a:hover{color:#8fe0d2;}
  /* centered content column shared by nav, hero, sections and footer */
  .wrap{max-width:var(--maxw);margin:0 auto;padding:0 24px;}
 
  /* nav: sticky translucent bar with a blurred backdrop; the link list is hidden at <=680px */
  nav{position:sticky;top:0;z-index:20;background:rgba(12,14,18,.82);
    /* prefixed form first, for Safari releases that predate unprefixed backdrop-filter */
    -webkit-backdrop-filter:blur(10px);backdrop-filter:blur(10px);border-bottom:1px solid var(--line);}
  nav .wrap{display:flex;align-items:center;justify-content:space-between;height:58px;}
  nav .brand{font-weight:600;letter-spacing:.2px;}
  nav .brand .dot{color:var(--accent);}
  nav .links{display:flex;gap:26px;font-size:13.5px;}
  nav .links a{color:var(--muted);}
  nav .links a:hover{color:var(--ink);}
  @media(max-width:680px){nav .links{display:none;}}
 
  /* hero: page header band with a radial accent glow, status line, title, lede, buttons, meta row */
  header.hero{padding:74px 0 54px;border-bottom:1px solid var(--line);
    background:radial-gradient(900px 380px at 78% -10%, #11201e 0%, transparent 60%);}
  .avail{font-size:12.5px;letter-spacing:1.5px;text-transform:uppercase;color:var(--accent);
    display:flex;align-items:center;gap:9px;margin-bottom:20px;}
  /* 7px dot whose box-shadow ring expands and fades on a 2.4s loop */
  .avail .pulse{width:7px;height:7px;border-radius:50%;background:var(--accent);
    box-shadow:0 0 0 0 rgba(108,199,184,.5);animation:p 2.4s infinite;}
  @keyframes p{0%{box-shadow:0 0 0 0 rgba(108,199,184,.45)}70%{box-shadow:0 0 0 8px rgba(108,199,184,0)}100%{box-shadow:0 0 0 0 rgba(108,199,184,0)}}
  /* the pulse never stops on its own, so switch it off when reduced motion is requested */
  @media (prefers-reduced-motion:reduce){.avail .pulse{animation:none;}}
  h1{font-size:clamp(34px,6vw,52px);line-height:1.05;margin:0 0 8px;letter-spacing:-1px;font-weight:680;}
  .hero .sub{font-size:clamp(16px,2.4vw,20px);color:var(--soft);margin:0 0 24px;font-weight:500;}
  .hero .lede{max-width:660px;color:var(--soft);font-size:17px;margin:0 0 28px;}
  .hero .lede b{color:var(--ink);font-weight:600;}
  .cta{display:flex;flex-wrap:wrap;gap:12px;align-items:center;}
  .btn{display:inline-flex;align-items:center;gap:8px;padding:10px 18px;border-radius:8px;
    font-size:14.5px;font-weight:550;border:1px solid var(--line2);color:var(--ink);background:var(--panel);}
  .btn:hover{border-color:var(--accent-dim);background:var(--panel2);color:var(--ink);}
  .btn.primary{background:var(--accent);color:#06231f;border-color:var(--accent);font-weight:650;}
  .btn.primary:hover{background:#8fe0d2;color:#06231f;}
  .meta{margin-top:26px;display:flex;flex-wrap:wrap;gap:8px 22px;font-size:13px;color:var(--muted);}
  .meta .mono{color:var(--faint);}
 
  /* sections: vertical padding plus a bottom hairline on every <section>.
     .shead is a baseline-aligned flex row: .idx label, uppercase h2, and a
     1px .rule that stretches across the remaining width. */
  section{padding:64px 0;border-bottom:1px solid var(--line);}
  .shead{display:flex;align-items:baseline;gap:14px;margin-bottom:30px;}
  .shead .idx{font-size:13px;color:var(--accent);letter-spacing:1px;}
  .shead h2{font-size:14px;letter-spacing:2px;text-transform:uppercase;color:var(--muted);margin:0;font-weight:600;}
  .shead .rule{flex:1;height:1px;background:var(--line);}
 
  /* flagship: gradient panel (.flag) with a .top header, a two-column .grid that
     collapses to one column at <=720px, a 2x .stats grid, a .spec key/value list,
     and a .foot link row. No .flag markup appears in this file's body; the .stat
     tile rules below are not scoped to .flag and are reused by the .facts grid. */
  .flag{background:linear-gradient(180deg,var(--panel) 0%,var(--bg2) 100%);
    border:1px solid var(--line2);border-radius:14px;overflow:hidden;}
  .flag .top{padding:30px 32px 8px;}
  .flag .tag{font-size:12px;letter-spacing:1.5px;text-transform:uppercase;color:var(--accent);margin-bottom:12px;}
  .flag h3{font-size:27px;margin:0 0 6px;letter-spacing:-.4px;}
  .flag h3 .v{font-size:13px;color:var(--muted);font-weight:500;margin-left:8px;letter-spacing:0;}
  .flag .grid{display:grid;grid-template-columns:1.25fr 1fr;gap:30px;padding:14px 32px 30px;}
  .flag p{color:var(--soft);margin:0 0 16px;}
  .flag .stats{display:grid;grid-template-columns:1fr 1fr;gap:12px;margin-top:6px;}
  /* stat tile: large value (.n) over a small label (.k) */
  .stat{background:var(--bg);border:1px solid var(--line);border-radius:9px;padding:13px 15px;}
  .stat .n{font-size:21px;font-weight:680;color:var(--ink);}
  .stat .k{font-size:12px;color:var(--muted);margin-top:2px;}
  /* spec list: rows separated by hairlines, label left and muted <span> value right */
  .spec{background:var(--bg);border:1px solid var(--line);border-radius:10px;padding:18px 18px;}
  .spec .sk{font-size:11px;letter-spacing:1.5px;text-transform:uppercase;color:var(--faint);margin-bottom:10px;}
  .spec ul{margin:0;padding:0;list-style:none;font-size:13.5px;}
  .spec li{padding:6px 0;border-top:1px solid var(--line);color:var(--soft);display:flex;justify-content:space-between;gap:14px;}
  .spec li:first-child{border-top:none;}
  .spec li span{color:var(--muted);}
  .flag .foot{padding:0 32px 28px;display:flex;gap:18px;flex-wrap:wrap;font-size:14px;}
  @media(max-width:720px){.flag .grid{grid-template-columns:1fr;}}
 
  /* lab cards: two-column grid (one column at <=680px) of cards with a fixed-height
     thumbnail, a flex-column body whose paragraph grows to fill spare height, tag
     chips, and a monospace link with a trailing arrow. The .tags chip rules are not
     scoped to .card and are also used by the detail-page hero. */
  .cards{display:grid;grid-template-columns:1fr 1fr;gap:20px;}
  @media(max-width:680px){.cards{grid-template-columns:1fr;}}
  .card{border:1px solid var(--line);border-radius:12px;overflow:hidden;background:var(--panel);
    display:flex;flex-direction:column;transition:border-color .15s,transform .15s;}
  /* hover: accent border and a 2px lift */
  .card:hover{border-color:var(--accent-dim);transform:translateY(-2px);}
  /* thumbnail is cropped from the image's top-left corner */
  .card .thumb{height:172px;overflow:hidden;border-bottom:1px solid var(--line);background:#fff;}
  .card .thumb img{width:100%;height:100%;object-fit:cover;object-position:top left;display:block;}
  .card .body{padding:18px 20px 20px;display:flex;flex-direction:column;flex:1;}
  .card h3{margin:0 0 9px;font-size:17px;}
  .card p{margin:0 0 14px;font-size:14px;color:var(--soft);flex:1;}
  .tags{display:flex;flex-wrap:wrap;gap:6px;margin-bottom:14px;}
  .tags span{font-size:11.5px;color:var(--muted);background:var(--bg);border:1px solid var(--line);
    border-radius:5px;padding:3px 8px;}
  .card .lnk{font-size:13.5px;font-family:ui-monospace,Menlo,monospace;}
  .card .lnk::after{content:" →";}
 
  /* research: bordered list of rows (.ritem). Each row is a three-column grid
     (120px label, flexible text, auto-width link) that stacks into one column at
     <=680px. .progs is a labelled row of chips rendered below the list. */
  .rlede{color:var(--soft);max-width:680px;margin:-6px 0 26px;}
  .research{display:flex;flex-direction:column;gap:0;border:1px solid var(--line);border-radius:12px;overflow:hidden;}
  .ritem{display:grid;grid-template-columns:120px 1fr auto;gap:18px;align-items:center;
    padding:18px 22px;border-top:1px solid var(--line);}
  .ritem:first-child{border-top:none;}
  .ritem:hover{background:var(--panel);}
  .ritem .cls{font-size:11px;letter-spacing:.5px;text-transform:uppercase;color:var(--accent);}
  .ritem h3{margin:0 0 3px;font-size:16px;}
  .ritem p{margin:0;font-size:13.5px;color:var(--muted);}
  .ritem .go{font-family:ui-monospace,Menlo,monospace;font-size:13px;white-space:nowrap;}
  @media(max-width:680px){.ritem{grid-template-columns:1fr;gap:6px;}.ritem .go{margin-top:4px;}}
  .progs{margin-top:22px;}
  .progs .sk{font-size:11px;letter-spacing:1.5px;text-transform:uppercase;color:var(--faint);margin-bottom:11px;}
  .progs .row{display:flex;flex-wrap:wrap;gap:7px;}
  .progs .row span{font-size:12.5px;color:var(--soft);background:var(--panel);border:1px solid var(--line);
    border-radius:6px;padding:4px 10px;}
 
  /* credentials: two-column grid (one column at <=680px) pairing body copy with
     an unstyled list (.certs) whose rows are separated by hairlines; .c is a
     small accent-colored monospace token inside a row. */
  .cred{display:grid;grid-template-columns:1.1fr 1fr;gap:28px;}
  @media(max-width:680px){.cred{grid-template-columns:1fr;}}
  .cred p{color:var(--soft);margin:0 0 14px;}
  .cred .role{font-size:14px;color:var(--muted);}
  .cred .role b{color:var(--ink);font-weight:600;}
  .certs{list-style:none;margin:0;padding:0;}
  .certs li{padding:9px 0;border-top:1px solid var(--line);font-size:14px;color:var(--soft);
    display:flex;gap:10px;align-items:baseline;}
  .certs li:first-child{border-top:none;}
  .certs li .c{color:var(--accent);font-family:ui-monospace,Menlo,monospace;font-size:12px;}
 
  /* footer: wrapping flex row with the link group on one side and a
     width-capped note on the other */
  footer{padding:46px 0 64px;}
  footer .row{display:flex;flex-wrap:wrap;justify-content:space-between;gap:18px;align-items:center;}
  footer .links a{color:var(--soft);margin-right:20px;font-size:14px;}
  footer .note{color:var(--faint);font-size:12.5px;max-width:520px;}
 
  /* detail pages: compact hero, framed screenshots/video, and long-form article styles */
  /* The detail hero is rendered as <header class="hero detail-hero">. A bare
     .detail-hero selector (0,1,0) loses to header.hero (0,1,1), so the compound
     selector is listed as well to let the tighter padding actually apply. */
  .detail-hero,header.hero.detail-hero{padding:40px 0 28px;}
  .back{display:inline-block;font-size:13px;color:var(--muted);margin-bottom:22px;font-family:ui-monospace,Menlo,monospace;}
  .back:hover{color:var(--ink);}
  .kicker{font-size:12px;letter-spacing:2px;text-transform:uppercase;color:var(--accent);margin-bottom:13px;font-family:ui-monospace,Menlo,monospace;}
  .detail-hero h1{font-size:clamp(28px,5vw,42px);margin:0 0 12px;letter-spacing:-.6px;}
  .detail-hero .tagline{font-size:clamp(16px,2.2vw,19px);color:var(--soft);max-width:780px;margin:0 0 18px;}
  /* stat tiles wrap automatically; each column is at least 148px wide */
  .facts{display:grid;grid-template-columns:repeat(auto-fit,minmax(148px,1fr));gap:12px;margin-top:24px;}
  figure{margin:0;}
  /* .shot: bordered, rounded frame with a white backing for screenshots and video */
  .shot{border:1px solid var(--line2);border-radius:12px;overflow:hidden;background:#fff;margin:30px 0 6px;}
  .shot img,.shot video{display:block;width:100%;height:auto;}
  figcaption{font-size:13px;color:var(--muted);margin:11px 2px 0;}
  /* article body: h2s act as section dividers with a top hairline; h2.first drops the divider */
  .content{padding:6px 0 0;}
  .content h2{font-size:13px;letter-spacing:2px;text-transform:uppercase;color:var(--muted);margin:44px 0 16px;font-weight:600;border-top:1px solid var(--line);padding-top:30px;}
  .content h2.first{border-top:none;padding-top:6px;margin-top:18px;}
  .content p{color:var(--soft);margin:0 0 16px;}
  .content ul,.content ol{color:var(--soft);margin:0 0 16px;padding-left:22px;}
  .content li{margin:6px 0;}
  .content strong{color:var(--ink);font-weight:600;}
  /* inline code gets a chip; code inside <pre> resets the chip and scrolls horizontally */
  .content code{font-family:ui-monospace,Menlo,monospace;font-size:13px;background:var(--panel2);border:1px solid var(--line);border-radius:4px;padding:1px 5px;color:var(--soft);}
  .content pre{background:var(--bg2);border:1px solid var(--line2);border-radius:10px;padding:15px 18px;overflow-x:auto;margin:0 0 18px;}
  .content pre code{background:none;border:none;padding:0;font-size:12.5px;color:var(--soft);line-height:1.62;}
  .content table{width:100%;border-collapse:collapse;margin:2px 0 20px;font-size:13.5px;}
  .content th{text-align:left;color:var(--muted);font-weight:600;border-bottom:1px solid var(--line2);padding:9px 12px;font-size:11px;letter-spacing:.6px;text-transform:uppercase;}
  .content td{color:var(--soft);border-bottom:1px solid var(--line);padding:9px 12px;vertical-align:top;}
  .content td code{font-size:12px;}
  .gallery{margin-top:8px;}
  .repo-line{margin:42px 0 0;color:var(--faint);font-size:12.5px;font-family:ui-monospace,Menlo,monospace;}
</style>
<link rel="canonical" href="https://zionboggan.com/cti-detection-automation/">
<meta name="author" content="Zion Boggan">
<meta name="robots" content="index, follow, max-image-preview:large">
<meta property="og:type" content="article">
<meta property="og:site_name" content="Zion Boggan">
<meta property="og:title" content="CTI Detection Automation | Zion Boggan">
<meta property="og:description" content="Pulls indicators from five live threat-intel feeds, dedupes across them, extracts the MITRE techniques, generates Wazuh CDB lists and a tagged XML ruleset, then emails an analyst a signed, single-use review link before anything reaches the SIEM.">
<meta property="og:url" content="https://zionboggan.com/cti-detection-automation/">
<meta property="og:image" content="https://zionboggan.com/assets/cti-detection-automation/01-approval-email.png">
<meta name="twitter:card" content="summary_large_image">
<meta name="twitter:title" content="CTI Detection Automation | Zion Boggan">
<meta name="twitter:description" content="Pulls indicators from five live threat-intel feeds, dedupes across them, extracts the MITRE techniques, generates Wazuh CDB lists and a tagged XML ruleset, then emails an analyst a signed, single-use review link before anything reaches the SIEM.">
<meta name="twitter:image" content="https://zionboggan.com/assets/cti-detection-automation/01-approval-email.png">
<script type="application/ld+json">{"@context":"https://schema.org","@type":"TechArticle","headline":"CTI Detection Automation","description":"Pulls indicators from five live threat-intel feeds, dedupes across them, extracts the MITRE techniques, generates Wazuh CDB lists and a tagged XML ruleset, then emails an analyst a signed, single-use review link before anything reaches the SIEM.","url":"https://zionboggan.com/cti-detection-automation/","image":"https://zionboggan.com/assets/cti-detection-automation/01-approval-email.png","author":{"@type":"Person","name":"Zion Boggan","url":"https://zionboggan.com"},"publisher":{"@type":"Person","name":"Zion Boggan"}}</script>
</head>
<body>
<!-- Site navigation: brand link to the home page plus anchors into its sections -->
<nav>
  <div class="wrap">
    <a class="brand mono" href="/" style="color:var(--ink)">zion_boggan<span class="dot">.</span></a>
    <span class="links">
      <a href="/#oversight">Oversight</a>
      <a href="/#labs">Labs</a>
      <a href="/#research">Research</a>
      <a href="/#background">Background</a>
      <a href="/">Home</a>
    </span>
  </div>
</nav>
<!-- Detail-page hero: back link, category kicker, title, tagline, tech tags, and headline stats -->
<header class="hero detail-hero">
  <div class="wrap">
    <a class="back" href="/#labs">&larr; All work</a>
    <div class="kicker">THREAT INTEL</div>
    <h1>CTI Detection Automation</h1>
    <p class="tagline">Pulls indicators from five live threat-intel feeds, dedupes across them, extracts the MITRE techniques, generates Wazuh CDB lists and a tagged XML ruleset, then emails an analyst a signed, single-use review link before anything reaches the SIEM.</p>
    <div class="tags">
      <span>Python</span>
      <span>ThreatFox / OTX / URLhaus</span>
      <span>Wazuh CDB</span>
      <span>ATT&amp;CK</span>
      <span>Flask</span>
      <span>itsdangerous</span>
      <span>SMTP</span>
    </div>
    <div class="facts">
      <div class="stat"><div class="n">5</div><div class="k">live feeds connected</div></div>
      <div class="stat"><div class="n">30</div><div class="k">tests passing</div></div>
      <div class="stat"><div class="n">5</div><div class="k">Wazuh CDB list types</div></div>
      <div class="stat"><div class="n">27</div><div class="k">ATT&amp;CK techniques in catalog</div></div>
      <div class="stat"><div class="n">1</div><div class="k">signed human gate before deploy</div></div>
    </div>
    <!-- call-to-action row is empty on this page; it currently contributes only its top margin -->
    <div class="cta" style="margin-top:24px"></div>
  </div>
</header>
<section><div class="wrap">
  <!-- Lead screenshot. The caption lives inside <figure>, the only valid parent for <figcaption>; the .shot frame moves to an inner wrapper so the caption still renders below the bordered box. Loaded eagerly because it is the first image under the hero. -->
  <figure>
    <div class="shot"><img src="/assets/cti-detection-automation/01-approval-email.png" alt="The approval email the analyst receives: bundle ID, indicator counts by type, the diff against the last approved bundle, the top ATT&amp;CK techniques, and the signed review link."></div>
    <figcaption>The approval email the analyst receives: bundle ID, indicator counts by type, the diff against the last approved bundle, the top ATT&amp;CK techniques, and the signed review link.</figcaption>
  </figure>
  <div class="content">
  <h2>The pipeline</h2>
<p>Each run flows through a fixed set of stages, every one of which is unit-tested in isolation. <code>build_bundle()</code> is the spine:</p><pre><code>def build_bundle(config: dict) -> RuleBundle:
    raw = collect_indicators(config)
    merged = deduplicate(raw)
    kept = filter_by_confidence(merged, config["min_confidence"])
    techniques = extract_techniques(kept)
    cdb_lists = rules.build_cdb_lists(kept)
    rules_xml = rules.build_rules_xml(kept, base_id=config["rules"]["base_id"])
    bundle_id = datetime.now(timezone.utc).strftime("cti-%Y%m%d-%H%M%S")
    return RuleBundle(
        bundle_id=bundle_id,
        generated_at=RuleBundle.now_iso(),
        indicators=kept,
        techniques=techniques,
        cdb_lists=cdb_lists,
        rules_xml=rules_xml,
    )</code></pre><p>A real run over the bundled fixtures produces a 19-indicator bundle: 5 IPs, 6 domains, 6 URLs, and 2 hashes, mapping to 15 distinct ATT&amp;CK techniques across the five sources.</p>
<h2>Feed connectors</h2>
<p>Five public sources, each a connector subclassing one abstract <code>Feed</code> with a pure <code>parse()</code> split cleanly from its network <code>fetch_raw()</code>, so parsing is tested against fixtures while the HTTP path stays thin:</p><table><thead><tr><th>Feed</th><th>Provides</th><th>Auth</th></tr></thead><tbody><tr><td>ThreatFox</td><td>IPs, domains, URLs, hashes, malware family</td><td>free key (POST)</td></tr><tr><td>Feodo Tracker</td><td>botnet C2 IPs</td><td>keyless</td></tr><tr><td>URLhaus</td><td>malware-distribution URLs + host domains</td><td>keyless</td></tr><tr><td>AlienVault OTX</td><td>pulse indicators carrying ATT&amp;CK IDs</td><td>free key</td></tr><tr><td>OpenPhish</td><td>phishing URLs + host domains</td><td>keyless</td></tr></tbody></table><p>The ThreatFox connector is representative: it normalizes the API's IOC types, strips ports off <code>ip:port</code> values, and derives techniques from both the malware family and the threat type.</p><pre><code>def parse(self, raw: str) -> list[Indicator]:
    payload = load_json(raw)
    if payload.get("query_status") != "ok":
        return []
    indicators: list[Indicator] = []
    for entry in payload.get("data", []):
        ioc_type = TYPE_MAP.get(entry.get("ioc_type"))
        if ioc_type is None:
            continue
        value = entry.get("ioc", "")
        if ioc_type == "ip" and ":" in value:
            value = value.split(":", 1)[0]
        malware = entry.get("malware_printable") or entry.get("malware")
        threat_type = entry.get("threat_type", "unknown")
        techniques = mitre.techniques_for_malware(malware)
        techniques += mitre.techniques_for_threat_type(threat_type)</code></pre><p>URLhaus and OpenPhish additionally split each URL's host into a separate <code>domain</code> indicator, which is where the cross-feed dedup earns its keep.</p>
<h2>Dedup and normalization</h2>
<p>Every connector emits the same <code>Indicator</code> dataclass, so downstream code never special-cases a source. Deduplication keys on <code>(type, value.lower())</code>, so the same IP seen in ThreatFox, Feodo, and OTX collapses to one indicator carrying the union of all three sources, the maximum confidence, and the union of techniques and tags:</p><pre><code>def deduplicate(indicators: list[Indicator]) -> list[Indicator]:
    merged: dict[tuple[str, str], Indicator] = {}
    for indicator in indicators:
        key = indicator.key()
        existing = merged.get(key)
        if existing is None:
            merged[key] = Indicator(...)
            continue
        existing.confidence = max(existing.confidence, indicator.confidence)
        existing.techniques = sorted(set(existing.techniques) | set(indicator.techniques))
        existing.tags = sorted(set(existing.tags) | set(indicator.tags))
        existing.malware = existing.malware or indicator.malware
        existing.reference = existing.reference or indicator.reference
        existing.first_seen = existing.first_seen or indicator.first_seen
        if indicator.source not in existing.source.split(","):
            existing.source = ",".join(sorted(set(existing.source.split(",") + [indicator.source])))
    return list(merged.values())</code></pre><p>A merge test pins the behavior exactly:</p><pre><code>def test_merges_same_indicator_across_sources():
    merged = deduplicate([
        make("45.137.21.9", "threatfox", 100, ["T1071.001"], "Cobalt Strike"),
        make("45.137.21.9", "feodo", 90, ["T1573"]),
        make("45.137.21.9", "otx", 60, ["T1059.001"]),
    ])
    assert len(merged) == 1
    indicator = merged[0]
    assert indicator.confidence == 100
    assert set(indicator.techniques) == {"T1071.001", "T1573", "T1059.001"}
    assert set(indicator.source.split(",")) == {"threatfox", "feodo", "otx"}</code></pre><p>After dedup, <code>filter_by_confidence()</code> drops anything under the configured threshold before any techniques or rules are derived.</p>
<h2>MITRE TTP extraction</h2>
<p>Indicators arrive with techniques attached three ways: OTX pulses carry ATT&amp;CK IDs directly; feeds without IDs have them inferred from the malware family and from the threat type. The mapping tables are hand-built (27 techniques in the catalog) and keyed to malware families and threat types:</p><pre><code>MALWARE_TECHNIQUES = {
    "cobaltstrike": ["T1071.001", "T1059.001", "T1055"],
    "agenttesla": ["T1056.001", "T1555", "T1041"],
    "emotet": ["T1566.001", "T1071.001", "T1105"],
    "qakbot": ["T1566.001", "T1055", "T1071.001"],
    ...
}
 
THREAT_TYPE_TECHNIQUES = {
    "phishing": ["T1566.002", "T1204.001"],
    "botnet_cc": ["T1071.001", "T1573"],
    "malware_download": ["T1105", "T1204.002"],
    "ransomware": ["T1486", "T1071.001"],
    "leaked_credentials": ["T1589.001"],
}</code></pre><p>Family lookup is fuzzy. It lowercases the family and strips spaces, hyphens and underscores, so <code>Cobalt Strike</code>, <code>cobalt-strike</code> and <code>CobaltStrike</code> all hit the same row:</p><pre><code>def techniques_for_malware(malware: str | None) -> list[str]:
    if not malware:
        return []
    key = malware.lower().replace(" ", "").replace("-", "").replace("_", "")
    for name, techniques in MALWARE_TECHNIQUES.items():
        if name in key:
            return list(techniques)
    return []</code></pre><p>Once collected, <code>extract_techniques()</code> rolls every indicator's techniques into a per-technique record with its indicator count and contributing sources, sorted by prevalence, and renders a coverage table. From the live fixture run, the top of the report reads:</p><pre><code>| Technique | Tactic | Name | Indicators | Sources |
|---|---|---|---|---|
| T1204.001 | execution | User Execution: Malicious Link | 10 | openphish, threatfox, urlhaus |
| T1105 | command-and-control | Ingress Tool Transfer | 9 | feodo, openphish, otx, threatfox, urlhaus |
| T1566.002 | initial-access | Phishing: Spearphishing Link | 8 | openphish, urlhaus |
| T1071.001 | command-and-control | Application Layer Protocol: Web Protocols | 6 | feodo, otx, threatfox |</code></pre>
<h2>Generated Wazuh rules</h2>
<p>Indicators are written as five Wazuh CDB lists, one per indicator class, each keyed on the value with a malware-family or threat-type label, sorted and deduplicated. A real <code>cti-malicious-ip</code> list from the active bundle:</p><pre><code>185.220.101.45:Dridex
193.149.176.12:AsyncRAT
194.36.191.55:QakBot
45.137.21.9:Cobalt Strike
91.211.88.34:Emotet</code></pre><p>Labels are sanitized (colons and newlines stripped, length clamped to 48 chars), so a label like <code>weird:type</code> can never break the <code>value:label</code> CDB format. Alongside the lists, <code>build_rules_xml()</code> emits list-lookup rules: outbound and inbound IP matches, a DNS-query match, a URL match, a hash-execution match, and a leaked-credential rule, each tagged with up to four dominant ATT&amp;CK techniques for that bucket. These are the verbatim outbound-IP and DNS rules from the generated <code>local_cti_rules.xml</code>:</p><pre><code>&lt;group name="cti,threat-intel,auto-generated,"&gt;
  &lt;rule id="100300" level="12"&gt;
    &lt;field name="dstip" type="pcre2"&gt;\S+&lt;/field&gt;
    &lt;list field="dstip" lookup="address_match_key"&gt;etc/lists/cti-malicious-ip&lt;/list&gt;
    &lt;description&gt;Outbound connection to CTI-flagged IP: $(dstip)&lt;/description&gt;
    &lt;mitre&gt;
      &lt;id&gt;T1071.001&lt;/id&gt;
      &lt;id&gt;T1573&lt;/id&gt;
      &lt;id&gt;T1566.001&lt;/id&gt;
      &lt;id&gt;T1059.001&lt;/id&gt;
    &lt;/mitre&gt;
  &lt;/rule&gt;
  &lt;rule id="100302" level="12"&gt;
    &lt;field name="win.eventdata.queryName" type="pcre2"&gt;\S+&lt;/field&gt;
    &lt;list field="win.eventdata.queryName" lookup="match_key"&gt;etc/lists/cti-malicious-domain&lt;/list&gt;
    &lt;description&gt;DNS query for CTI-flagged domain: $(win.eventdata.queryName)&lt;/description&gt;
    &lt;mitre&gt;
      &lt;id&gt;T1204.001&lt;/id&gt;
      &lt;id&gt;T1566.002&lt;/id&gt;
    &lt;/mitre&gt;
  &lt;/rule&gt;
&lt;/group&gt;</code></pre><p>The lookups reference <code>etc/lists/</code>, and the hash rule fires at level 13, so the output drops directly into a Wazuh manager with no hand-editing. A test parses the generated XML with <code>ElementTree</code> to prove it is well-formed and that the <code>&lt;mitre&gt;</code> tags survived.</p>
<h2>The signed approval gate</h2>
<p>The pipeline never deploys on its own. The review link carries an <code>itsdangerous</code> URL-safe timed token, signed with a server secret under a fixed salt, so it cannot be forged and stops working once the TTL elapses:</p><pre><code>def serializer(secret: str) -> URLSafeTimedSerializer:
    return URLSafeTimedSerializer(secret, salt="cti-rule-approval")
 
def make_token(secret: str, bundle_id: str) -> str:
    return serializer(secret).dumps({"bundle_id": bundle_id})
 
def verify_token(secret: str, token: str, max_age: int) -> str | None:
    try:
        data = serializer(secret).loads(token, max_age=max_age)
    except (BadSignature, SignatureExpired):
        return None
    return data.get("bundle_id")</code></pre><p>A small Flask console serves the gate. Every state-changing route verifies the token first and <code>abort(403)</code>s on a bad or expired one; approval promotes the candidate to active and, if a Wazuh path is set, deploys to the manager:</p><pre><code>@app.post("/approve/&lt;token&gt;")
def approve(token):
    bundle_id = approval.verify_token(secret, token, ttl)
    if not bundle_id:
        abort(403)
    active_dir = config.get("wazuh_etc_dir")
    result = pipeline.promote(
        bundle_id, output_dir, Path(active_dir) if active_dir else None
    )
    return render_template("result.html", action="approved", result=result)</code></pre><p>Reject writes a <code>REJECTED</code> marker carrying the analyst's reason; the dashboard then lists every candidate bundle as pending, approved, or rejected. Promotion records the approved key set in <code>state.json</code>, which is exactly what the next run diffs against.</p>
<h2>Tests</h2>
<p>30 tests across 7 files cover every stage end-to-end: feed parsing against fixtures, cross-source dedup, confidence filtering, TTP extraction, CDB and XML generation, token signing and expiry, email rendering, and the full Flask approve/reject flow. The web tests run the real pipeline and drive the routes with Flask's test client:</p><pre><code>def test_review_requires_valid_token(app_and_bundle):
    app, _ = app_and_bundle
    client = app.test_client()
    assert client.get("/review/garbage").status_code == 403
 
 
def test_approve_promotes(app_and_bundle):
    app, result = app_and_bundle
    token = make_token("test-secret", result["bundle_id"])
    client = app.test_client()
    resp = client.post(f"/approve/{token}")
    assert resp.status_code == 200
    assert b"approved" in resp.data
    follow = client.get(f"/review/{token}")
    assert b"already been approved" in follow.data</code></pre><p>Token security is pinned directly by two tests. A token signed with one secret will not verify under another, and a token checked against a zero max-age is rejected after a one-second sleep:</p><pre><code>def test_token_rejects_wrong_secret():
    token = make_token("secret", "bundle")
    assert verify_token("other", token, 60) is None
 
 
def test_token_expires():
    token = make_token("secret", "bundle")
    time.sleep(1)
    assert verify_token("secret", token, 0) is None</code></pre>
  </div>
  <!-- Gallery. Each caption lives inside its <figure>, the only valid parent for <figcaption>; the .shot frame is an inner wrapper so captions still render below the bordered box. -->
  <div class="gallery">
    <figure>
      <div class="shot"><img loading="lazy" src="/assets/cti-detection-automation/02-review-page.png" alt="The single-use signed review page: the candidate diff, indicators broken out by type, the five generated CDB lists, the ATT&amp;CK coverage table, and the approve-and-deploy and reject actions."></div>
      <figcaption>The single-use signed review page: the candidate diff, indicators broken out by type, the five generated CDB lists, the ATT&amp;CK coverage table, and the approve-and-deploy and reject actions.</figcaption>
    </figure>
    <figure>
      <div class="shot"><img loading="lazy" src="/assets/cti-detection-automation/03-dashboard.png" alt="The bundle dashboard listing every candidate with its indicator and technique counts and its pending, approved, or rejected status."></div>
      <figcaption>The bundle dashboard listing every candidate with its indicator and technique counts and its pending, approved, or rejected status.</figcaption>
    </figure>
    <!-- NOTE(review): the video has no <track kind="captions">; add one if the walkthrough carries narration. -->
    <figure>
      <div class="shot"><video controls preload="metadata" src="/assets/cti-detection-automation/cti-approval-walkthrough.mp4"></video></div>
      <figcaption>Full walkthrough: a pipeline run, the approval email, the signed review page, and promotion to the active bundle (video).</figcaption>
    </figure>
  </div>
  <p class="repo-line">Repository &middot; github.com/zionboggan/cti-detection-automation</p>
</div></section>
<!-- Page footer: profile/contact links beside a short note about how the page is hosted -->
<footer>
  <div class="wrap row">
    <div class="links">
      <a href="/">Portfolio</a>
      <a href="https://www.linkedin.com/in/zion-boggan">LinkedIn</a>
      <a href="https://oversightprotocol.dev/">Oversight</a>
      <a href="mailto:zionboggan0@gmail.com">Email</a>
    </div>
    <div class="note">
      Built and deployed on a self-hosted Proxmox homelab.
      This page mirrors the project's documentation and results so the work is fully viewable here.
    </div>
  </div>
</footer>
</body>
</html>