| 1 | import { createInterface } from 'node:readline/promises'; |
| 2 | import { sha256, shannonEntropy, truncate, c } from './util.js'; |
| 3 | |
/**
 * Detection rules, in the order they are scanned.
 *
 * Every entry has the shape `{ id, severity, re }`, where `re` is a global
 * (`g`) regular expression. Rules are listed as `[id, re]` tuples grouped by
 * severity (`high`, then `medium`, then `soft`) and flattened into the final
 * list. `secret-assignment` deliberately appears twice: the first pattern
 * accepts quoted values of 4+ units or unquoted values of 6+ characters, the
 * second accepts any quoted value that contains at least one backslash escape.
 */
export const RULES = [
  [
    'high',
    [
      ['private-key-block', /-----BEGIN [A-Z ]*PRIVATE KEY( BLOCK)?-----[\s\S]*?(-----END [A-Z ]*PRIVATE KEY( BLOCK)?-----|$)/g],
      ['aws-access-key', /\b(AKIA|ASIA)[0-9A-Z]{16}\b/g],
      ['github-token', /\b(ghp|gho|ghu|ghs|ghr)_[A-Za-z0-9]{36,}\b/g],
      ['github-fine-grained', /\bgithub_pat_[A-Za-z0-9_]{22,}\b/g],
      ['gitlab-token', /\bglpat-[0-9a-zA-Z_-]{20,}\b/g],
      ['anthropic-key', /\bsk-ant-[A-Za-z0-9_-]{20,}\b/g],
      ['openai-key', /\bsk-(?!ant-)[A-Za-z0-9_-]{20,}\b/g],
      ['slack-token', /\bxox[baprs]-[0-9A-Za-z-]{10,}\b/g],
      ['stripe-live-key', /\b[sr]k_live_[0-9a-zA-Z]{10,}\b/g],
      ['npm-token', /\bnpm_[A-Za-z0-9]{36}\b/g],
      ['tailscale-key', /\btskey-[a-zA-Z0-9-]{10,}\b/g],
      ['google-api-key', /\bAIza[0-9A-Za-z_-]{35}\b/g],
      ['sendgrid-key', /\bSG\.[A-Za-z0-9_-]{16,32}\.[A-Za-z0-9_-]{16,64}\b/g],
      ['twilio-key', /\bSK[0-9a-fA-F]{32}\b/g],
      ['telegram-bot-token', /\b\d{8,10}:AA[A-Za-z0-9_-]{32,33}\b/g],
      ['discord-webhook', /https:\/\/(?:ptb\.|canary\.)?discord(?:app)?\.com\/api\/webhooks\/\d+\/[A-Za-z0-9_-]+/g],
      ['jwt', /\beyJ[A-Za-z0-9_-]{10,}\.[A-Za-z0-9_-]{10,}\.[A-Za-z0-9_-]{5,}\b/g],
    ],
  ],
  [
    'medium',
    [
      ['hex-token', /\b[0-9a-fA-F]{32,512}\b/g],
      ['wireguard-key', /\b(PrivateKey|PresharedKey)\s*=\s*[A-Za-z0-9+/]{42,44}=?/g],
      ['url-basic-auth', /\b[a-z][a-z0-9+.-]{0,30}:\/\/[^/\s:@'"`]{2,256}:[^/\s@'"`]{2,256}@[^\s'"`]{1,512}/gi],
      ['bearer-header', /\bBearer\s+[A-Za-z0-9._+/=-]{20,}\b/gi],
      ['secret-assignment', /["'`]?\b(password|passwd|pwd|secret|api[_-]?key|access[_-]?token|auth[_-]?token|client[_-]?secret|secret[_-]?key|token|bearer)\b["'`]?\s*[:=]\s*(?!(?:["'`]?\s*)?(?:\$\{|\$\(|<|%|\*{3}|\.{3}|REDACTED|\[REDACTED|xxx+|placeholder|changeme|example|your[-_]|null\b|true\b|false\b))(?:"(?:[^"\\]|\\.){4,512}"|'(?:[^'\\]|\\.){4,512}'|`(?:[^`\\]|\\.){4,512}`|[^\s'"`,;){}]{6,512})/gi],
      ['secret-assignment', /["'`]?\b(password|passwd|pwd|secret|api[_-]?key|access[_-]?token|auth[_-]?token|client[_-]?secret|secret[_-]?key|token|bearer)\b["'`]?\s*[:=]\s*(?!(?:["'`]?\s*)?(?:\$\{|\$\(|<|%|\*{3}|\.{3}|REDACTED|\[REDACTED|xxx+|placeholder|changeme|example|your[-_]|null\b|true\b|false\b))(?:"(?:[^"\\]|\\.)*?\\.(?:[^"\\]|\\.)*?"|'(?:[^'\\]|\\.)*?\\.(?:[^'\\]|\\.)*?'|`(?:[^`\\]|\\.)*?\\.(?:[^`\\]|\\.)*?`)/gi],
    ],
  ],
  [
    'soft',
    [
      ['email', /\b[A-Za-z0-9._%+-]+@(?!(?:users\.noreply\.github\.com|example\.(?:com|org)))[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b/g],
      ['ipv4', /\b(?:(?:25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)\.){3}(?:25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)\b(?!\.\d)/g],
      ['home-dir-username', /(?:\/(?:home|Users)\/|C:\\Users\\)([A-Za-z][A-Za-z0-9._-]{2,30})\b/g],
    ],
  ],
].flatMap(([severity, entries]) => entries.map(([id, re]) => ({ id, severity, re })));
| 35 | |
// Upper bound on the length of a single token-character run that is scanned
// as-is; findOversizedRuns() reports longer runs.
const MAX_TOKEN_LEN = 4096;

// One character of the token alphabet (base64 / base64url characters plus `=`).
const TOKEN_CHAR_RE = /[A-Za-z0-9+/_=-]/;

// Whole-string test: hexadecimal digits only.
const HEX_RE = /^[0-9a-fA-F]+$/;

// Whole-string test: starts with a digit and contains only digits, dots and dashes.
const VERSION_LIKE_RE = /^\d+[.\d-]*$/;

// Word-bounded runs of 32..MAX_TOKEN_LEN token characters; scanText() feeds
// these to its entropy check.
const ENTROPY_CANDIDATE_RE = new RegExp(`\\b[A-Za-z0-9+/_=-]{32,${MAX_TOKEN_LEN}}\\b`, 'g');

// Characters dropped before the joined-token scan: any whitespace plus
// U+200B..U+200D and U+FEFF.
const JOIN_SEPARATOR_RE = /[\s\u200B-\u200D\uFEFF]/;

// Ids of the RULES entries that get a relaxed copy in LOOSE_RULES.
const JOINED_SCAN_RULE_IDS = new Set([
  'aws-access-key', 'github-token', 'github-fine-grained',
  'gitlab-token', 'anthropic-key', 'openai-key',
  'slack-token', 'stripe-live-key', 'npm-token',
  'tailscale-key', 'google-api-key', 'sendgrid-key',
  'twilio-key', 'telegram-bot-token', 'jwt',
]);
| 59 | |
/**
 * Relaxed copies of the RULES entries whose id is in JOINED_SCAN_RULE_IDS,
 * used by scanJoinedProviderTokens() on separator-stripped text.
 *
 * Compared with the source rule, each pattern:
 *  - drops a leading and a trailing `\b`, because surrounding text is glued
 *    directly onto the token once separators are removed;
 *  - caps every open-ended `{n,}` quantifier at `{n,128}`.
 *
 * The source rule's flags are carried over (so a case-insensitive rule stays
 * case-insensitive) and `g` is added if missing, since the scan loop relies
 * on `lastIndex`.
 */
const LOOSE_RULES = RULES.filter((r) => JOINED_SCAN_RULE_IDS.has(r.id)).map(({ id, severity, re }) => {
  const source = re.source
    .replace(/^\\b/, '')
    .replace(/\\b$/, '')
    .replace(/\{(\d+),\}/g, '{$1,128}');
  const flags = re.flags.includes('g') ? re.flags : `${re.flags}g`;
  return { id, severity, re: new RegExp(source, flags) };
});
| 68 | |
/**
 * Locate every maximal run of TOKEN_CHAR_RE characters that is longer than
 * MAX_TOKEN_LEN.
 *
 * @param {string} text - Text to inspect.
 * @returns {Array<[number, number]>} `[start, end)` index pairs, in ascending order.
 */
function findOversizedRuns(text) {
  const oversized = [];
  let runStart = -1;

  // Close the currently open run (if any) at `end`, recording it when too long.
  const closeRun = (end) => {
    if (runStart === -1) return;
    if (end - runStart > MAX_TOKEN_LEN) oversized.push([runStart, end]);
    runStart = -1;
  };

  for (let pos = 0; pos < text.length; pos++) {
    if (!TOKEN_CHAR_RE.test(text[pos])) {
      closeRun(pos);
    } else if (runStart === -1) {
      runStart = pos;
    }
  }
  // A run that reaches the end of the text still has to be closed.
  closeRun(text.length);
  return oversized;
}
| 83 | |
// Hex lengths accepted as git object ids: 40 and 64 characters.
const GIT_SHA_LENGTHS = new Set([40, 64]);

/**
 * Heuristically decide whether a hex match sits in a git-like context.
 *
 * A match qualifies when it is 40 or 64 hex characters long and either
 *  - the (up to) 48 characters before it end with a git keyword such as
 *    `commit`, `tree` or `HEAD`, optionally followed by whitespace or any of
 *    `: . / -`; or
 *  - it starts a line and is immediately followed by a space.
 *
 * @param {string} match - The matched text.
 * @param {string} text - The text the match was found in.
 * @param {number} index - Offset of `match` within `text`.
 * @returns {boolean}
 */
export function isGitShaCandidate(match, text, index) {
  const hasShaShape = Boolean(match) && GIT_SHA_LENGTHS.has(match.length) && /^[0-9a-fA-F]+$/.test(match);
  if (!hasShaShape) return false;

  const preceding = text.slice(Math.max(0, index - 48), index);
  const followsGitKeyword =
    /\b(?:commit|tree|parent|object|merge|ref|refs|origin|HEAD|tag|blob|cherry|rebase|bisect|stash)\b[\s:./-]*$/i.test(preceding);
  if (followsGitKeyword) return true;

  const opensLine = index === 0 || text[index - 1] === '\n';
  const followedBySpace = text[index + match.length] === ' ';
  return opensLine && followedBySpace;
}
| 96 | |
/**
 * Scan `text` and return every finding, in this order:
 *  1. one `oversized-token` finding per run reported by findOversizedRuns()
 *     (only consulted when the text is longer than MAX_TOKEN_LEN);
 *  2. all RULES matches, rule by rule;
 *  3. `high-entropy-token` findings for ENTROPY_CANDIDATE_RE candidates;
 *  4. whatever scanJoinedProviderTokens() adds.
 *
 * Oversized runs are replaced by newlines in the text handed to steps 2-4, so
 * every reported index still refers to the original `text`.
 *
 * @param {string} text - Text to scan.
 * @returns {Array<{ruleId: string, severity: string, match: string, index: number, gitShaCandidate?: boolean}>}
 */
export function scanText(text) {
  const oversized = text.length > MAX_TOKEN_LEN ? findOversizedRuns(text) : [];

  // Same-length copy of the text with each oversized run blanked out.
  let scanInput = text;
  if (oversized.length) {
    const pieces = [];
    let cursor = 0;
    for (const [from, to] of oversized) {
      pieces.push(text.slice(cursor, from), '\n'.repeat(to - from));
      cursor = to;
    }
    pieces.push(text.slice(cursor));
    scanInput = pieces.join('');
  }

  const findings = oversized.map(([from, to]) => ({
    ruleId: 'oversized-token',
    severity: 'medium',
    match: text.slice(from, to),
    index: from,
  }));

  for (const { id, severity, re } of RULES) {
    re.lastIndex = 0;
    for (let hit = re.exec(scanInput); hit !== null; hit = re.exec(scanInput)) {
      const finding = { ruleId: id, severity, match: hit[0], index: hit.index };
      if (id === 'hex-token') {
        finding.gitShaCandidate = isGitShaCandidate(hit[0], scanInput, hit.index);
      }
      findings.push(finding);
      // Step past a zero-length match so exec() cannot loop forever.
      if (hit.index === re.lastIndex) re.lastIndex++;
    }
  }

  // Spans claimed so far; entropy hits found below are not added to this list.
  const claimed = findings.map(({ index, match }) => [index, index + match.length]);
  const startsInsideClaimed = (pos) => claimed.some(([lo, hi]) => pos >= lo && pos < hi);
  const countCharClasses = (tok) => [/[A-Z]/, /[a-z]/, /[0-9]/].filter((probe) => probe.test(tok)).length;

  ENTROPY_CANDIDATE_RE.lastIndex = 0;
  for (
    let hit = ENTROPY_CANDIDATE_RE.exec(scanInput);
    hit !== null;
    hit = ENTROPY_CANDIDATE_RE.exec(scanInput)
  ) {
    const [tok] = hit;
    if (HEX_RE.test(tok) || VERSION_LIKE_RE.test(tok)) continue;
    if (countCharClasses(tok) < 2) continue;
    if (shannonEntropy(tok) < 4.4) continue;
    if (startsInsideClaimed(hit.index)) continue;
    findings.push({ ruleId: 'high-entropy-token', severity: 'medium', match: tok, index: hit.index });
  }

  return findings.concat(scanJoinedProviderTokens(scanInput, findings, text));
}
| 151 | |
/**
 * Find provider tokens that were broken up by separator characters.
 *
 * All JOIN_SEPARATOR_RE characters are stripped from `scanInput`, the
 * LOOSE_RULES are run over the result, and each hit is mapped back to its
 * span in the un-stripped text. A hit is reported only when
 *  - the matched (joined) text is at most 256 characters long,
 *  - its span in `original` actually contains a separator, and
 *  - its start does not fall inside the span of an `existing` finding.
 *
 * @param {string} scanInput - Text to strip and scan.
 * @param {Array<{index: number, match: string}>} existing - Findings already recorded.
 * @param {string} [original=scanInput] - Text the reported `match` is sliced from.
 * @returns {Array<{ruleId: string, severity: string, match: string, index: number}>}
 */
function scanJoinedProviderTokens(scanInput, existing, original = scanInput) {
  // keptPositions[k] is the offset in scanInput of the k-th character of `joined`.
  const keptPositions = [];
  let joined = '';
  for (let pos = 0; pos < scanInput.length; pos++) {
    const ch = scanInput[pos];
    if (!JOIN_SEPARATOR_RE.test(ch)) {
      joined += ch;
      keptPositions.push(pos);
    }
  }
  // Nothing was stripped, so the regular scan has already seen this exact text.
  if (keptPositions.length === scanInput.length) return [];

  const claimed = existing.map(({ index, match }) => [index, index + match.length]);
  const startsInsideClaimed = (pos) => claimed.some(([lo, hi]) => pos >= lo && pos < hi);

  const joinedFindings = [];
  for (const { id, severity, re } of LOOSE_RULES) {
    re.lastIndex = 0;
    for (let hit = re.exec(joined); hit !== null; hit = re.exec(joined)) {
      const [token] = hit;
      if (token.length <= 256) {
        const start = keptPositions[hit.index];
        const end = keptPositions[hit.index + token.length - 1] + 1;
        const slice = original.slice(start, end);
        if (JOIN_SEPARATOR_RE.test(slice) && !startsInsideClaimed(start)) {
          joinedFindings.push({ ruleId: id, severity, match: slice, index: start });
        }
      }
      // Step past a zero-length match so exec() cannot loop forever.
      if (hit.index === re.lastIndex) re.lastIndex++;
    }
  }
  return joinedFindings;
}
| 182 | |
/**
 * Build the default replacement marker for a finding.
 *
 * @param {{ruleId: string}} finding - Finding whose rule id is embedded in the marker.
 * @returns {string} `[REDACTED:<ruleId>]`
 */
export function maskFor(finding) {
  const { ruleId } = finding;
  return `[REDACTED:${ruleId}]`;
}
| 186 | |
/**
 * Decide, for every distinct matched string, whether it is kept or redacted.
 *
 * Decisions are keyed by `sha256(finding.match)`. `priorDecisions` is copied,
 * never mutated. When the same string is matched more than once, the first
 * finding represents it.
 *
 * Flow:
 *  1. With `keepGitShas`, undecided findings flagged `gitShaCandidate` are
 *     kept automatically, unless the same string was also matched by a
 *     `high` severity finding.
 *  2. In auto mode (`!interactive || autoRedact`), prior `keep` decisions on
 *     `high` / `medium` findings are discarded (git SHA candidates are exempt
 *     when `keepGitShas` is set) and everything undecided is redacted with
 *     `maskFor()`.
 *  3. Otherwise each undecided finding is prompted for on stderr / stdin:
 *     redact (default), keep, or redact with a custom replacement.
 *
 * @param {Array<object>} findings - Findings as produced by `scanText()`.
 * @param {object} [priorDecisions] - Earlier decisions keyed by match hash.
 * @param {object} [options]
 * @param {boolean} [options.interactive] - Prompt for undecided findings.
 * @param {boolean} [options.autoRedact] - Force auto mode even when interactive.
 * @param {boolean} [options.keepGitShas=false] - Auto-keep git SHA candidates.
 * @returns {Promise<{decisions: object, asked: number, overriddenKeeps: number,
 *   autoKeptGitShas: number, autoRedacted?: number}>} `autoRedacted` is only
 *   present when auto mode redacted something.
 */
export async function resolveFindings(findings, priorDecisions, { interactive, autoRedact, keepGitShas = false } = {}) {
  const decisions = { ...priorDecisions };

  // One pass: group findings by match hash and note which hashes were ever
  // matched at `high` severity.
  const unique = new Map();
  const highHashes = new Set();
  for (const f of findings) {
    const h = sha256(f.match);
    if (f.severity === 'high') highHashes.add(h);
    const entry = unique.get(h);
    if (entry) {
      entry.count++;
    } else {
      unique.set(h, { finding: f, count: 1 });
    }
  }

  let autoKeptGitShas = 0;
  if (keepGitShas) {
    for (const [h, { finding }] of unique) {
      if (finding.gitShaCandidate && !decisions[h] && !highHashes.has(h)) {
        decisions[h] = { action: 'keep', ruleId: 'git-commit-sha' };
        autoKeptGitShas++;
      }
    }
  }

  const autoMode = !interactive || autoRedact;
  let overriddenKeeps = 0;
  if (autoMode) {
    // Nobody is present to confirm a `keep`, so stale keeps on high / medium
    // findings are dropped and fall through to auto-redaction below.
    for (const [h, { finding }] of unique) {
      const prior = decisions[h];
      if (prior && prior.action === 'keep' && (finding.severity === 'high' || finding.severity === 'medium')) {
        if (keepGitShas && finding.gitShaCandidate) continue;
        delete decisions[h];
        overriddenKeeps++;
      }
    }
  }

  const unresolved = [...unique.entries()].filter(([h]) => !decisions[h]);
  if (!unresolved.length) return { decisions, asked: 0, overriddenKeeps, autoKeptGitShas };

  if (autoMode) {
    for (const [h, { finding }] of unresolved) {
      decisions[h] = { action: 'redact', replacement: maskFor(finding), ruleId: finding.ruleId };
    }
    return { decisions, asked: 0, autoRedacted: unresolved.length, overriddenKeeps, autoKeptGitShas };
  }

  const rl = createInterface({ input: process.stdin, output: process.stderr });
  try {
    process.stderr.write(
      `\n${c.bold(`${unresolved.length} potential secret${unresolved.length === 1 ? '' : 's'} found`)}. Nothing is exported until each is resolved.\n\n`
    );
    for (const [position, [h, { finding, count }]] of unresolved.entries()) {
      const i = position + 1;
      const sev =
        finding.severity === 'high' ? c.red(finding.severity)
        : finding.severity === 'medium' ? c.yellow(finding.severity)
        : c.gray(finding.severity);
      process.stderr.write(
        `${c.dim(`[${i}/${unresolved.length}]`)} ${sev} ${c.bold(finding.ruleId)} ×${count}\n ${c.cyan(truncate(finding.match, 72))}\n`
      );
      // Re-prompt until the answer is recognised; an empty answer means "redact".
      let answer;
      for (;;) {
        answer = (await rl.question(` ${c.bold('[r]')}edact ${c.bold('[k]')}eep ${c.bold('[e]')}dit replacement › `))
          .trim()
          .toLowerCase();
        if (['r', 'k', 'e', 'redact', 'keep', 'edit', ''].includes(answer)) break;
      }
      if (answer === 'k' || answer === 'keep') {
        decisions[h] = { action: 'keep', ruleId: finding.ruleId };
      } else if (answer === 'e' || answer === 'edit') {
        const replacement = (await rl.question(' replacement text › ')).trim() || maskFor(finding);
        decisions[h] = { action: 'redact', replacement, ruleId: finding.ruleId };
      } else {
        decisions[h] = { action: 'redact', replacement: maskFor(finding), ruleId: finding.ruleId };
      }
    }
  } finally {
    // Release stdin even when a prompt rejects part-way through.
    rl.close();
  }
  return { decisions, asked: unresolved.length, overriddenKeeps, autoKeptGitShas };
}
| 265 | |
/**
 * Replace every occurrence of each finding that has a `redact` decision.
 *
 * Decisions are looked up by `sha256(finding.match)`. The replacement is the
 * decision's `replacement`, or `maskFor(finding)` when that is missing or
 * empty. Longer originals are replaced first so that a shorter match which is
 * a substring of a longer one cannot split it. Replacement text is inserted
 * literally; `$` sequences are not interpreted.
 *
 * @param {string} text - Text to redact.
 * @param {Array<{match: string, ruleId: string}>} findings - Findings to consider.
 * @param {object} decisions - Decisions keyed by match hash.
 * @returns {string} The redacted text.
 */
export function applyDecisions(text, findings, decisions) {
  const toRedact = new Map();
  for (const f of findings) {
    // An empty needle would make split('') break the text into single
    // characters and splice the replacement between every one of them.
    if (!f.match) continue;
    const d = decisions[sha256(f.match)];
    if (d && d.action === 'redact') {
      toRedact.set(f.match, d.replacement || maskFor(f));
    }
  }

  const longestFirst = [...toRedact.entries()].sort((a, b) => b[0].length - a[0].length);
  let out = text;
  for (const [original, replacement] of longestFirst) {
    out = out.split(original).join(replacement);
  }
  return out;
}
| 283 | |
/**
 * Re-scan already rendered text and return the findings that still count as leaks.
 *
 * A finding is not a leak when it has `soft` severity, when its match hash has
 * a `keep` decision, or when the matched text begins with `[REDACTED:`.
 *
 * @param {string} renderedText - Text about to be written out.
 * @param {object} decisions - Decisions keyed by `sha256(match)`.
 * @returns {Array<object>} Remaining findings, in scan order.
 */
export function shadowScan(renderedText, decisions) {
  return scanText(renderedText).filter((finding) => {
    if (finding.severity === 'soft') return false;
    const decision = decisions[sha256(finding.match)];
    if (decision && decision.action === 'keep') return false;
    return !finding.match.startsWith('[REDACTED:');
  });
}
| 295 | |
/**
 * Last-chance pass: redact whatever shadowScan() still finds in `text`.
 *
 * Each leak without a decision gets a `redact` decision using `maskFor()`.
 * These are written into the caller's `decisions` object. The leaks are then
 * applied with applyDecisions() and the result is scanned once more.
 *
 * @param {string} text - Rendered text to check.
 * @param {object} decisions - Decisions keyed by `sha256(match)`; mutated in place.
 * @returns {string} `text` itself when nothing leaked, otherwise the patched text.
 * @throws {Error} When leaks remain after the patch.
 */
export function patchResiduals(text, decisions) {
  const leaks = shadowScan(text, decisions);
  if (leaks.length === 0) return text;

  for (const leak of leaks) {
    const key = sha256(leak.match);
    decisions[key] ||= { action: 'redact', replacement: maskFor(leak), ruleId: leak.ruleId };
  }

  const patched = applyDecisions(text, leaks, decisions);
  const residual = shadowScan(patched, decisions);
  if (residual.length === 0) return patched;

  const ruleIds = Array.from(new Set(residual.map((l) => l.ruleId))).join(', ');
  throw new Error(
    `patchResiduals: ${residual.length} leak(s) remain after auto-redaction (${ruleIds}). Refusing to write.`
  );
}