src/redact.js · TreeTrace

317 lines · javascript

import { createInterface } from 'node:readline/promises';
import { sha256, shannonEntropy, truncate, c } from './util.js';
 
/**
 * Detection rules applied by scanText, in array order.
 *
 * Each entry has an `id` (copied to a finding's `ruleId`), a `severity`
 * ('high' | 'medium' | 'soft') and a global (`/g`) regex `re`. Because the
 * regexes are global and shared, they carry `lastIndex` state; scanText
 * resets `lastIndex` to 0 before each use.
 */
export const RULES = [
  // --- severity 'high': provider-specific credential formats ---
  { id: 'private-key-block', severity: 'high', re: /-----BEGIN [A-Z ]*PRIVATE KEY( BLOCK)?-----[\s\S]*?(-----END [A-Z ]*PRIVATE KEY( BLOCK)?-----|$)/g },
  { id: 'aws-access-key', severity: 'high', re: /\b(AKIA|ASIA)[0-9A-Z]{16}\b/g },
  { id: 'github-token', severity: 'high', re: /\b(ghp|gho|ghu|ghs|ghr)_[A-Za-z0-9]{36,}\b/g },
  { id: 'github-fine-grained', severity: 'high', re: /\bgithub_pat_[A-Za-z0-9_]{22,}\b/g },
  { id: 'gitlab-token', severity: 'high', re: /\bglpat-[0-9a-zA-Z_-]{20,}\b/g },
  { id: 'anthropic-key', severity: 'high', re: /\bsk-ant-[A-Za-z0-9_-]{20,}\b/g },
  // The (?!ant-) lookahead keeps this rule from also matching `sk-ant-…` keys.
  { id: 'openai-key', severity: 'high', re: /\bsk-(?!ant-)[A-Za-z0-9_-]{20,}\b/g },
  { id: 'slack-token', severity: 'high', re: /\bxox[baprs]-[0-9A-Za-z-]{10,}\b/g },
  { id: 'stripe-live-key', severity: 'high', re: /\b[sr]k_live_[0-9a-zA-Z]{10,}\b/g },
  { id: 'npm-token', severity: 'high', re: /\bnpm_[A-Za-z0-9]{36}\b/g },
  { id: 'tailscale-key', severity: 'high', re: /\btskey-[a-zA-Z0-9-]{10,}\b/g },
  { id: 'google-api-key', severity: 'high', re: /\bAIza[0-9A-Za-z_-]{35}\b/g },
  { id: 'sendgrid-key', severity: 'high', re: /\bSG\.[A-Za-z0-9_-]{16,32}\.[A-Za-z0-9_-]{16,64}\b/g },
  { id: 'twilio-key', severity: 'high', re: /\bSK[0-9a-fA-F]{32}\b/g },
  { id: 'telegram-bot-token', severity: 'high', re: /\b\d{8,10}:AA[A-Za-z0-9_-]{32,33}\b/g },
  { id: 'discord-webhook', severity: 'high', re: /https:\/\/(?:ptb\.|canary\.)?discord(?:app)?\.com\/api\/webhooks\/\d+\/[A-Za-z0-9_-]+/g },
  { id: 'jwt', severity: 'high', re: /\beyJ[A-Za-z0-9_-]{10,}\.[A-Za-z0-9_-]{10,}\.[A-Za-z0-9_-]{5,}\b/g },

  // --- severity 'medium': generic secret shapes ---
  // scanText additionally tags each 'hex-token' finding with `gitShaCandidate`.
  { id: 'hex-token', severity: 'medium', re: /\b[0-9a-fA-F]{32,512}\b/g },
  { id: 'wireguard-key', severity: 'medium', re: /\b(PrivateKey|PresharedKey)\s*=\s*[A-Za-z0-9+/]{42,44}=?/g },
  { id: 'url-basic-auth', severity: 'medium', re: /\b[a-z][a-z0-9+.-]{0,30}:\/\/[^/\s:@'"`]{2,256}:[^/\s@'"`]{2,256}@[^\s'"`]{1,512}/gi },
  { id: 'bearer-header', severity: 'medium', re: /\bBearer\s+[A-Za-z0-9._+/=-]{20,}\b/gi },
  // Two entries share the 'secret-assignment' id. Both use the same key names and the same
  // negative lookahead that rejects placeholder-looking values (${…}, $(…), <…, %…, ***, ...,
  // REDACTED, xxx, placeholder, changeme, example, your-/your_, null, true, false).
  // This first one accepts a quoted value of 4-512 characters or an unquoted value of 6-512.
  { id: 'secret-assignment', severity: 'medium', re: /["'`]?\b(password|passwd|pwd|secret|api[_-]?key|access[_-]?token|auth[_-]?token|client[_-]?secret|secret[_-]?key|token|bearer)\b["'`]?\s*[:=]\s*(?!(?:["'`]?\s*)?(?:\$\{|\$\(|<|%|\*{3}|\.{3}|REDACTED|\[REDACTED|xxx+|placeholder|changeme|example|your[-_]|null\b|true\b|false\b))(?:"(?:[^"\\]|\\.){4,512}"|'(?:[^'\\]|\\.){4,512}'|`(?:[^`\\]|\\.){4,512}`|[^\s'"`,;){}]{6,512})/gi },
  // This second one accepts only quoted values, of any length, that contain at least one
  // backslash escape sequence.
  { id: 'secret-assignment', severity: 'medium', re: /["'`]?\b(password|passwd|pwd|secret|api[_-]?key|access[_-]?token|auth[_-]?token|client[_-]?secret|secret[_-]?key|token|bearer)\b["'`]?\s*[:=]\s*(?!(?:["'`]?\s*)?(?:\$\{|\$\(|<|%|\*{3}|\.{3}|REDACTED|\[REDACTED|xxx+|placeholder|changeme|example|your[-_]|null\b|true\b|false\b))(?:"(?:[^"\\]|\\.)*?\\.(?:[^"\\]|\\.)*?"|'(?:[^'\\]|\\.)*?\\.(?:[^'\\]|\\.)*?'|`(?:[^`\\]|\\.)*?\\.(?:[^`\\]|\\.)*?`)/gi },

  // --- severity 'soft': personal identifiers (shadowScan ignores this severity) ---
  // The lookahead exempts users.noreply.github.com and example.com / example.org addresses.
  { id: 'email', severity: 'soft', re: /\b[A-Za-z0-9._%+-]+@(?!(?:users\.noreply\.github\.com|example\.(?:com|org)))[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b/g },
  { id: 'ipv4', severity: 'soft', re: /\b(?:(?:25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)\.){3}(?:25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)\b(?!\.\d)/g },
  { id: 'home-dir-username', severity: 'soft', re: /(?:\/(?:home|Users)\/|C:\\Users\\)([A-Za-z][A-Za-z0-9._-]{2,30})\b/g },
];
 
// Whole-string hexadecimal test; scanText uses it to skip entropy candidates that are pure hex.
const HEX_RE = /^[0-9a-fA-F]+$/;
// Candidate tokens for the entropy check: 32-4096 characters from the base64 / URL-safe alphabet.
// Global regex, so scanText resets lastIndex before each scan.
const ENTROPY_CANDIDATE_RE = /\b[A-Za-z0-9+/_=-]{32,4096}\b/g;
// Runs of token characters longer than this are reported as 'oversized-token' and blanked out
// before the regex rules run.
const MAX_TOKEN_LEN = 4096;
// Single-character membership test used by findOversizedRuns (same alphabet as ENTROPY_CANDIDATE_RE).
const TOKEN_CHAR_RE = /[A-Za-z0-9+/_=-]/;
// Starts with digits and then contains only digits, dots and dashes; such candidates are skipped
// by the entropy check.
const VERSION_LIKE_RE = /^\d+[.\d-]*$/;
// Characters dropped before the joined-token scan: whitespace, U+200B-U+200D and U+FEFF.
const JOIN_SEPARATOR_RE = /[\s\u200B-\u200D\uFEFF]/;
// Ids of the RULES entries that are re-run (in loosened form) against separator-stripped text.
const JOINED_SCAN_RULE_IDS = new Set([
  'aws-access-key',
  'github-token',
  'github-fine-grained',
  'gitlab-token',
  'anthropic-key',
  'openai-key',
  'slack-token',
  'stripe-live-key',
  'npm-token',
  'tailscale-key',
  'google-api-key',
  'sendgrid-key',
  'twilio-key',
  'telegram-bot-token',
  'jwt',
]);
 
// Loosened copies of the rules listed in JOINED_SCAN_RULE_IDS, used on text whose separators
// have been removed: the leading and trailing `\b` anchors are dropped (word boundaries are
// not meaningful once pieces are glued together) and every open-ended `{n,}` quantifier is
// capped at 128 repetitions.
const LOOSE_RULES = RULES
  .filter(({ id }) => JOINED_SCAN_RULE_IDS.has(id))
  .map(({ id, severity, re }) => {
    const withoutLeadingBoundary = re.source.replace(/^\\b/, '');
    const withoutBoundaries = withoutLeadingBoundary.replace(/\\b$/, '');
    const cappedSource = withoutBoundaries.replace(/\{(\d+),\}/g, '{$1,128}');
    return { id, severity, re: new RegExp(cappedSource, 'g') };
  });
 
/**
 * Locate every maximal run of token characters (per TOKEN_CHAR_RE) that is
 * longer than MAX_TOKEN_LEN.
 *
 * @param {string} text - Text to scan.
 * @returns {Array<[number, number]>} `[start, end)` offsets of each oversized run, in order.
 */
function findOversizedRuns(text) {
  const oversized = [];
  const total = text.length;
  let pos = 0;
  while (pos < total) {
    if (!TOKEN_CHAR_RE.test(text[pos])) {
      pos++;
      continue;
    }
    // Consume the whole run, then decide whether it is long enough to report.
    const runStart = pos;
    while (pos < total && TOKEN_CHAR_RE.test(text[pos])) pos++;
    if (pos - runStart > MAX_TOKEN_LEN) oversized.push([runStart, pos]);
  }
  return oversized;
}
 
// Lengths of a hex string that are treated as a possible git object id.
const GIT_SHA_LENGTHS = new Set([40, 64]);

/**
 * Heuristic: does this hex match look like a git object id rather than a secret?
 *
 * A match qualifies when it is 40 or 64 hex characters and either
 *  - the 48 characters before it end with a git-related keyword (commit, tree,
 *    parent, ...) optionally followed by whitespace or `: . / -`, or
 *  - it sits at the very start of a line and is immediately followed by a space.
 *
 * @param {string} match - The matched hex string.
 * @param {string} text - The text the match was found in.
 * @param {number} index - Offset of `match` within `text`.
 * @returns {boolean}
 */
export function isGitShaCandidate(match, text, index) {
  const hasShaShape =
    Boolean(match) && GIT_SHA_LENGTHS.has(match.length) && /^[0-9a-fA-F]+$/.test(match);
  if (!hasShaShape) return false;

  const lookbehind = text.slice(Math.max(0, index - 48), index);
  const gitKeywordAtEnd =
    /\b(?:commit|tree|parent|object|merge|ref|refs|origin|HEAD|tag|blob|cherry|rebase|bisect|stash)\b[\s:./-]*$/i;
  if (gitKeywordAtEnd.test(lookbehind)) return true;

  const opensLine = index === 0 || text[index - 1] === '\n';
  const spaceFollows = text[index + match.length] === ' ';
  return opensLine && spaceFollows;
}
 
export function scanText(text) {
  const oversized = text.length > MAX_TOKEN_LEN ? findOversizedRuns(text) : [];
  let scanInput = text;
  if (oversized.length) {
    const chars = text.split('');
    for (const [s, e] of oversized) {
      for (let i = s; i < e; i++) chars[i] = '\n';
    }
    scanInput = chars.join('');
  }
 
  const findings = [];
  for (const [s, e] of oversized) {
    findings.push({
      ruleId: 'oversized-token',
      severity: 'medium',
      match: text.slice(s, e),
      index: s,
    });
  }
 
  for (const rule of RULES) {
    rule.re.lastIndex = 0;
    let m;
    while ((m = rule.re.exec(scanInput)) !== null) {
      const finding = {
        ruleId: rule.id,
        severity: rule.severity,
        match: m[0],
        index: m.index,
      };
      if (rule.id === 'hex-token') finding.gitShaCandidate = isGitShaCandidate(m[0], scanInput, m.index);
      findings.push(finding);
      if (m.index === rule.re.lastIndex) rule.re.lastIndex++;
    }
  }
 
  const seenSpans = findings.map((f) => [f.index, f.index + f.match.length]);
  ENTROPY_CANDIDATE_RE.lastIndex = 0;
  let m;
  while ((m = ENTROPY_CANDIDATE_RE.exec(scanInput)) !== null) {
    const tok = m[0];
    if (HEX_RE.test(tok) || VERSION_LIKE_RE.test(tok)) continue;
    const classes = (/[A-Z]/.test(tok) ? 1 : 0) + (/[a-z]/.test(tok) ? 1 : 0) + (/[0-9]/.test(tok) ? 1 : 0);
    if (classes < 2) continue;
    if (shannonEntropy(tok) < 4.4) continue;
    const start = m.index;
    if (seenSpans.some(([s, e]) => start >= s && start < e)) continue;
    findings.push({ ruleId: 'high-entropy-token', severity: 'medium', match: tok, index: start });
  }
 
  findings.push(...scanJoinedProviderTokens(scanInput, findings, text));
  return findings;
}
 
/**
 * Find provider tokens that were split up by separator characters
 * (JOIN_SEPARATOR_RE): the separators are stripped, LOOSE_RULES are run on the
 * glued text, and each hit is mapped back to its span in the source text.
 *
 * A hit is reported only when its source span really contains a separator and
 * its start does not fall inside a span already covered by `existing`.
 *
 * @param {string} scanInput - Text to strip and scan.
 * @param {Array<{index: number, match: string}>} existing - Findings already made.
 * @param {string} [original=scanInput] - Text the reported `match` is sliced from.
 * @returns {Array<{ruleId: string, severity: string, match: string, index: number}>}
 */
function scanJoinedProviderTokens(scanInput, existing, original = scanInput) {
  // kept[i] is the i-th non-separator character; offsets[i] is where it sits in scanInput.
  const kept = [];
  const offsets = [];
  for (let pos = 0; pos < scanInput.length; pos++) {
    const ch = scanInput[pos];
    if (!JOIN_SEPARATOR_RE.test(ch)) {
      kept.push(ch);
      offsets.push(pos);
    }
  }
  // Nothing was stripped, so the regular scan has already seen this exact text.
  if (kept.length === scanInput.length) return [];

  const compact = kept.join('');
  const covered = existing.map(({ index, match }) => [index, index + match.length]);
  const startsInsideCovered = (pos) => covered.some(([from, to]) => pos >= from && pos < to);

  const hits = [];
  for (const { id, severity, re } of LOOSE_RULES) {
    re.lastIndex = 0;
    for (let hit = re.exec(compact); hit !== null; hit = re.exec(compact)) {
      const token = hit[0];
      if (token.length <= 256) {
        const start = offsets[hit.index];
        const end = offsets[hit.index + token.length - 1] + 1;
        const span = original.slice(start, end);
        if (JOIN_SEPARATOR_RE.test(span) && !startsInsideCovered(start)) {
          hits.push({ ruleId: id, severity, match: span, index: start });
        }
      }
      // Guard against a zero-length match pinning the regex in place.
      if (hit.index === re.lastIndex) re.lastIndex++;
    }
  }
  return hits;
}
 
/**
 * Build the default placeholder that replaces a redacted finding.
 *
 * @param {{ruleId: string}} finding - Finding being redacted.
 * @returns {string} `[REDACTED:<ruleId>]`
 */
export function maskFor(finding) {
  const { ruleId } = finding;
  return `[REDACTED:${ruleId}]`;
}
 
/**
 * Decide, for every distinct finding, whether it is kept or redacted.
 *
 * Findings are deduplicated by the SHA-256 of their matched text, so one
 * decision covers every occurrence of the same string.
 *
 * - With `keepGitShas`, undecided findings flagged `gitShaCandidate` are kept
 *   automatically, unless the same text was also reported with severity 'high'.
 * - In auto mode (`!interactive || autoRedact`) prior 'keep' decisions on
 *   high/medium findings are discarded (git SHA keeps excepted when
 *   `keepGitShas` is on) and everything still undecided is redacted with the
 *   default mask.
 * - Otherwise the user is prompted on stderr/stdin for each undecided finding;
 *   an empty answer means "redact".
 *
 * @param {Array<{ruleId: string, severity: string, match: string, gitShaCandidate?: boolean}>} findings
 * @param {Object<string, {action: string, replacement?: string, ruleId: string}>} [priorDecisions]
 *   Earlier decisions keyed by match hash; copied, never mutated.
 * @param {{interactive?: boolean, autoRedact?: boolean, keepGitShas?: boolean}} [options]
 * @returns {Promise<{decisions: Object, asked: number, autoRedacted?: number, overriddenKeeps: number, autoKeptGitShas: number}>}
 *   `autoRedacted` is present only when auto mode redacted something.
 */
export async function resolveFindings(findings, priorDecisions, { interactive, autoRedact, keepGitShas = false } = {}) {
  const decisions = { ...priorDecisions };

  // One entry per distinct matched string; the first finding seen represents the group.
  const unique = new Map();
  for (const f of findings) {
    const h = sha256(f.match);
    if (!unique.has(h)) unique.set(h, { finding: f, count: 0 });
    unique.get(h).count++;
  }

  let autoKeptGitShas = 0;
  if (keepGitShas) {
    // Text that any high-severity rule also matched is never auto-kept as a git SHA.
    const highHashes = new Set();
    for (const f of findings) if (f.severity === 'high') highHashes.add(sha256(f.match));
    for (const [h, { finding }] of unique) {
      if (finding.gitShaCandidate && !decisions[h] && !highHashes.has(h)) {
        decisions[h] = { action: 'keep', ruleId: 'git-commit-sha' };
        autoKeptGitShas++;
      }
    }
  }

  const autoMode = !interactive || autoRedact;
  let overriddenKeeps = 0;
  if (autoMode) {
    // Without a user to confirm them, earlier 'keep' decisions on high/medium findings are dropped.
    for (const [h, { finding }] of unique) {
      const prior = decisions[h];
      if (prior && prior.action === 'keep' && (finding.severity === 'high' || finding.severity === 'medium')) {
        if (keepGitShas && finding.gitShaCandidate) continue;
        delete decisions[h];
        overriddenKeeps++;
      }
    }
  }

  const unresolved = [...unique.entries()].filter(([h]) => !decisions[h]);
  if (!unresolved.length) return { decisions, asked: 0, overriddenKeeps, autoKeptGitShas };

  if (autoMode) {
    for (const [h, { finding }] of unresolved) {
      decisions[h] = { action: 'redact', replacement: maskFor(finding), ruleId: finding.ruleId };
    }
    return { decisions, asked: 0, autoRedacted: unresolved.length, overriddenKeeps, autoKeptGitShas };
  }

  const rl = createInterface({ input: process.stdin, output: process.stderr });
  try {
    process.stderr.write(
      `\n${c.bold(`${unresolved.length} potential secret${unresolved.length === 1 ? '' : 's'} found`)}. Nothing is exported until each is resolved.\n\n`
    );
    let i = 0;
    for (const [h, { finding, count }] of unresolved) {
      i++;
      const sev =
        finding.severity === 'high' ? c.red(finding.severity)
        : finding.severity === 'medium' ? c.yellow(finding.severity)
        : c.gray(finding.severity);
      process.stderr.write(
        `${c.dim(`[${i}/${unresolved.length}]`)} ${sev} ${c.bold(finding.ruleId)} ×${count}\n    ${c.cyan(truncate(finding.match, 72))}\n`
      );
      // Re-prompt until the answer is recognised; an empty answer falls through to "redact".
      let answer;
      for (;;) {
        answer = (await rl.question(`    ${c.bold('[r]')}edact  ${c.bold('[k]')}eep  ${c.bold('[e]')}dit replacement › `))
          .trim()
          .toLowerCase();
        if (['r', 'k', 'e', 'redact', 'keep', 'edit', ''].includes(answer)) break;
      }
      if (answer === 'k' || answer === 'keep') {
        decisions[h] = { action: 'keep', ruleId: finding.ruleId };
      } else if (answer === 'e' || answer === 'edit') {
        // A blank custom replacement falls back to the default mask.
        const replacement = (await rl.question('    replacement text › ')).trim() || maskFor(finding);
        decisions[h] = { action: 'redact', replacement, ruleId: finding.ruleId };
      } else {
        decisions[h] = { action: 'redact', replacement: maskFor(finding), ruleId: finding.ruleId };
      }
    }
  } finally {
    // Always release the interface, even if a prompt or write throws, so stdin is not left held open.
    rl.close();
  }
  // overriddenKeeps is always 0 on this path; it is returned so every path has the same counters.
  return { decisions, asked: unresolved.length, overriddenKeeps, autoKeptGitShas };
}
 
/**
 * Replace every occurrence of each finding whose decision is 'redact'.
 *
 * Decisions are looked up by the SHA-256 of the finding's matched text. A
 * decision without a (non-empty) replacement falls back to the default mask.
 * Longer matches are substituted first so a shorter match that is a substring
 * of a longer one cannot break the longer one apart before it is replaced.
 *
 * @param {string} text - Text to redact.
 * @param {Array<{match: string, ruleId: string}>} findings - Findings located in `text`.
 * @param {Object<string, {action: string, replacement?: string}>} decisions - Decisions keyed by match hash.
 * @returns {string} The redacted text.
 */
export function applyDecisions(text, findings, decisions) {
  const toRedact = new Map();
  for (const f of findings) {
    // An empty needle would make split('') break the text into single characters and
    // insert the replacement between every one of them, so it is skipped.
    if (f.match === '') continue;
    const d = decisions[sha256(f.match)];
    if (d && d.action === 'redact') {
      toRedact.set(f.match, d.replacement || maskFor(f));
    }
  }

  const longestFirst = [...toRedact.entries()].sort((a, b) => b[0].length - a[0].length);

  let out = text;
  for (const [original, replacement] of longestFirst) {
    // split/join substitutes literally: the needle needs no regex escaping and `$`
    // sequences in the replacement are not interpreted.
    out = out.split(original).join(replacement);
  }
  return out;
}
 
/**
 * Re-scan already rendered text and return the findings that still count as leaks.
 *
 * A finding is not a leak when its severity is 'soft', when a decision for its
 * matched text says 'keep', or when the match itself begins with `[REDACTED:`.
 *
 * @param {string} renderedText - Output text to verify.
 * @param {Object<string, {action: string}>} decisions - Decisions keyed by match hash.
 * @returns {Array<object>} Findings that remain exposed.
 */
export function shadowScan(renderedText, decisions) {
  return scanText(renderedText).filter((finding) => {
    if (finding.severity === 'soft') return false;
    const prior = decisions[sha256(finding.match)];
    if (prior && prior.action === 'keep') return false;
    return !finding.match.startsWith('[REDACTED:');
  });
}
 
/**
 * Last line of defence: redact whatever shadowScan still finds in `text`.
 *
 * Each leak without a decision gets a default 'redact' decision, which is
 * written into the caller's `decisions` object. The text is then redacted and
 * scanned once more.
 *
 * @param {string} text - Rendered text about to be written.
 * @param {Object<string, object>} decisions - Decisions keyed by match hash; mutated in place.
 * @returns {string} `text` unchanged when clean, otherwise the patched text.
 * @throws {Error} When leaks are still detected after patching.
 */
export function patchResiduals(text, decisions) {
  const leaks = shadowScan(text, decisions);
  if (leaks.length === 0) return text;

  leaks.forEach((leak) => {
    const key = sha256(leak.match);
    if (decisions[key]) return;
    decisions[key] = { action: 'redact', replacement: maskFor(leak), ruleId: leak.ruleId };
  });

  const patched = applyDecisions(text, leaks, decisions);

  const stillLeaking = shadowScan(patched, decisions);
  if (stillLeaking.length === 0) return patched;

  const ruleIds = [...new Set(stillLeaking.map((l) => l.ruleId))].join(', ');
  throw new Error(
    `patchResiduals: ${stillLeaking.length} leak(s) remain after auto-redaction ` +
      `(${ruleIds}). Refusing to write.`
  );
}

1	import { createInterface } from 'node:readline/promises';
2	import { sha256, shannonEntropy, truncate, c } from './util.js';
3
4	export const RULES = [
5
6	{ id: 'private-key-block', severity: 'high', re: /-----BEGIN [A-Z ]PRIVATE KEY( BLOCK)?-----[\s\S]?(-----END [A-Z ]*PRIVATE KEY( BLOCK)?-----\|$)/g },
7	{ id: 'aws-access-key', severity: 'high', re: /\b(AKIA\|ASIA)[0-9A-Z]{16}\b/g },
8	{ id: 'github-token', severity: 'high', re: /\b(ghp\|gho\|ghu\|ghs\|ghr)_[A-Za-z0-9]{36,}\b/g },
9	{ id: 'github-fine-grained', severity: 'high', re: /\bgithub_pat_[A-Za-z0-9_]{22,}\b/g },
10	{ id: 'gitlab-token', severity: 'high', re: /\bglpat-[0-9a-zA-Z_-]{20,}\b/g },
11	{ id: 'anthropic-key', severity: 'high', re: /\bsk-ant-[A-Za-z0-9_-]{20,}\b/g },
12	{ id: 'openai-key', severity: 'high', re: /\bsk-(?!ant-)[A-Za-z0-9_-]{20,}\b/g },
13	{ id: 'slack-token', severity: 'high', re: /\bxox[baprs]-[0-9A-Za-z-]{10,}\b/g },
14	{ id: 'stripe-live-key', severity: 'high', re: /\b[sr]k_live_[0-9a-zA-Z]{10,}\b/g },
15	{ id: 'npm-token', severity: 'high', re: /\bnpm_[A-Za-z0-9]{36}\b/g },
16	{ id: 'tailscale-key', severity: 'high', re: /\btskey-[a-zA-Z0-9-]{10,}\b/g },
17	{ id: 'google-api-key', severity: 'high', re: /\bAIza[0-9A-Za-z_-]{35}\b/g },
18	{ id: 'sendgrid-key', severity: 'high', re: /\bSG\.[A-Za-z0-9_-]{16,32}\.[A-Za-z0-9_-]{16,64}\b/g },
19	{ id: 'twilio-key', severity: 'high', re: /\bSK[0-9a-fA-F]{32}\b/g },
20	{ id: 'telegram-bot-token', severity: 'high', re: /\b\d{8,10}:AA[A-Za-z0-9_-]{32,33}\b/g },
21	{ id: 'discord-webhook', severity: 'high', re: /https:\/\/(?:ptb\.\|canary\.)?discord(?:app)?\.com\/api\/webhooks\/\d+\/[A-Za-z0-9_-]+/g },
22	{ id: 'jwt', severity: 'high', re: /\beyJ[A-Za-z0-9_-]{10,}\.[A-Za-z0-9_-]{10,}\.[A-Za-z0-9_-]{5,}\b/g },
23
24	{ id: 'hex-token', severity: 'medium', re: /\b[0-9a-fA-F]{32,512}\b/g },
25	{ id: 'wireguard-key', severity: 'medium', re: /\b(PrivateKey\|PresharedKey)\s=\s[A-Za-z0-9+/]{42,44}=?/g },
26	{ id: 'url-basic-auth', severity: 'medium', re: /\b[a-z][a-z0-9+.-]{0,30}:\/\/[^/\s:@'"`]{2,256}:[^/\s@'"`]{2,256}@[^\s'"`]{1,512}/gi },
27	{ id: 'bearer-header', severity: 'medium', re: /\bBearer\s+[A-Za-z0-9._+/=-]{20,}\b/gi },
28	{ id: 'secret-assignment', severity: 'medium', re: /["'`]?\b(password\|passwd\|pwd\|secret\|api[_-]?key\|access[_-]?token\|auth[_-]?token\|client[_-]?secret\|secret[_-]?key\|token\|bearer)\b["'`]?\s[:=]\s(?!(?:["'`]?\s)?(?:\$\{\|\$\(\|<\|%\|\{3}\|\.{3}\|REDACTED\|\[REDACTED\|xxx+\|placeholder\|changeme\|example\|your[-_]\|null\b\|true\b\|false\b))(?:"(?:[^"\\]\|\\.){4,512}"\|'(?:[^'\\]\|\\.){4,512}'\|`(?:[^`\\]\|\\.){4,512}`\|[^\s'"`,;){}]{6,512})/gi },
29	{ id: 'secret-assignment', severity: 'medium', re: /["'`]?\b(password\|passwd\|pwd\|secret\|api[_-]?key\|access[_-]?token\|auth[_-]?token\|client[_-]?secret\|secret[_-]?key\|token\|bearer)\b["'`]?\s[:=]\s(?!(?:["'`]?\s)?(?:\$\{\|\$\(\|<\|%\|\{3}\|\.{3}\|REDACTED\|\[REDACTED\|xxx+\|placeholder\|changeme\|example\|your[-_]\|null\b\|true\b\|false\b))(?:"(?:[^"\\]\|\\.)?\\.(?:[^"\\]\|\\.)?"\|'(?:[^'\\]\|\\.)?\\.(?:[^'\\]\|\\.)?'\|`(?:[^`\\]\|\\.)?\\.(?:[^`\\]\|\\.)?`)/gi },
30
31	{ id: 'email', severity: 'soft', re: /\b[A-Za-z0-9._%+-]+@(?!(?:users\.noreply\.github\.com\|example\.(?:com\|org)))[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b/g },
32	{ id: 'ipv4', severity: 'soft', re: /\b(?:(?:25[0-5]\|2[0-4]\d\|1\d\d\|[1-9]?\d)\.){3}(?:25[0-5]\|2[0-4]\d\|1\d\d\|[1-9]?\d)\b(?!\.\d)/g },
33	{ id: 'home-dir-username', severity: 'soft', re: /(?:\/(?:home\|Users)\/\|C:\\Users\\)([A-Za-z][A-Za-z0-9._-]{2,30})\b/g },
34	];
35
36	const HEX_RE = /^[0-9a-fA-F]+$/;
37	const ENTROPY_CANDIDATE_RE = /\b[A-Za-z0-9+/_=-]{32,4096}\b/g;
38	const MAX_TOKEN_LEN = 4096;
39	const TOKEN_CHAR_RE = /[A-Za-z0-9+/_=-]/;
40	const VERSION_LIKE_RE = /^\d+[.\d-]*$/;
41	const JOIN_SEPARATOR_RE = /[\s\u200B-\u200D\uFEFF]/;
42	const JOINED_SCAN_RULE_IDS = new Set([
43	'aws-access-key',
44	'github-token',
45	'github-fine-grained',
46	'gitlab-token',
47	'anthropic-key',
48	'openai-key',
49	'slack-token',
50	'stripe-live-key',
51	'npm-token',
52	'tailscale-key',
53	'google-api-key',
54	'sendgrid-key',
55	'twilio-key',
56	'telegram-bot-token',
57	'jwt',
58	]);
59
60	const LOOSE_RULES = RULES.filter((r) => JOINED_SCAN_RULE_IDS.has(r.id)).map((r) => ({
61	id: r.id,
62	severity: r.severity,
63	re: new RegExp(
64	r.re.source.replace(/^\\b/, '').replace(/\\b$/, '').replace(/\{(\d+),\}/g, '{$1,128}'),
65	'g'
66	),
67	}));
68
69	function findOversizedRuns(text) {
70	const runs = [];
71	let start = -1;
72	for (let i = 0; i <= text.length; i++) {
73	const isTok = i < text.length && TOKEN_CHAR_RE.test(text[i]);
74	if (isTok) {
75	if (start === -1) start = i;
76	} else if (start !== -1) {
77	if (i - start > MAX_TOKEN_LEN) runs.push([start, i]);
78	start = -1;
79	}
80	}
81	return runs;
82	}
83
84	const GIT_SHA_LENGTHS = new Set([40, 64]);
85
86	export function isGitShaCandidate(match, text, index) {
87	if (!match \|\| !GIT_SHA_LENGTHS.has(match.length)) return false;
88	if (!/^[0-9a-fA-F]+$/.test(match)) return false;
89	const before = text.slice(Math.max(0, index - 48), index);
90	if (/\b(?:commit\|tree\|parent\|object\|merge\|ref\|refs\|origin\|HEAD\|tag\|blob\|cherry\|rebase\|bisect\|stash)\b[\s:./-]*$/i.test(before)) {
91	return true;
92	}
93	const atLineStart = index === 0 \|\| text[index - 1] === '\n';
94	return atLineStart && text[index + match.length] === ' ';
95	}
96
97	export function scanText(text) {
98	const oversized = text.length > MAX_TOKEN_LEN ? findOversizedRuns(text) : [];
99	let scanInput = text;
100	if (oversized.length) {
101	const chars = text.split('');
102	for (const [s, e] of oversized) {
103	for (let i = s; i < e; i++) chars[i] = '\n';
104	}
105	scanInput = chars.join('');
106	}
107
108	const findings = [];
109	for (const [s, e] of oversized) {
110	findings.push({
111	ruleId: 'oversized-token',
112	severity: 'medium',
113	match: text.slice(s, e),
114	index: s,
115	});
116	}
117
118	for (const rule of RULES) {
119	rule.re.lastIndex = 0;
120	let m;
121	while ((m = rule.re.exec(scanInput)) !== null) {
122	const finding = {
123	ruleId: rule.id,
124	severity: rule.severity,
125	match: m[0],
126	index: m.index,
127	};
128	if (rule.id === 'hex-token') finding.gitShaCandidate = isGitShaCandidate(m[0], scanInput, m.index);
129	findings.push(finding);
130	if (m.index === rule.re.lastIndex) rule.re.lastIndex++;
131	}
132	}
133
134	const seenSpans = findings.map((f) => [f.index, f.index + f.match.length]);
135	ENTROPY_CANDIDATE_RE.lastIndex = 0;
136	let m;
137	while ((m = ENTROPY_CANDIDATE_RE.exec(scanInput)) !== null) {
138	const tok = m[0];
139	if (HEX_RE.test(tok) \|\| VERSION_LIKE_RE.test(tok)) continue;
140	const classes = (/[A-Z]/.test(tok) ? 1 : 0) + (/[a-z]/.test(tok) ? 1 : 0) + (/[0-9]/.test(tok) ? 1 : 0);
141	if (classes < 2) continue;
142	if (shannonEntropy(tok) < 4.4) continue;
143	const start = m.index;
144	if (seenSpans.some(([s, e]) => start >= s && start < e)) continue;
145	findings.push({ ruleId: 'high-entropy-token', severity: 'medium', match: tok, index: start });
146	}
147
148	findings.push(...scanJoinedProviderTokens(scanInput, findings, text));
149	return findings;
150	}
151
152	function scanJoinedProviderTokens(scanInput, existing, original = scanInput) {
153	const chars = [];
154	const indexMap = [];
155	for (let i = 0; i < scanInput.length; i++) {
156	if (JOIN_SEPARATOR_RE.test(scanInput[i])) continue;
157	chars.push(scanInput[i]);
158	indexMap.push(i);
159	}
160	if (chars.length === scanInput.length) return [];
161
162	const joined = chars.join('');
163	const existingSpans = existing.map((f) => [f.index, f.index + f.match.length]);
164	const findings = [];
165	for (const rule of LOOSE_RULES) {
166	rule.re.lastIndex = 0;
167	let m;
168	while ((m = rule.re.exec(joined)) !== null) {
169	if (m[0].length <= 256) {
170	const start = indexMap[m.index];
171	const end = indexMap[m.index + m[0].length - 1] + 1;
172	const slice = original.slice(start, end);
173	if (JOIN_SEPARATOR_RE.test(slice) && !existingSpans.some(([s, e]) => start >= s && start < e)) {
174	findings.push({ ruleId: rule.id, severity: rule.severity, match: slice, index: start });
175	}
176	}
177	if (m.index === rule.re.lastIndex) rule.re.lastIndex++;
178	}
179	}
180	return findings;
181	}
182
183	export function maskFor(finding) {
184	return `[REDACTED:${finding.ruleId}]`;
185	}
186
187	export async function resolveFindings(findings, priorDecisions, { interactive, autoRedact, keepGitShas = false } = {}) {
188	const decisions = { ...priorDecisions };
189	const unique = new Map();
190	for (const f of findings) {
191	const h = sha256(f.match);
192	if (!unique.has(h)) unique.set(h, { finding: f, count: 0 });
193	unique.get(h).count++;
194	}
195
196	let autoKeptGitShas = 0;
197	if (keepGitShas) {
198	const highHashes = new Set();
199	for (const f of findings) if (f.severity === 'high') highHashes.add(sha256(f.match));
200	for (const [h, { finding }] of unique) {
201	if (finding.gitShaCandidate && !decisions[h] && !highHashes.has(h)) {
202	decisions[h] = { action: 'keep', ruleId: 'git-commit-sha' };
203	autoKeptGitShas++;
204	}
205	}
206	}
207
208	const autoMode = !interactive \|\| autoRedact;
209	let overriddenKeeps = 0;
210	if (autoMode) {
211	for (const [h, { finding }] of unique) {
212	const prior = decisions[h];
213	if (prior && prior.action === 'keep' && (finding.severity === 'high' \|\| finding.severity === 'medium')) {
214	if (keepGitShas && finding.gitShaCandidate) continue;
215	delete decisions[h];
216	overriddenKeeps++;
217	}
218	}
219	}
220
221	const unresolved = [...unique.entries()].filter(([h]) => !decisions[h]);
222	if (!unresolved.length) return { decisions, asked: 0, overriddenKeeps, autoKeptGitShas };
223
224	if (autoMode) {
225	for (const [h, { finding }] of unresolved) {
226	decisions[h] = { action: 'redact', replacement: maskFor(finding), ruleId: finding.ruleId };
227	}
228	return { decisions, asked: 0, autoRedacted: unresolved.length, overriddenKeeps, autoKeptGitShas };
229	}
230
231	const rl = createInterface({ input: process.stdin, output: process.stderr });
232	process.stderr.write(
233	`\n${c.bold(`${unresolved.length} potential secret${unresolved.length === 1 ? '' : 's'} found`)}. Nothing is exported until each is resolved.\n\n`
234	);
235	let i = 0;
236	for (const [h, { finding, count }] of unresolved) {
237	i++;
238	const sev =
239	finding.severity === 'high' ? c.red(finding.severity)
240	: finding.severity === 'medium' ? c.yellow(finding.severity)
241	: c.gray(finding.severity);
242	process.stderr.write(
243	`${c.dim(`[${i}/${unresolved.length}]`)} ${sev} ${c.bold(finding.ruleId)} ×${count}\n ${c.cyan(truncate(finding.match, 72))}\n`
244
245	);
246	let answer;
247	for (;;) {
248	answer = (await rl.question(` ${c.bold('[r]')}edact ${c.bold('[k]')}eep ${c.bold('[e]')}dit replacement › `))
249	.trim()
250	.toLowerCase();
251	if (['r', 'k', 'e', 'redact', 'keep', 'edit', ''].includes(answer)) break;
252	}
253	if (answer === 'k' \|\| answer === 'keep') {
254	decisions[h] = { action: 'keep', ruleId: finding.ruleId };
255	} else if (answer === 'e' \|\| answer === 'edit') {
256	const replacement = (await rl.question(' replacement text › ')).trim() \|\| maskFor(finding);
257	decisions[h] = { action: 'redact', replacement, ruleId: finding.ruleId };
258	} else {
259	decisions[h] = { action: 'redact', replacement: maskFor(finding), ruleId: finding.ruleId };
260	}
261	}
262	rl.close();
263	return { decisions, asked: unresolved.length, autoKeptGitShas };
264	}
265
266	export function applyDecisions(text, findings, decisions) {
267	const toRedact = new Map();
268	for (const f of findings) {
269	const d = decisions[sha256(f.match)];
270	if (d && d.action === 'redact') {
271	toRedact.set(f.match, d.replacement \|\| maskFor(f));
272	}
273	}
274	let out = text;
275
276	for (const [original, replacement] of [...toRedact.entries()].sort(
277	(a, b) => b[0].length - a[0].length
278	)) {
279	out = out.split(original).join(replacement);
280	}
281	return out;
282	}
283
284	export function shadowScan(renderedText, decisions) {
285	const leaks = [];
286	for (const f of scanText(renderedText)) {
287	if (f.severity === 'soft') continue;
288	const d = decisions[sha256(f.match)];
289	if (d && d.action === 'keep') continue;
290	if (f.match.startsWith('[REDACTED:')) continue;
291	leaks.push(f);
292	}
293	return leaks;
294	}
295
296	export function patchResiduals(text, decisions) {
297	const leaks = shadowScan(text, decisions);
298	if (!leaks.length) return text;
299
300	for (const f of leaks) {
301	const h = sha256(f.match);
302	if (!decisions[h]) {
303	decisions[h] = { action: 'redact', replacement: maskFor(f), ruleId: f.ruleId };
304	}
305	}
306
307	let out = applyDecisions(text, leaks, decisions);
308
309	const residual = shadowScan(out, decisions);
310	if (residual.length) {
311	throw new Error(
312	`patchResiduals: ${residual.length} leak(s) remain after auto-redaction ` +
313	`(${[...new Set(residual.map((l) => l.ruleId))].join(', ')}). Refusing to write.`
314	);
315	}
316	return out;
317	}