Zion Boggan zionboggan.com ↗

Tie failure signals to corroboration and time-valid semantic linkage

Stop one benign prompt from minting several contradictory failure types.
inferSignals now skips long pasted documents (over 1200 chars on a
non-correction node) so incidental keyword hits in a spec or strategy
paste no longer fabricate failures, and it returns a single priority-ranked
primary signal per node instead of up to three.

A wording signal only becomes a confirmed failure when corroborated: the
node is a genuine correction, or it shares evidence with the prior node
(an overlapping action file path, or at least three shared non-stopword
tokens). Otherwise no failure is minted.

Replace array-adjacency linkage with timestamp-ordered, evidence-aware
linkage. Failure targets, correctors, and resolvers are chosen by
timestamp (`ts`), so a corrector or resolver can never precede its
failure, and nodes that share evidence with the failure are preferred.
Positional fallback caps confidence at 0.68 and downgrades the signal to
the inferred tier. linkChain and addFailure also drop any corrector or
resolver whose timestamp precedes the failure.

Add tests asserting a benign prompt yields no multi-type failures and that
no corrector or resolver is ever linked with an earlier timestamp than its
failure.
e2e52a2   Zion Boggan committed on Jun 13, 2026 (1 week ago)
src/analyze.js +152 -42
@@ -29,6 +29,27 @@ const REPEATED_FIX_HINT = /\b(still failing|still broken|again|same error|didn'?
const UNDERBUILT_HINT = /\b(underbuilt|missing|not enough|too bare|incomplete|you skipped|you missed)\b/i;
const FORMAT_HINT = /\b(format|json|markdown|schema|same structure|exact output|invalid)\b/i;
/** Non-correction nodes whose text is longer than this many characters are not scanned for wording hints. */
const WORDING_SCAN_MAX_CHARS = 1200;

/**
 * Failure types ordered from highest to lowest priority. When several wording
 * hints match one node, the first matching entry in this list is the one that
 * is reported. Frozen so the shared ranking cannot be reordered at runtime.
 */
const SIGNAL_PRIORITY = Object.freeze([
  'ignored_constraint',
  'hallucinated_file_or_api',
  'wrong_tool_choice',
  'repeated_failed_fix',
  'scope_drift',
  'overbuilt_solution',
  'underbuilt_solution',
  'dependency_or_environment_mismatch',
  'format_violation',
  'user_frustration',
  'misunderstood_goal',
]);

/** Common words that are ignored when counting the tokens two nodes have in common. */
const STOPWORDS = new Set([
  'the', 'and', 'for', 'this', 'that', 'with', 'you', 'your', 'are', 'was', 'has', 'have',
  'not', 'but', 'can', 'all', 'any', 'our', 'out', 'now', 'too', 'also', 'please', 'lol',
  'from', 'into', 'just', 'like', 'more', 'some', 'than', 'then', 'them', 'they', 'what',
  'when', 'where', 'which', 'will', 'about', 'agent', 'make', 'made', 'show', 'look',
]);
+
const SECURITY_FILE_RE = /(?:^|[\\/])(?:\.env[^\\/]*|[^\\/]*(?:auth|session|middleware|login|signin|signup|permission|rbac|access[-_]?control|secur|crypto|jwt|oauth|passwd|password|secret|credential|token)[^\\/]*)$/i;
const RISKY_CMD_RE = /(?:\brm\s+-rf\b|\bchmod\s+777\b|curl[^|]*\|\s*(?:sh|bash)|wget[^|]*\|\s*(?:sh|bash)|--no-verify\b|--force\b|\bDROP\s+TABLE\b|\bTRUNCATE\s+TABLE\b)/i;
const SECRET_CONTENT_RE = /(?:\bsource\s+[^\n]*\.env\b|(?:^|[;&|]|\s)\.\s+[^\n]*\.env\b|\.env\.(?:secrets|local|prod|production)\b|\bexport\s+[A-Z0-9_]*(?:_API_KEY|_TOKEN|_SECRET|_PASSWORD|API_KEY|SECRET_KEY|ACCESS_KEY|PRIVATE_KEY)\b|\b(?:wrangler|doppler|vault)\b|\bgh\s+auth\b|\baws\s+configure\b|\bgcloud\s+auth\b|\bkubectl\s+config\s+set-credentials\b)/i;
@@ -75,6 +96,8 @@ export function analyzeTree(tree) {
const linkChain = (type, confidence, failureNode, correctionNode, resolvedNode, summary) => {
if (!correctionNode || correctionNode.id === failureNode.id) return;
+ if (!afterFailure(correctionNode, failureNode)) return;
+ const resolved = resolvedNode && afterFailure(resolvedNode, failureNode) ? resolvedNode : null;
if (correctionChains.some((c) => c.failureNodeId === failureNode.id && c.correctionNodeId === correctionNode.id)) {
return;
}
@@ -82,7 +105,7 @@ export function analyzeTree(tree) {
id: `chain_${pad(correctionChains.length + 1)}`,
failureNodeId: failureNode.id,
correctionNodeId: correctionNode.id,
- resolvedNodeId: resolvedNode?.id || null,
+ resolvedNodeId: resolved?.id || null,
failureType: type,
confidence: confidenceLabel(confidence),
summary,
@@ -92,6 +115,8 @@ export function analyzeTree(tree) {
const addFailure = ({ type, confidence, tier = 'inferred', failureNode, correctionNode, resolvedNode, evidence, summary }) => {
if (!FAILURE_TYPES.has(type) || !failureNode) return null;
if (correctionNode && correctionNode.id === failureNode.id) correctionNode = null;
+ if (correctionNode && !afterFailure(correctionNode, failureNode)) correctionNode = null;
+ if (resolvedNode && !afterFailure(resolvedNode, failureNode)) resolvedNode = null;
const model = failureNode.model || null;
const ids = uniq([failureNode.id, correctionNode?.id, resolvedNode?.id]);
@@ -184,8 +209,8 @@ export function analyzeTree(tree) {
confidence,
tier,
failureNode: node,
- correctionNode: node.kind === 'correction' ? null : nearestCorrectionAfter(tree.nodes, index),
- resolvedNode: nearestAcceptedAfter(tree.nodes, index),
+ correctionNode: node.kind === 'correction' ? null : nearestCorrectionAfter(tree.nodes, node),
+ resolvedNode: nearestAcceptedAfter(tree.nodes, node, null),
evidence: `Agent action touched ${kinds.join(', ')}: ${targets.map((t) => `"${truncate(String(t), 80)}"`).join(', ')}`,
summary: `An agent action touched auth, secrets, or access control near "${truncate(node.title, 90)}".`,
});
@@ -196,7 +221,7 @@ export function analyzeTree(tree) {
tier: 'inferred',
failureNode: node,
correctionNode: null,
- resolvedNode: nearestAcceptedAfter(tree.nodes, index),
+ resolvedNode: nearestAcceptedAfter(tree.nodes, node, null),
evidence: `User stated a security-sensitive intent: "${quote(node.text)}"`,
summary: `A security-sensitive intent was stated near "${truncate(node.title, 90)}".`,
});
@@ -208,7 +233,7 @@ export function analyzeTree(tree) {
confidence: 0.9,
tier: 'verified',
failureNode: node,
- resolvedNode: nearestAcceptedAfter(tree.nodes, index),
+ resolvedNode: nearestAcceptedAfter(tree.nodes, node, null),
evidence: `Branch abandoned after prompt: "${quote(node.text)}"`,
summary: `A side path was abandoned: ${truncate(node.title, 120)}`,
});
@@ -222,20 +247,39 @@ export function analyzeTree(tree) {
PRIVACY_HINT.test(node.text);
if (!shouldAnalyze) return;
- const priorNode = nearestFailureTarget(node, tree.nodes, index);
- const failureNode = priorNode || node;
- const correctionNode = priorNode ? node : null;
- const resolvedNode = nearestAcceptedAfter(tree.nodes, index);
const signals = inferSignals(node);
+ if (!signals.length) return;
+
+ const prior = nearestFailureTarget(node, tree.nodes);
+ const priorNode = prior ? prior.target : null;
+ const corroborated = node.kind === 'correction' || (priorNode && sharesEvidence(priorNode, node));
+
+ let failureNode;
+ let correctionNode;
+ let linkage;
+ if (priorNode && corroborated) {
+ failureNode = priorNode;
+ correctionNode = node;
+ linkage = prior.linkage;
+ } else if (node.kind === 'correction') {
+ failureNode = node;
+ correctionNode = null;
+ linkage = 'positional';
+ } else {
+ return;
+ }
+
+ const resolvedNode = nearestAcceptedAfter(tree.nodes, failureNode, correctionNode);
for (const signal of signals) {
const tier = correctionNode ? 'confirmed' : 'inferred';
- const confidence =
+ let confidence =
tier === 'confirmed' ? Math.max(signal.confidence, 0.82) : Math.min(signal.confidence, 0.7);
+ if (linkage === 'positional') confidence = Math.min(confidence, 0.68);
addFailure({
type: signal.type,
confidence,
- tier,
+ tier: linkage === 'positional' ? 'inferred' : tier,
failureNode,
correctionNode,
resolvedNode,
@@ -370,49 +414,115 @@ export function renderMemoryMarkdown(tree, opts = {}) {
}
/**
 * Derive at most one wording-based failure signal for a node.
 *
 * Non-correction nodes whose text is longer than WORDING_SCAN_MAX_CHARS are
 * not scanned at all. Otherwise every hint pattern is tested against the
 * text; when several match, only the type ranked first in SIGNAL_PRIORITY is
 * returned. A correction node that matches no hint falls back to
 * `misunderstood_goal`.
 *
 * @param {{text?: string, kind?: string}} node - Node whose text is scanned.
 * @returns {Array<{type: string, confidence: number}>} Empty, or a single primary signal.
 */
function inferSignals(node) {
  const text = node.text || '';
  const isCorrection = node.kind === 'correction';
  if (!isCorrection && text.length > WORDING_SCAN_MAX_CHARS) return [];

  // [failure type, confidence, pattern] - every pattern is tested, in this order.
  const rules = [
    ['scope_drift', 0.82, SCOPE_DRIFT_HINT],
    ['ignored_constraint', 0.84, /\b(i said|you forgot|you ignored|not what i (asked|wanted|meant)|asked for)\b/i],
    ['dependency_or_environment_mismatch', 0.72, TOOL_HINT],
    ['wrong_tool_choice', 0.78, /\bwrong tool|wrong library|use .* instead\b/i],
    ['hallucinated_file_or_api', 0.82, HALLUCINATION_HINT],
    ['repeated_failed_fix', 0.8, REPEATED_FIX_HINT],
    ['overbuilt_solution', 0.78, /\btoo much|overbuilt|scrap .* web app|too heavy\b/i],
    ['underbuilt_solution', 0.76, UNDERBUILT_HINT],
    ['format_violation', 0.68, FORMAT_HINT],
    ['user_frustration', 0.72, FRUSTRATION_HINT],
  ];

  const confidenceByType = new Map();
  for (const [type, confidence, pattern] of rules) {
    if (pattern.test(text)) confidenceByType.set(type, confidence);
  }

  // A correction with no recognisable wording still counts as a misunderstanding.
  if (confidenceByType.size === 0 && isCorrection) {
    confidenceByType.set('misunderstood_goal', 0.62);
  }

  const primary = SIGNAL_PRIORITY.find((type) => confidenceByType.has(type));
  if (primary === undefined) return [];
  return [{ type: primary, confidence: confidenceByType.get(primary) }];
}
-function nearestFailureTarget(node, nodes, index) {
- if (node.parent && node.parent.status !== 'abandoned' && node.parent.id !== node.id) return node.parent;
- for (let i = index - 1; i >= 0; i--) {
- if (nodes[i].status !== 'abandoned' && nodes[i].id !== node.id) return nodes[i];
- }
- return null;
/**
 * Convert a node's `ts` field to epoch milliseconds.
 *
 * @param {{ts?: string|number|Date}|null|undefined} node - Node to read; may be nullish.
 * @returns {number|null} Epoch milliseconds, or null when the node or its
 *   `ts` is missing, is a boolean, or does not parse to a valid date.
 */
function tsOf(node) {
  const raw = node?.ts;
  // Explicit null check rather than truthiness, so a numeric epoch of 0 is kept.
  // Booleans are rejected because `new Date(false)` would silently become epoch 0.
  if (raw === null || raw === undefined || typeof raw === 'boolean') return null;
  const millis = new Date(raw).getTime();
  return Number.isFinite(millis) ? millis : null;
}
+
/**
 * Check that `candidate` is not timestamped before `failureNode`.
 *
 * @param {object} candidate - Node proposed as a corrector or resolver.
 * @param {object} failureNode - Node the candidate would be linked to.
 * @returns {boolean} True when the candidate's timestamp is equal to or later
 *   than the failure's, and also true when either timestamp is unavailable
 *   (the ordering cannot be disproved).
 */
function afterFailure(candidate, failureNode) {
  const candidateTs = tsOf(candidate);
  const failureTs = tsOf(failureNode);
  const comparable = candidateTs !== null && failureTs !== null;
  return comparable ? candidateTs >= failureTs : true;
}
+
/**
 * Collect the distinct file paths touched by a node's actions.
 *
 * @param {{actions?: Array<{file?: string}|null>}|null|undefined} node - Node to inspect.
 * @returns {Set<string>} Truthy `file` values in first-seen order. Empty when
 *   the node is nullish or `actions` is missing or not an array.
 */
function actionFiles(node) {
  // Malformed trees (non-array `actions`, null entries) yield no files instead of throwing.
  const actions = Array.isArray(node?.actions) ? node.actions : [];
  const files = new Set();
  for (const action of actions) {
    const file = action?.file;
    if (file) files.add(file);
  }
  return files;
}
+
/**
 * Report whether two nodes have at least one action file path in common.
 *
 * @param {object} a - First node.
 * @param {object} b - Second node.
 * @returns {boolean} True when some file from `b`'s actions also appears in `a`'s.
 */
function sharedFiles(a, b) {
  const left = actionFiles(a);
  // Nothing to intersect with, so `b` is never inspected.
  if (left.size === 0) return false;
  return [...actionFiles(b)].some((file) => left.has(file));
}
-function nearestAcceptedAfter(nodes, index) {
- for (let i = index + 1; i < nodes.length; i++) {
- if (nodes[i].status !== 'abandoned') return nodes[i];
/**
 * Extract the distinct, lower-cased word tokens of a node's text.
 *
 * A token starts with a letter and is at least three characters of letters,
 * digits, `_` or `-`. Entries of STOPWORDS are left out.
 *
 * @param {{text?: string}} node - Node whose text is tokenised.
 * @returns {Set<string>} Tokens in first-seen order.
 */
function tokenSet(node) {
  const lowered = String(node.text || '').toLowerCase();
  const words = lowered.match(/[a-z][a-z0-9_-]{2,}/g) || [];
  return new Set(words.filter((word) => !STOPWORDS.has(word)));
}
+
/**
 * Count how many distinct tokens of `b` also occur in `a`.
 *
 * @param {object} a - First node.
 * @param {object} b - Second node.
 * @returns {number} Number of shared tokens; 0 when `a` has no tokens.
 */
function tokenOverlap(a, b) {
  const base = tokenSet(a);
  // Without tokens on the left side there is nothing to match against.
  if (base.size === 0) return 0;
  return [...tokenSet(b)].filter((token) => base.has(token)).length;
}
+
/**
 * Decide whether a candidate node is corroborated by the failure node.
 *
 * @param {object} failureNode - Node treated as the failure.
 * @param {object} candidate - Node being linked to it.
 * @returns {boolean} True when the two share an action file, or have at
 *   least three tokens in common.
 */
function sharesEvidence(failureNode, candidate) {
  const minSharedTokens = 3;
  return sharedFiles(failureNode, candidate) || tokenOverlap(failureNode, candidate) >= minSharedTokens;
}
-function nearestCorrectionAfter(nodes, index) {
- for (let i = index + 1; i < nodes.length; i++) {
- if (nodes[i].status !== 'abandoned' && nodes[i].kind === 'correction') return nodes[i];
/**
 * Pick the node that `node` is most plausibly reacting to.
 *
 * Candidates are non-abandoned nodes other than `node` that do not come after
 * it. Ordering is decided by timestamp when both nodes have one; when either
 * timestamp is missing, the candidate must sit before `node` in `nodes`, so a
 * tree without timestamps never links to a later entry. Candidates are
 * ranked nearest-first: timestamped ones by descending time, then
 * un-timestamped ones by descending array position.
 *
 * @param {object} node - Node being analysed.
 * @param {object[]} nodes - All nodes of the tree; not mutated.
 * @returns {{target: object, linkage: 'semantic'|'positional'}|null} The
 *   nearest candidate that shares evidence with `node` ('semantic');
 *   otherwise the eligible parent, or failing that the nearest candidate
 *   ('positional'); null when there is no candidate.
 */
function nearestFailureTarget(node, nodes) {
  const nodeIndex = nodes.indexOf(node);
  const nodeTs = tsOf(node);

  const earlier = nodes
    .map((candidate, index) => ({ candidate, index, ts: tsOf(candidate) }))
    .filter(({ candidate, index, ts }) => {
      if (candidate.status === 'abandoned' || candidate.id === node.id) return false;
      if (!afterFailure(node, candidate)) return false;
      // afterFailure is permissive when a timestamp is missing; fall back to array order then.
      const comparable = ts !== null && nodeTs !== null;
      return comparable || nodeIndex === -1 || index < nodeIndex;
    });
  if (!earlier.length) return null;

  // Total order: known timestamps first (latest first), then unknown ones; ties by latest position.
  earlier.sort((a, b) => {
    if (a.ts !== null && b.ts !== null && a.ts !== b.ts) return b.ts - a.ts;
    if ((a.ts === null) !== (b.ts === null)) return a.ts === null ? 1 : -1;
    return b.index - a.index;
  });

  const semantic = earlier.find(({ candidate }) => sharesEvidence(candidate, node));
  if (semantic) return { target: semantic.candidate, linkage: 'semantic' };

  const { parent } = node;
  if (parent && parent.status !== 'abandoned' && parent.id !== node.id && afterFailure(node, parent)) {
    return { target: parent, linkage: 'positional' };
  }
  return { target: earlier[0].candidate, linkage: 'positional' };
}
+
/**
 * Pick the node that most plausibly resolved a failure.
 *
 * The anchor is the corrector when there is one, otherwise the failure.
 * Candidates are non-abandoned nodes other than the failure and corrector
 * that do not come before the anchor. Ordering is decided by timestamp when
 * both nodes have one; when either timestamp is missing, the candidate must
 * sit after the anchor in `nodes`, so a tree without timestamps never
 * resolves a failure with an earlier entry. Candidates are ranked
 * nearest-first: timestamped ones by ascending time, then un-timestamped
 * ones by ascending array position.
 *
 * @param {object[]} nodes - All nodes of the tree; not mutated.
 * @param {object} failureNode - Node recorded as the failure.
 * @param {object|null} correctionNode - Corrector, if any.
 * @returns {object|null} The nearest candidate sharing evidence with the
 *   failure, else the nearest candidate, else null.
 */
function nearestAcceptedAfter(nodes, failureNode, correctionNode) {
  const anchor = correctionNode || failureNode;
  const anchorIndex = nodes.indexOf(anchor);
  const anchorTs = tsOf(anchor);

  const later = nodes
    .map((candidate, index) => ({ candidate, index, ts: tsOf(candidate) }))
    .filter(({ candidate, index, ts }) => {
      if (candidate.status === 'abandoned' || candidate.id === failureNode.id) return false;
      if (correctionNode && candidate.id === correctionNode.id) return false;
      if (!afterFailure(candidate, anchor)) return false;
      // afterFailure is permissive when a timestamp is missing; fall back to array order then.
      const comparable = ts !== null && anchorTs !== null;
      return comparable || anchorIndex === -1 || index > anchorIndex;
    });
  if (!later.length) return null;

  // Total order: known timestamps first (earliest first), then unknown ones; ties by earliest position.
  later.sort((a, b) => {
    if (a.ts !== null && b.ts !== null && a.ts !== b.ts) return a.ts - b.ts;
    if ((a.ts === null) !== (b.ts === null)) return a.ts === null ? 1 : -1;
    return a.index - b.index;
  });

  const semantic = later.find(({ candidate }) => sharesEvidence(failureNode, candidate));
  return (semantic || later[0]).candidate;
}
+
/**
 * Pick the correction node that most plausibly answers a failure.
 *
 * Candidates are non-abandoned nodes of kind 'correction', other than the
 * failure itself, that do not come before it. Ordering is decided by
 * timestamp when both nodes have one; when either timestamp is missing, the
 * candidate must sit after the failure in `nodes`, so a tree without
 * timestamps never links a corrector that precedes its failure. Candidates
 * are ranked nearest-first: timestamped ones by ascending time, then
 * un-timestamped ones by ascending array position.
 *
 * @param {object[]} nodes - All nodes of the tree; not mutated.
 * @param {object} failureNode - Node recorded as the failure.
 * @returns {object|null} The nearest correction sharing evidence with the
 *   failure, else the nearest correction, else null.
 */
function nearestCorrectionAfter(nodes, failureNode) {
  const failureIndex = nodes.indexOf(failureNode);
  const failureTs = tsOf(failureNode);

  const later = nodes
    .map((candidate, index) => ({ candidate, index, ts: tsOf(candidate) }))
    .filter(({ candidate, index, ts }) => {
      if (candidate.status === 'abandoned' || candidate.kind !== 'correction') return false;
      if (candidate.id === failureNode.id) return false;
      if (!afterFailure(candidate, failureNode)) return false;
      // afterFailure is permissive when a timestamp is missing; fall back to array order then.
      const comparable = ts !== null && failureTs !== null;
      return comparable || failureIndex === -1 || index > failureIndex;
    });
  if (!later.length) return null;

  // Total order: known timestamps first (earliest first), then unknown ones; ties by earliest position.
  later.sort((a, b) => {
    if (a.ts !== null && b.ts !== null && a.ts !== b.ts) return a.ts - b.ts;
    if ((a.ts === null) !== (b.ts === null)) return a.ts === null ? 1 : -1;
    return a.index - b.index;
  });

  const semantic = later.find(({ candidate }) => sharesEvidence(failureNode, candidate));
  return (semantic || later[0]).candidate;
}
function tierRank(tier) {
test/treetrace.test.js +57 -0
@@ -308,6 +308,63 @@ test('analysis: a keyword-only correction stays in the inferred or confirmed tie
assert.equal(analysis.summary.tierCounts.verified, 0);
});
// A plain follow-up request and a long pasted note must not be turned into
// several failure types just because their wording happens to hit keywords.
test('analysis: a single benign prompt does not yield multiple failure types', () => {
  const root = {
    id: 'node_001',
    text: 'build the marketing deck',
    title: 'build the marketing deck',
    kind: 'root',
    status: 'accepted',
    parent: null,
    ts: '2026-06-12T14:00:00.000Z',
    actions: [],
  };
  const benign = {
    id: 'node_002',
    text: 'and slide an agent to make the decks mobile friendly too please',
    title: 'make the decks mobile friendly',
    kind: 'direction',
    status: 'accepted',
    parent: root,
    ts: '2026-06-12T14:52:00.000Z',
    actions: [],
  };
  // Forty repetitions push the text well past the wording-scan length limit.
  const pastedSentence = 'do not overbuild it, it is too much, try again later if it keeps failing. ';
  const longPaste = {
    id: 'node_003',
    text: 'ok sounds good i agree. ' + pastedSentence.repeat(40),
    title: 'long strategy paste',
    kind: 'checkpoint',
    status: 'accepted',
    parent: benign,
    ts: '2026-06-12T12:52:00.000Z',
    actions: [],
  };

  const analysis = analyzeTree({ nodes: [root, benign, longPaste] });
  const typesFirstSeenAt = (id) =>
    analysis.failures.filter((f) => f.firstSeenNodeId === id).map((f) => f.type);

  assert.equal(
    typesFirstSeenAt('node_002').length,
    0,
    'a benign request should not mint failures from wording alone',
  );
  ['node_001', 'node_002', 'node_003'].forEach((id) => {
    const types = typesFirstSeenAt(id);
    assert.ok(new Set(types).size <= 1, `node ${id} emitted multiple failure types: ${types.join(', ')}`);
  });
});
+
// The correction node is listed after the failure but carries an earlier
// timestamp; no failure or chain may link it (or a resolver) ahead of its failure.
test('analysis: a corrector is never linked with an earlier timestamp than its failure', () => {
  const failure = {
    id: 'node_001',
    text: 'i do not see the deck, just the index file showing text',
    title: 'deck not rendering',
    kind: 'direction',
    status: 'accepted',
    parent: null,
    ts: '2026-06-12T14:06:20.000Z',
    actions: [{ tool: 'Edit', file: 'site/deck/index.html', command: null, input: null, model: 'claude-opus-4-8' }],
  };
  const earlier = {
    id: 'node_002',
    text: 'no that is wrong, the deck still does not work, redo it instead',
    title: 'still broken',
    kind: 'correction',
    status: 'accepted',
    parent: failure,
    ts: '2026-06-12T12:52:00.000Z',
    actions: [{ tool: 'Edit', file: 'site/deck/index.html', command: null, input: null, model: 'claude-opus-4-8' }],
  };

  const analysis = analyzeTree({ nodes: [failure, earlier] });
  const byId = { node_001: failure, node_002: earlier };
  const millisOf = (id) => new Date(byId[id].ts).getTime();

  analysis.failures
    .filter((f) => f.correctedByNodeId)
    .forEach((f) => {
      const failedAt = millisOf(f.firstSeenNodeId);
      const correctedAt = millisOf(f.correctedByNodeId);
      assert.ok(correctedAt >= failedAt, `failure ${f.id} corrected by an earlier-timestamped node`);
    });

  analysis.correctionChains.forEach((c) => {
    const failedAt = millisOf(c.failureNodeId);
    const correctedAt = millisOf(c.correctionNodeId);
    assert.ok(correctedAt >= failedAt, `chain ${c.id} links a corrector that precedes its failure`);
    if (c.resolvedNodeId) {
      const resolvedAt = millisOf(c.resolvedNodeId);
      assert.ok(resolvedAt >= failedAt, `chain ${c.id} resolves before its failure`);
    }
  });
});
+
test('cli: default run writes analysis artifacts with redaction', async () => {
const dir = mkdtempSync(join(tmpdir(), 'treetrace-'));
try {