b61cfed · TreeTrace

Analysis v2: verified security audit trail from agent actions, model attribution, tiered confidence

b61cfed Zion Boggan committed on Jun 12, 2026 (1 week ago)

src/analyze.js +59 -2

		@@ -28,13 +28,25 @@ const REPEATED_FIX_HINT = /\b(still failing\|still broken\|again\|same error\|didn'?
		// Case-insensitive, word-bounded phrases such as "missing", "too bare" or "you skipped".
		// NOTE(review): the consumer is outside this hunk — presumably matched against user
		// correction text to flag output judged too thin; confirm against inferSignals.
		const UNDERBUILT_HINT = /\b(underbuilt\|missing\|not enough\|too bare\|incomplete\|you skipped\|you missed)\b/i;
		// Case-insensitive, word-bounded terms about output shape ("json", "schema", "invalid", ...).
		// NOTE(review): presumably flags complaints about output format; confirm at the call site.
		const FORMAT_HINT = /\b(format\|json\|markdown\|schema\|same structure\|exact output\|invalid)\b/i;

	+	const SECURITY_FILE_RE = /(?:^\|[\\/])(?:\.env[^\\/]\|[^\\/](?:auth\|session\|middleware\|login\|signin\|signup\|permission\|rbac\|access[-_]?control\|secur\|crypto\|jwt\|oauth\|passwd\|password\|secret\|credential\|token)[^\\/]*)$/i;
	+	const RISKY_CMD_RE = /(?:\brm\s+-rf\b\|\bchmod\s+777\b\|curl[^\|]\\|\s(?:sh\|bash)\|wget[^\|]\\|\s(?:sh\|bash)\|--no-verify\b\|--force\b\|\bDROP\s+TABLE\b\|\bTRUNCATE\s+TABLE\b)/i;
	+
	+	function securityActions(node) {
	+	return (node.actions \|\| []).filter(
	+	(a) => (a.file && SECURITY_FILE_RE.test(a.file)) \|\| (a.command && RISKY_CMD_RE.test(a.command))
	+	);
	+	}
	+
		export function analyzeTree(tree) {
		if (tree.analysis) return tree.analysis;

	+	const modelsSeen = new Set();
		for (const node of tree.nodes) {
		node.failureSignals = [];
		node.evalCandidate = false;
		node.lessonIds = [];
	+	node.model = (node.actions \|\| []).map((a) => a.model).find(Boolean) \|\| null;
	+	for (const a of node.actions \|\| []) if (a.model) modelsSeen.add(a.model);
		}

		const failures = [];
		@@ -64,15 +76,17 @@ export function analyzeTree(tree) {
		});
		};

	-	const addFailure = ({ type, confidence, failureNode, correctionNode, resolvedNode, evidence, summary }) => {
	+	const addFailure = ({ type, confidence, tier = 'inferred', failureNode, correctionNode, resolvedNode, evidence, summary }) => {
		if (!FAILURE_TYPES.has(type) \|\| !failureNode) return null;
		if (correctionNode && correctionNode.id === failureNode.id) correctionNode = null;
	+	const model = failureNode.model \|\| null;

		const ids = uniq([failureNode.id, correctionNode?.id, resolvedNode?.id]);
		const key = `${type}:${failureNode.id}`;
		const existing = failureByKey.get(key);
		if (existing) {
		if (confidence > existing.confidence) existing.confidence = confidence;
	+	if (tierRank(tier) > tierRank(existing.tier)) existing.tier = tier;
		const lr = lessonByType.get(type);
		if (lr) lr.nodeIds = uniq([...lr.nodeIds, ...ids]);
		const er = evalByType.get(evalTypeFor(type));
		@@ -116,7 +130,9 @@ export function analyzeTree(tree) {

		failureNode.failureSignals.push({
		type,
	+	tier,
		confidence,
	+	model,
		evidence,
		resolvedBy: correctionNode?.id \|\| resolvedNode?.id \|\| null,
		});
		@@ -126,7 +142,9 @@ export function analyzeTree(tree) {
		const failure = {
		id: `failure_${pad(failures.length + 1)}`,
		type,
	+	tier,
		confidence,
	+	model,
		firstSeenNodeId: failureNode.id,
		correctedByNodeId: correctionNode?.id \|\| null,
		summary,
		@@ -141,10 +159,26 @@ export function analyzeTree(tree) {
		};

		tree.nodes.forEach((node, index) => {
	+	const secActs = securityActions(node);
	+	if (secActs.length) {
	+	const targets = uniq(secActs.map((a) => a.file \|\| a.command)).slice(0, 3);
	+	addFailure({
	+	type: 'security_or_privacy_risk',
	+	confidence: 0.95,
	+	tier: 'verified',
	+	failureNode: node,
	+	correctionNode: node.kind === 'correction' ? null : nearestCorrectionAfter(tree.nodes, index),
	+	resolvedNode: nearestAcceptedAfter(tree.nodes, index),
	+	evidence: `Agent touched security-sensitive targets: ${targets.map((t) => `"${truncate(String(t), 80)}"`).join(', ')}`,
	+	summary: `An agent action touched auth, secrets, or access control near "${truncate(node.title, 90)}".`,
	+	});
	+	}
	+
		if (node.status === 'abandoned') {
		addFailure({
		type: 'abandoned_path',
		confidence: 0.9,
	+	tier: 'verified',
		failureNode: node,
		resolvedNode: nearestAcceptedAfter(tree.nodes, index),
		evidence: `Branch abandoned after prompt: "${quote(node.text)}"`,
		@@ -167,9 +201,13 @@ export function analyzeTree(tree) {
		const signals = inferSignals(node);

		for (const signal of signals) {
	+	const tier = correctionNode ? 'confirmed' : 'inferred';
	+	const confidence =
	+	tier === 'confirmed' ? Math.max(signal.confidence, 0.82) : Math.min(signal.confidence, 0.7);
		addFailure({
		type: signal.type,
	-	confidence: signal.confidence,
	+	confidence,
	+	tier,
		failureNode,
		correctionNode,
		resolvedNode,
		@@ -185,6 +223,8 @@ export function analyzeTree(tree) {
		summary: {
		totalFailureSignals: failures.length,
		topFailureTypes,
	+	tierCounts: countTiers(failures),
	+	models: [...modelsSeen],
		correctionChains: correctionChains.length,
		evalCandidates: evalCandidates.length,
		lessons: lessons.length,
		@@ -323,6 +363,23 @@ function nearestAcceptedAfter(nodes, index) {
		return null;
		}

	+	function nearestCorrectionAfter(nodes, index) {
	+	for (let i = index + 1; i < nodes.length; i++) {
	+	if (nodes[i].status !== 'abandoned' && nodes[i].kind === 'correction') return nodes[i];
	+	}
	+	return null;
	+	}
	+
	+	function tierRank(tier) {
	+	return tier === 'verified' ? 3 : tier === 'confirmed' ? 2 : 1;
	+	}
	+
	+	function countTiers(failures) {
	+	const counts = { verified: 0, confirmed: 0, inferred: 0 };
	+	for (const f of failures) if (counts[f.tier] !== undefined) counts[f.tier]++;
	+	return counts;
	+	}
	+
		function summarizeFailure(type, failureNode, correctionNode) {
		const subject = truncate(failureNode?.title \|\| 'a previous direction', 90);
		if (!correctionNode) {

src/report.js +23 -2

		@@ -37,7 +37,13 @@ export function renderReportMarkdown(tree, opts = {}) {
		if (tree.stats.abandonedBranches) lines.push(`- Abandoned branches: ${tree.stats.abandonedBranches}`);
		if (tree.stats.toolUses) lines.push(`- Tool calls: ${tree.stats.toolUses.toLocaleString()}`);
		if (tree.stats.filesTouched) lines.push(`- Files touched: ${tree.stats.filesTouched}`);
	-	lines.push(`- Failure signals: ${analysis.summary.totalFailureSignals}`);
	+	const tc = analysis.summary.tierCounts \|\| { verified: 0, confirmed: 0, inferred: 0 };
	+	lines.push(
	+	`- Failure signals: ${analysis.summary.totalFailureSignals} (verified ${tc.verified}, confirmed ${tc.confirmed}, inferred ${tc.inferred})`
	+	);
	+	if (analysis.summary.models && analysis.summary.models.length) {
	+	lines.push(`- Models seen: ${analysis.summary.models.join(', ')}`);
	+	}
		lines.push(`- Eval candidates: ${analysis.summary.evalCandidates}`);
		lines.push(`- Lessons: ${analysis.summary.lessons}`);
		lines.push('');
		@@ -63,7 +69,8 @@ export function renderReportMarkdown(tree, opts = {}) {
		}
		lines.push('');
		for (const failure of analysis.failures.slice(0, 8)) {
	-	lines.push(`- ${failure.id} (${failure.type}, ${confidencePct(failure.confidence)}): ${escapeMd(failure.summary)}`);
	+	const meta = [failure.tier, confidencePct(failure.confidence), failure.model].filter(Boolean).join(', ');
	+	lines.push(`- ${failure.id} (${failure.type}, ${meta}): ${escapeMd(failure.summary)}`);
		}
		if (analysis.failures.length > 8) {
		lines.push(`- ... ${analysis.failures.length - 8} more in .treetrace/failures.json`);
		@@ -71,6 +78,20 @@ export function renderReportMarkdown(tree, opts = {}) {
		lines.push('');
		}

	+	const securityTrail = analysis.failures.filter(
	+	(f) => f.type === 'security_or_privacy_risk' && f.tier === 'verified'
	+	);
	+	if (securityTrail.length) {
	+	lines.push('## Security audit trail');
	+	lines.push('');
	+	lines.push('Every time an agent touched auth, secrets, or access control in this session:');
	+	lines.push('');
	+	for (const f of securityTrail.slice(0, 10)) {
	+	lines.push(`- ${escapeMd(f.evidence)}${f.model ? ` (${f.model})` : ''}`);
	+	}
	+	lines.push('');
	+	}
	+
		lines.push('## Handoff brief');
		lines.push('');
		lines.push(demoteHeadings(stripTitle(renderHandoff(tree, opts)), 2));

test/treetrace.test.js +30 -0

		@@ -232,6 +232,36 @@ test('analysis: tiny transcript without corrections does not invent failures', (
		assert.deepEqual(analysis.failures, []);
		});

	+	test('analysis: a security-sensitive agent action produces a verified, model-attributed signal', () => {
	+	const root = {
	+	id: 'node_001', text: 'Add rate limiting to checkout', title: 'Add rate limiting to checkout',
	+	kind: 'root', status: 'accepted', parent: null,
	+	actions: [{ tool: 'Edit', file: 'src/auth/session.ts', command: null, model: 'claude-sonnet-4-6' }],
	+	};
	+	const correction = {
	+	id: 'node_002', text: 'check the existing auth flow first', title: 'check the existing auth flow first',
	+	kind: 'correction', status: 'accepted', parent: root, actions: [],
	+	};
	+	const analysis = analyzeTree({ nodes: [root, correction] });
	+	const sec = analysis.failures.find((f) => f.type === 'security_or_privacy_risk');
	+	assert.ok(sec, 'expected a verified security signal from the auth-file edit');
	+	assert.equal(sec.tier, 'verified');
	+	assert.equal(sec.model, 'claude-sonnet-4-6');
	+	assert.equal(sec.correctedByNodeId, 'node_002');
	+	assert.ok(sec.evidence.includes('session.ts'));
	+	assert.deepEqual(analysis.summary.models, ['claude-sonnet-4-6']);
	+	assert.ok(analysis.summary.tierCounts.verified >= 1);
	+	});
	+
	+	test('analysis: a keyword-only correction stays in the inferred or confirmed tier, not verified', () => {
	+	const root = { id: 'node_001', text: 'build a dashboard', title: 'build a dashboard', kind: 'root', status: 'accepted', parent: null, actions: [] };
	+	const corr = { id: 'node_002', text: 'no, that is overbuilt, keep it minimal', title: 'no, that is overbuilt', kind: 'correction', status: 'accepted', parent: root, actions: [] };
	+	const analysis = analyzeTree({ nodes: [root, corr] });
	+	assert.ok(analysis.failures.length >= 1);
	+	assert.ok(analysis.failures.every((f) => f.tier !== 'verified'));
	+	assert.equal(analysis.summary.tierCounts.verified, 0);
	+	});
	+
		test('cli: default run writes analysis artifacts with redaction', async () => {
		const dir = mkdtempSync(join(tmpdir(), 'treetrace-'));
		try {