Zion Boggan zionboggan.com ↗

Analysis v2: verified security audit trail from agent actions, model attribution, tiered confidence

b61cfed   Zion Boggan committed on Jun 12, 2026 (1 week ago)
src/analyze.js +59 -2
@@ -28,13 +28,25 @@ const REPEATED_FIX_HINT = /\b(still failing|still broken|again|same error|didn'?
const UNDERBUILT_HINT = /\b(underbuilt|missing|not enough|too bare|incomplete|you skipped|you missed)\b/i;
const FORMAT_HINT = /\b(format|json|markdown|schema|same structure|exact output|invalid)\b/i;
+const SECURITY_FILE_RE = /(?:^|[\\/])(?:\.env[^\\/]*|[^\\/]*(?:auth|session|middleware|login|signin|signup|permission|rbac|access[-_]?control|secur|crypto|jwt|oauth|passwd|password|secret|credential|token)[^\\/]*)$/i; // Tests only the last path segment: it must start with ".env" or contain one of these keywords, case-insensitively. NOTE(review): keywords match as substrings, so e.g. "author.js" or "tokenizer.ts" also match.
+const RISKY_CMD_RE = /(?:\brm\s+-rf\b|\bchmod\s+777\b|curl[^|]*\|\s*(?:sh|bash)|wget[^|]*\|\s*(?:sh|bash)|--no-verify\b|--force\b|\bDROP\s+TABLE\b|\bTRUNCATE\s+TABLE\b)/i; // Matches "rm -rf", "chmod 777", a curl or wget invocation piped into sh/bash, --no-verify, --force, DROP TABLE and TRUNCATE TABLE, case-insensitively. NOTE(review): "--force\b" also matches "--force-with-lease".
+
+function securityActions(node) { // Returns the subset of node.actions that match either pattern below; a missing `actions` list is treated as empty.
+ return (node.actions || []).filter(
+ (a) => (a.file && SECURITY_FILE_RE.test(a.file)) || (a.command && RISKY_CMD_RE.test(a.command)) // kept when `file` matches SECURITY_FILE_RE or `command` matches RISKY_CMD_RE; falsy fields are never tested
+ );
+}
+
export function analyzeTree(tree) {
if (tree.analysis) return tree.analysis;
+ const modelsSeen = new Set();
for (const node of tree.nodes) {
node.failureSignals = [];
node.evalCandidate = false;
node.lessonIds = [];
+ node.model = (node.actions || []).map((a) => a.model).find(Boolean) || null;
+ for (const a of node.actions || []) if (a.model) modelsSeen.add(a.model);
}
const failures = [];
@@ -64,15 +76,17 @@ export function analyzeTree(tree) {
});
};
- const addFailure = ({ type, confidence, failureNode, correctionNode, resolvedNode, evidence, summary }) => {
+ const addFailure = ({ type, confidence, tier = 'inferred', failureNode, correctionNode, resolvedNode, evidence, summary }) => {
if (!FAILURE_TYPES.has(type) || !failureNode) return null;
if (correctionNode && correctionNode.id === failureNode.id) correctionNode = null;
+ const model = failureNode.model || null;
const ids = uniq([failureNode.id, correctionNode?.id, resolvedNode?.id]);
const key = `${type}:${failureNode.id}`;
const existing = failureByKey.get(key);
if (existing) {
if (confidence > existing.confidence) existing.confidence = confidence;
+ if (tierRank(tier) > tierRank(existing.tier)) existing.tier = tier;
const lr = lessonByType.get(type);
if (lr) lr.nodeIds = uniq([...lr.nodeIds, ...ids]);
const er = evalByType.get(evalTypeFor(type));
@@ -116,7 +130,9 @@ export function analyzeTree(tree) {
failureNode.failureSignals.push({
type,
+ tier,
confidence,
+ model,
evidence,
resolvedBy: correctionNode?.id || resolvedNode?.id || null,
});
@@ -126,7 +142,9 @@ export function analyzeTree(tree) {
const failure = {
id: `failure_${pad(failures.length + 1)}`,
type,
+ tier,
confidence,
+ model,
firstSeenNodeId: failureNode.id,
correctedByNodeId: correctionNode?.id || null,
summary,
@@ -141,10 +159,26 @@ export function analyzeTree(tree) {
};
tree.nodes.forEach((node, index) => {
+ const secActs = securityActions(node);
+ if (secActs.length) {
+ const targets = uniq(secActs.map((a) => a.file || a.command)).slice(0, 3);
+ addFailure({
+ type: 'security_or_privacy_risk',
+ confidence: 0.95,
+ tier: 'verified',
+ failureNode: node,
+ correctionNode: node.kind === 'correction' ? null : nearestCorrectionAfter(tree.nodes, index),
+ resolvedNode: nearestAcceptedAfter(tree.nodes, index),
+ evidence: `Agent touched security-sensitive targets: ${targets.map((t) => `"${truncate(String(t), 80)}"`).join(', ')}`,
+ summary: `An agent action touched auth, secrets, or access control near "${truncate(node.title, 90)}".`,
+ });
+ }
+
if (node.status === 'abandoned') {
addFailure({
type: 'abandoned_path',
confidence: 0.9,
+ tier: 'verified',
failureNode: node,
resolvedNode: nearestAcceptedAfter(tree.nodes, index),
evidence: `Branch abandoned after prompt: "${quote(node.text)}"`,
@@ -167,9 +201,13 @@ export function analyzeTree(tree) {
const signals = inferSignals(node);
for (const signal of signals) {
+ const tier = correctionNode ? 'confirmed' : 'inferred';
+ const confidence =
+ tier === 'confirmed' ? Math.max(signal.confidence, 0.82) : Math.min(signal.confidence, 0.7);
addFailure({
type: signal.type,
- confidence: signal.confidence,
+ confidence,
+ tier,
failureNode,
correctionNode,
resolvedNode,
@@ -185,6 +223,8 @@ export function analyzeTree(tree) {
summary: {
totalFailureSignals: failures.length,
topFailureTypes,
+ tierCounts: countTiers(failures),
+ models: [...modelsSeen],
correctionChains: correctionChains.length,
evalCandidates: evalCandidates.length,
lessons: lessons.length,
@@ -323,6 +363,23 @@ function nearestAcceptedAfter(nodes, index) {
return null;
}
+function nearestCorrectionAfter(nodes, index) { // Returns the first node after `index` whose kind is 'correction' and whose status is not 'abandoned', or null when no such node follows.
+ for (let i = index + 1; i < nodes.length; i++) { // forward scan in array order; only `status` and `kind` are inspected, parent links are not consulted
+ if (nodes[i].status !== 'abandoned' && nodes[i].kind === 'correction') return nodes[i];
+ }
+ return null;
+}
+
+function tierRank(tier) { // Numeric rank for comparing tiers: 'verified' is 3, 'confirmed' is 2, and every other value (including 'inferred') is 1.
+ return tier === 'verified' ? 3 : tier === 'confirmed' ? 2 : 1;
+}
+
+function countTiers(failures) { // Tallies failures by their `tier` and returns an object with `verified`, `confirmed` and `inferred` counts.
+ const counts = { verified: 0, confirmed: 0, inferred: 0 };
+ for (const f of failures) if (counts[f.tier] !== undefined) counts[f.tier]++; // failures whose tier is not a key of `counts` are skipped. NOTE(review): this lookup also sees inherited properties, so a tier such as 'toString' would pass the check.
+ return counts;
+}
+
function summarizeFailure(type, failureNode, correctionNode) {
const subject = truncate(failureNode?.title || 'a previous direction', 90);
if (!correctionNode) {
src/report.js +23 -2
@@ -37,7 +37,13 @@ export function renderReportMarkdown(tree, opts = {}) {
if (tree.stats.abandonedBranches) lines.push(`- Abandoned branches: ${tree.stats.abandonedBranches}`);
if (tree.stats.toolUses) lines.push(`- Tool calls: ${tree.stats.toolUses.toLocaleString()}`);
if (tree.stats.filesTouched) lines.push(`- Files touched: ${tree.stats.filesTouched}`);
- lines.push(`- Failure signals: ${analysis.summary.totalFailureSignals}`);
+ const tc = analysis.summary.tierCounts || { verified: 0, confirmed: 0, inferred: 0 };
+ lines.push(
+ `- Failure signals: ${analysis.summary.totalFailureSignals} (verified ${tc.verified}, confirmed ${tc.confirmed}, inferred ${tc.inferred})`
+ );
+ if (analysis.summary.models && analysis.summary.models.length) {
+ lines.push(`- Models seen: ${analysis.summary.models.join(', ')}`);
+ }
lines.push(`- Eval candidates: ${analysis.summary.evalCandidates}`);
lines.push(`- Lessons: ${analysis.summary.lessons}`);
lines.push('');
@@ -63,7 +69,8 @@ export function renderReportMarkdown(tree, opts = {}) {
}
lines.push('');
for (const failure of analysis.failures.slice(0, 8)) {
- lines.push(`- ${failure.id} (${failure.type}, ${confidencePct(failure.confidence)}): ${escapeMd(failure.summary)}`);
+ const meta = [failure.tier, confidencePct(failure.confidence), failure.model].filter(Boolean).join(', ');
+ lines.push(`- ${failure.id} (${failure.type}, ${meta}): ${escapeMd(failure.summary)}`);
}
if (analysis.failures.length > 8) {
lines.push(`- ... ${analysis.failures.length - 8} more in .treetrace/failures.json`);
@@ -71,6 +78,20 @@ export function renderReportMarkdown(tree, opts = {}) {
lines.push('');
}
+ const securityTrail = analysis.failures.filter(
+ (f) => f.type === 'security_or_privacy_risk' && f.tier === 'verified'
+ );
+ if (securityTrail.length) {
+ lines.push('## Security audit trail');
+ lines.push('');
+ lines.push('Every time an agent touched auth, secrets, or access control in this session:');
+ lines.push('');
+ for (const f of securityTrail.slice(0, 10)) {
+ lines.push(`- ${escapeMd(f.evidence)}${f.model ? ` (${f.model})` : ''}`);
+ }
+ lines.push('');
+ }
+
lines.push('## Handoff brief');
lines.push('');
lines.push(demoteHeadings(stripTitle(renderHandoff(tree, opts)), 2));
test/treetrace.test.js +30 -0
@@ -232,6 +232,36 @@ test('analysis: tiny transcript without corrections does not invent failures', (
assert.deepEqual(analysis.failures, []);
});
+test('analysis: a security-sensitive agent action produces a verified, model-attributed signal', () => { // A node whose only action edits src/auth/session.ts should yield a 'verified' security signal carrying that action's model and pointing at the later correction node.
+ const root = {
+ id: 'node_001', text: 'Add rate limiting to checkout', title: 'Add rate limiting to checkout',
+ kind: 'root', status: 'accepted', parent: null,
+ actions: [{ tool: 'Edit', file: 'src/auth/session.ts', command: null, model: 'claude-sonnet-4-6' }],
+ };
+ const correction = {
+ id: 'node_002', text: 'check the existing auth flow first', title: 'check the existing auth flow first',
+ kind: 'correction', status: 'accepted', parent: root, actions: [],
+ };
+ const analysis = analyzeTree({ nodes: [root, correction] });
+ const sec = analysis.failures.find((f) => f.type === 'security_or_privacy_risk');
+ assert.ok(sec, 'expected a verified security signal from the auth-file edit');
+ assert.equal(sec.tier, 'verified');
+ assert.equal(sec.model, 'claude-sonnet-4-6');
+ assert.equal(sec.correctedByNodeId, 'node_002');
+ assert.ok(sec.evidence.includes('session.ts'));
+ assert.deepEqual(analysis.summary.models, ['claude-sonnet-4-6']);
+ assert.ok(analysis.summary.tierCounts.verified >= 1); // lower bound only: other verified signals are tolerated
+});
+
+test('analysis: a keyword-only correction stays in the inferred or confirmed tier, not verified', () => { // Neither node has actions or an 'abandoned' status, so every reported failure is expected to be non-verified.
+ const root = { id: 'node_001', text: 'build a dashboard', title: 'build a dashboard', kind: 'root', status: 'accepted', parent: null, actions: [] };
+ const corr = { id: 'node_002', text: 'no, that is overbuilt, keep it minimal', title: 'no, that is overbuilt', kind: 'correction', status: 'accepted', parent: root, actions: [] };
+ const analysis = analyzeTree({ nodes: [root, corr] });
+ assert.ok(analysis.failures.length >= 1); // guards against the tier checks below passing vacuously on an empty list
+ assert.ok(analysis.failures.every((f) => f.tier !== 'verified'));
+ assert.equal(analysis.summary.tierCounts.verified, 0);
+});
+
test('cli: default run writes analysis artifacts with redaction', async () => {
const dir = mkdtempSync(join(tmpdir(), 'treetrace-'));
try {