Zion Boggan zionboggan.com ↗

Add analysis layer: failure signals, correction chains, lessons, eval candidates, agent memory

9a4a23e   Zion Boggan committed on Jun 12, 2026 (1 week ago)
package.json +8 -4
@@ -1,7 +1,7 @@
{
"name": "treetrace",
- "version": "0.1.0",
- "description": "Turn AI coding sessions into a clean, shareable PROMPT_TREE.md - the prompt lineage that built your project.",
+ "version": "0.2.0",
+ "description": "Turn AI coding sessions into regression-ready prompt lineage, failure analysis, eval cases, and handoff memory.",
"keywords": [
"claude-code",
"prompt",
@@ -10,7 +10,10 @@
"ai",
"transcript",
"lineage",
- "agent"
+ "agent",
+ "evals",
+ "regression",
+ "memory"
],
"license": "MIT",
"author": "Zion Boggan",
@@ -27,6 +30,7 @@
"node": ">=18"
},
"scripts": {
- "test": "node --test test/"
+ "test": "node --test test/treetrace.test.js",
+ "prepublishOnly": "node --test test/treetrace.test.js"
}
}
src/analyze.js +393 -0
@@ -0,0 +1,393 @@
+import { truncate } from './util.js';
+
+const FAILURE_TYPES = new Set([
+ 'ignored_constraint',
+ 'misunderstood_goal',
+ 'scope_drift',
+ 'wrong_tool_choice',
+ 'hallucinated_file_or_api',
+ 'repeated_failed_fix',
+ 'overbuilt_solution',
+ 'underbuilt_solution',
+ 'security_or_privacy_risk',
+ 'dependency_or_environment_mismatch',
+ 'format_violation',
+ 'user_frustration',
+ 'abandoned_path',
+]);
+
+const CORRECTION_HINT =
+ /\b(no|stop|scrap|not that|you forgot|you ignored|that's wrong|that is wrong|i said|instead|redo|re do|go back|wrong|doesn'?t work|didn'?t work|still (failing|broken|wrong|bad)|not what i (asked|wanted|meant))\b/i;
+const FRUSTRATION_HINT =
+ /\b(sucks|awful|god awful|what the heck|wtf|mad|angry|frustrat|not suffic|i don'?t trust|terrible|bad)\b/i;
+const PRIVACY_HINT = /\b(secret|token|api key|apikey|password|redact|privacy|private|local-first|telemetry|upload|cloud)\b/i;
+const SCOPE_DRIFT_HINT = /\b(don'?t add|do not add|not a web app|keep it local|too much|overbuilt|scope drift|stay focused|same format|keep .* cli|zero-config cli)\b/i;
+const TOOL_HINT = /\b(wrong tool|wrong library|use .* instead|don'?t use|dependency|package|environment|node version|python version|missing module)\b/i;
+const HALLUCINATION_HINT = /\b(hallucinat|doesn'?t exist|does not exist|no such file|fake file|fake api|made up)\b/i;
+const REPEATED_FIX_HINT = /\b(still failing|still broken|again|same error|didn'?t fix|doesn'?t fix|keeps? failing)\b/i;
+const UNDERBUILT_HINT = /\b(underbuilt|missing|not enough|too bare|incomplete|you skipped|you missed)\b/i;
+const FORMAT_HINT = /\b(format|json|markdown|schema|same structure|exact output|invalid)\b/i;
+
+export function analyzeTree(tree) {
+ if (tree.analysis) return tree.analysis;
+
+ for (const node of tree.nodes) {
+ node.failureSignals = [];
+ node.evalCandidate = false;
+ node.lessonIds = [];
+ }
+
+ const failures = [];
+ const correctionChains = [];
+ const lessons = [];
+ const evalCandidates = [];
+
+ const addFailure = ({ type, confidence, failureNode, correctionNode, resolvedNode, evidence, summary }) => {
+ if (!FAILURE_TYPES.has(type) || !failureNode) return null;
+
+ const failureId = `failure_${String(failures.length + 1).padStart(3, '0')}`;
+ const lessonId = `lesson_${String(lessons.length + 1).padStart(3, '0')}`;
+ const evalId = `eval_${String(evalCandidates.length + 1).padStart(3, '0')}`;
+ const lesson = lessonFor(type, correctionNode || failureNode);
+
+ const signal = {
+ type,
+ confidence,
+ evidence,
+ resolvedBy: correctionNode?.id || resolvedNode?.id || null,
+ };
+ failureNode.failureSignals.push(signal);
+ failureNode.evalCandidate = true;
+ failureNode.lessonIds.push(lessonId);
+
+ const failure = {
+ id: failureId,
+ type,
+ confidence,
+ firstSeenNodeId: failureNode.id,
+ correctedByNodeId: correctionNode?.id || null,
+ summary,
+ evidence,
+ lesson: lesson.text,
+ evalCandidate: true,
+ };
+ failures.push(failure);
+
+ lessons.push({
+ id: lessonId,
+ title: lesson.title,
+ nodeIds: [...new Set([failureNode.id, correctionNode?.id, resolvedNode?.id].filter(Boolean))],
+ text: lesson.text,
+ });
+
+ evalCandidates.push({
+ id: evalId,
+ source: 'treetrace',
+ type: evalTypeFor(type),
+ task: evalTaskFor(type),
+ context: summary,
+ input: 'Continue development of the project while preserving the corrected direction and constraints.',
+ expected_behavior: expectedBehaviorFor(type),
+ failure_mode: failureModeFor(type),
+ sourceNodeIds: [...new Set([failureNode.id, correctionNode?.id, resolvedNode?.id].filter(Boolean))],
+ });
+
+ if (correctionNode) {
+ correctionChains.push({
+ id: `chain_${String(correctionChains.length + 1).padStart(3, '0')}`,
+ failureNodeId: failureNode.id,
+ correctionNodeId: correctionNode.id,
+ resolvedNodeId: resolvedNode?.id || null,
+ failureType: type,
+ confidence: confidenceLabel(confidence),
+ summary,
+ });
+ }
+
+ return failure;
+ };
+
+ tree.nodes.forEach((node, index) => {
+ if (node.status === 'abandoned') {
+ addFailure({
+ type: 'abandoned_path',
+ confidence: 0.9,
+ failureNode: node,
+ resolvedNode: nearestAcceptedAfter(tree.nodes, index),
+ evidence: `Branch abandoned after prompt: "${quote(node.text)}"`,
+ summary: `A side path was abandoned: ${truncate(node.title, 120)}`,
+ });
+ return;
+ }
+
+ const shouldAnalyze =
+ node.kind === 'correction' ||
+ CORRECTION_HINT.test(node.text) ||
+ FRUSTRATION_HINT.test(node.text) ||
+ PRIVACY_HINT.test(node.text);
+ if (!shouldAnalyze) return;
+
+ const failureNode = nearestFailureTarget(node, tree.nodes, index);
+ const resolvedNode = nearestAcceptedAfter(tree.nodes, index);
+ const signals = inferSignals(node);
+
+ for (const signal of signals) {
+ addFailure({
+ type: signal.type,
+ confidence: signal.confidence,
+ failureNode,
+ correctionNode: node,
+ resolvedNode,
+ evidence: `User said: "${quote(node.text)}"`,
+ summary: summarizeFailure(signal.type, failureNode, node),
+ });
+ }
+ });
+
+ const topFailureTypes = countTypes(failures);
+ tree.analysis = {
+ schemaVersion: '0.2',
+ summary: {
+ totalFailureSignals: failures.length,
+ topFailureTypes,
+ correctionChains: correctionChains.length,
+ evalCandidates: evalCandidates.length,
+ lessons: lessons.length,
+ },
+ failures,
+ correctionChains,
+ lessons,
+ evalCandidates,
+ };
+ return tree.analysis;
+}
+
+export function renderFailuresJson(tree, opts = {}) {
+ const analysis = analyzeTree(tree);
+ return {
+ schemaVersion: '0.2',
+ project: projectBlock(opts),
+ summary: analysis.summary,
+ failures: analysis.failures,
+ correctionChains: analysis.correctionChains,
+ };
+}
+
+export function renderLessonsMarkdown(tree, opts = {}) {
+ const analysis = analyzeTree(tree);
+ const lines = ['# TreeTrace Lessons', ''];
+ if (!analysis.lessons.length) {
+ lines.push('No high-confidence failure lessons were detected in this session.');
+ lines.push('');
+ return lines.join('\n');
+ }
+ analysis.lessons.forEach((lesson, i) => {
+ lines.push(`## ${i + 1}. ${lesson.title}`);
+ lines.push('');
+ lines.push(lesson.text);
+ lines.push('');
+ lines.push(`Source nodes: ${lesson.nodeIds.join(', ')}`);
+ lines.push('');
+ });
+ return lines.join('\n');
+}
+
+export function renderEvalsJsonl(tree) {
+ const analysis = analyzeTree(tree);
+ return analysis.evalCandidates.map((e) => JSON.stringify(e)).join('\n') + (analysis.evalCandidates.length ? '\n' : '');
+}
+
+export function renderMemoryMarkdown(tree, opts = {}) {
+ const analysis = analyzeTree(tree);
+ const projectName = opts.projectName || 'this project';
+ const lines = [`# TreeTrace Agent Memory`, ''];
+ lines.push(`Project: ${projectName}`);
+ lines.push('');
+ lines.push('## Durable project constraints');
+ lines.push('');
+ lines.push('- Keep the project local-first and privacy-first.');
+ lines.push('- Treat the structured outputs as the core product.');
+ lines.push('- Keep the human-readable report as one artifact among several.');
+ lines.push('');
+ lines.push('');
+ lines.push('## Lessons from this lineage');
+ lines.push('');
+ if (analysis.lessons.length) {
+ for (const lesson of analysis.lessons.slice(0, 8)) lines.push(`- ${lesson.text}`);
+ } else {
+ lines.push('- No high-confidence failure lessons were detected yet.');
+ }
+ lines.push('');
+ lines.push('## Known bad paths');
+ lines.push('');
+ const badPaths = analysis.failures.filter((f) => f.type === 'abandoned_path').slice(0, 6);
+ if (badPaths.length) {
+ for (const failure of badPaths) lines.push(`- ${failure.summary}`);
+ } else {
+ lines.push('');
+ lines.push('- Do not narrow the project to only a README generator.');
+ }
+ lines.push('');
+ lines.push('## Preferred next work');
+ lines.push('');
+ lines.push('- Improve the failure-signal heuristics with real fixtures.');
+ lines.push('- Add a compare mode for baseline and candidate exports.');
+ lines.push('');
+ lines.push('');
+ return lines.join('\n');
+}
+
+function inferSignals(node) {
+ const text = node.text;
+ const signals = [];
+ const push = (type, confidence) => {
+ if (!signals.some((s) => s.type === type)) signals.push({ type, confidence });
+ };
+
+ if (SCOPE_DRIFT_HINT.test(text)) push('scope_drift', 0.82);
+ if (/\b(i said|you forgot|you ignored|not what i (asked|wanted|meant)|asked for)\b/i.test(text)) {
+ push('ignored_constraint', 0.84);
+ }
+ if (TOOL_HINT.test(text)) push('dependency_or_environment_mismatch', 0.72);
+ if (/\bwrong tool|wrong library|use .* instead\b/i.test(text)) push('wrong_tool_choice', 0.78);
+ if (HALLUCINATION_HINT.test(text)) push('hallucinated_file_or_api', 0.82);
+ if (REPEATED_FIX_HINT.test(text)) push('repeated_failed_fix', 0.8);
+ if (/\btoo much|overbuilt|scrap .* web app|too heavy\b/i.test(text)) push('overbuilt_solution', 0.78);
+ if (UNDERBUILT_HINT.test(text)) push('underbuilt_solution', 0.76);
+ if (PRIVACY_HINT.test(text)) push('security_or_privacy_risk', 0.7);
+ if (FORMAT_HINT.test(text)) push('format_violation', 0.68);
+ if (FRUSTRATION_HINT.test(text)) push('user_frustration', 0.72);
+ if (!signals.length && node.kind === 'correction') push('misunderstood_goal', 0.62);
+
+ return signals.slice(0, 3);
+}
+
+function nearestFailureTarget(node, nodes, index) {
+ if (node.parent && node.parent.status !== 'abandoned') return node.parent;
+ for (let i = index - 1; i >= 0; i--) {
+ if (nodes[i].status !== 'abandoned') return nodes[i];
+ }
+ return node;
+}
+
+function nearestAcceptedAfter(nodes, index) {
+ for (let i = index + 1; i < nodes.length; i++) {
+ if (nodes[i].status !== 'abandoned') return nodes[i];
+ }
+ return null;
+}
+
+function summarizeFailure(type, failureNode, correctionNode) {
+ const subject = truncate(failureNode?.title || 'a previous direction', 90);
+ const correction = truncate(correctionNode?.title || 'a later correction', 90);
+ switch (type) {
+ case 'ignored_constraint':
+ return `A prior direction appears to have ignored a user constraint near "${subject}"; corrected by "${correction}".`;
+ case 'scope_drift':
+ return `The session drifted from the intended scope near "${subject}"; corrected by "${correction}".`;
+ case 'overbuilt_solution':
+ return `The work appears to have overbuilt the requested shape near "${subject}"; corrected by "${correction}".`;
+ case 'underbuilt_solution':
+ return `The work appears to have underbuilt or skipped expected scope near "${subject}"; corrected by "${correction}".`;
+ case 'security_or_privacy_risk':
+ return `A privacy or security boundary became important near "${subject}"; reinforced by "${correction}".`;
+ case 'user_frustration':
+ return `User frustration signaled that the prior path near "${subject}" was not meeting expectations.`;
+ case 'repeated_failed_fix':
+ return `A fix loop appears to have repeated near "${subject}"; corrected by "${correction}".`;
+ default:
+ return `A possible ${type.replace(/_/g, ' ')} occurred near "${subject}"; corrected by "${correction}".`;
+ }
+}
+
+function lessonFor(type, node) {
+ const prompt = truncate(node?.text || '', 180);
+ const titles = {
+ ignored_constraint: 'Preserve explicit constraints',
+ misunderstood_goal: 'Re-check the actual goal',
+ scope_drift: 'Keep scope boundaries durable',
+ wrong_tool_choice: 'Choose tools from the repo context',
+ hallucinated_file_or_api: 'Verify files and APIs before acting',
+ repeated_failed_fix: 'Break repeated fix loops',
+ overbuilt_solution: 'Avoid overbuilding beyond the requested shape',
+ underbuilt_solution: 'Do not skip required scope',
+ security_or_privacy_risk: 'Treat privacy boundaries as product requirements',
+ dependency_or_environment_mismatch: 'Respect the local environment',
+ format_violation: 'Preserve requested output formats',
+ user_frustration: 'Escalate when user frustration appears',
+ abandoned_path: 'Avoid abandoned paths unless explicitly revived',
+ };
+ const text = {
+ ignored_constraint: 'Future agents should carry explicit user constraints forward as high-priority requirements.',
+ misunderstood_goal: 'Future agents should restate and verify the goal before continuing after a correction.',
+ scope_drift: 'Future agents should preserve the corrected scope and avoid adding unrequested product shape.',
+ wrong_tool_choice: 'Future agents should prefer tools and dependencies already supported by the repo and environment.',
+ hallucinated_file_or_api: 'Future agents should verify that referenced files, commands, and APIs exist before relying on them.',
+ repeated_failed_fix: 'Future agents should stop and reassess after repeated failed fixes instead of applying another blind patch.',
+ overbuilt_solution: 'Future agents should prefer the smallest implementation that satisfies the corrected product direction.',
+ underbuilt_solution: 'Future agents should check that all explicitly requested behavior is represented before claiming completion.',
+ security_or_privacy_risk: 'Future agents should not weaken local-first privacy, redaction, or no-network guarantees without explicit approval.',
+ dependency_or_environment_mismatch: 'Future agents should validate environment assumptions before choosing dependencies or runtime paths.',
+ format_violation: 'Future agents should preserve requested output formats exactly unless the user approves a change.',
+ user_frustration: 'Future agents should treat frustration as a signal to slow down, verify assumptions, and correct course.',
+ abandoned_path: 'Future agents should avoid resurrecting abandoned branches unless the user explicitly asks for them.',
+ };
+ return {
+ title: titles[type] || 'Preserve the correction',
+ text: `${text[type] || 'Future agents should preserve this correction.'}${prompt ? ` Evidence: "${prompt}"` : ''}`,
+ };
+}
+
+function evalTypeFor(type) {
+ if (type === 'security_or_privacy_risk') return 'privacy_boundary_preservation';
+ if (type === 'scope_drift' || type === 'overbuilt_solution') return 'scope_drift_detection';
+ if (type === 'ignored_constraint' || type === 'format_violation') return 'constraint_preservation';
+ if (type === 'wrong_tool_choice' || type === 'dependency_or_environment_mismatch') return 'tool_choice_regression';
+ if (type === 'abandoned_path') return 'correction_adherence';
+ return 'instruction_following_regression';
+}
+
+function evalTaskFor(type) {
+ if (type === 'security_or_privacy_risk') return 'Continue development while preserving privacy and redaction boundaries.';
+ if (type === 'scope_drift') return 'Continue development without drifting outside the corrected scope.';
+ if (type === 'format_violation') return 'Continue development while preserving the requested output format.';
+ return 'Continue development while preserving the corrected direction from the session lineage.';
+}
+
+function expectedBehaviorFor(type) {
+ const common = ['Use the corrected prompt lineage as durable context', 'Do not repeat the documented failure mode'];
+ if (type === 'security_or_privacy_risk') return ['Preserve local-first behavior', 'Do not add telemetry or uploads', 'Keep redaction fail-closed', ...common];
+ if (type === 'scope_drift') return ['Stay inside the corrected scope', 'Do not add unrequested product surfaces', ...common];
+ if (type === 'ignored_constraint') return ['Carry explicit user constraints forward', 'Check new work against those constraints', ...common];
+ if (type === 'format_violation') return ['Preserve the requested format', 'Validate generated artifacts', ...common];
+ return common;
+}
+
+function failureModeFor(type) {
+ return `Agent repeats ${type.replace(/_/g, ' ')} despite prior correction.`;
+}
+
+function confidenceLabel(score) {
+ if (score >= 0.8) return 'high';
+ if (score >= 0.65) return 'medium';
+ return 'low';
+}
+
+function countTypes(failures) {
+ const counts = new Map();
+ for (const failure of failures) counts.set(failure.type, (counts.get(failure.type) || 0) + 1);
+ return [...counts.entries()]
+ .sort((a, b) => b[1] - a[1] || a[0].localeCompare(b[0]))
+ .map(([type, count]) => ({ type, count }));
+}
+
+function projectBlock(opts) {
+ return {
+ name: opts.projectName || null,
+ generatedAt: opts.generatedAt || null,
+ };
+}
+
+function quote(text) {
+ return truncate(String(text || '').replace(/\s+/g, ' '), 240).replace(/"/g, '\\"');
+}