| @@ -0,0 +1,23 @@ | ||
| + | name: ci | |
| + | on: | |
| + | push: | |
| + | branches: [main] | |
| + | pull_request: | |
| + | ||
| + | jobs: | |
| + | test: | |
| + | runs-on: ubuntu-latest | |
| + | strategy: | |
| + | matrix: | |
| + | node: [18, 20, 22] | |
| + | steps: | |
| + | - uses: actions/checkout@v4 | |
| + | - uses: actions/setup-node@v4 | |
| + | with: | |
| + | node-version: ${{ matrix.node }} | |
| + | - run: node --test test/ | |
| + | - name: CLI smoke test (fixture, fail-closed redaction) | |
| + | # Runs the CLI on the synthetic fixture with --redact-auto, then checks that | |
| + | # the rendered tree contains a REDACTED marker and no "sk-ant-" key prefix. | |
| + | # NOTE(review): the negated grep fails the step only because it is the last | |
| + | # command of the script (bash errexit ignores `!`-inverted commands) - keep | |
| + | # it last, or convert it to an explicit `if grep ...; then exit 1; fi`. | |
| + | run: | | |
| + | node bin/treetrace.js --file test/fixtures/synthetic-session.jsonl --dir "$RUNNER_TEMP" --redact-auto --quiet | |
| + | grep -q "REDACTED" "$RUNNER_TEMP/PROMPT_TREE.md" | |
| + | ! grep -q "sk-ant-" "$RUNNER_TEMP/PROMPT_TREE.md" |
| @@ -0,0 +1,91 @@ | ||
| + | # 🌳 treetrace | |
| + | ||
| + | **Your repo says what you built. `PROMPT_TREE.md` says how.** | |
| + | ||
| + | treetrace reads the AI coding sessions already sitting on your disk and turns them into a clean, shareable prompt lineage - the root idea, the directions, the corrections, the dead ends, and the path that shipped. | |
| + | ||
| + | ```bash | |
| + | cd your-project | |
| + | npx treetrace | |
| + | ``` | |
| + | ||
| + | Thirty seconds later: | |
| + | ||
| + | ``` | |
| + | 🌳 your-project - 41 prompts · 6 sessions · 9 days · 3 ↩ corrections · 1 ✗ abandoned · 1,204 tool calls | |
| + | ⬢ Build a tool that turns AI chat logs into a prompt tree | |
| + | → Make it agent-agnostic so it works with any transcript | |
| + | ↩ No, scrap the web app - make it a zero-config CLI | |
| + | ⚑ Add a redaction gate so secrets never reach the export | |
| + | ◆ Ship it: README, schema, examples | |
| + | ||
| + | ✓ wrote PROMPT_TREE.md and .treetrace/tree.json | |
| + | ``` | |
| + | ||
| + | No accounts. No uploads. No config. Your transcripts never leave your machine. | |
| + | ||
| + | ## Why | |
| + | ||
| + | Projects are increasingly built through hundreds of prompts - and that history evaporates into chat logs nobody reopens. The prompt lineage is the **how** of modern software: | |
| + | ||
| + | - **Show your work.** "Built with AI" invites slop-skepticism; a visible, honest prompt tree is the receipt. | |
| + | - **Hand off cleanly.** `treetrace --handoff` distills the lineage into a context pack for the next agent (or the next human): goal, accepted decisions, constraints learned the hard way, known dead ends. | |
| + | - **Teach and compare.** The fastest way to get better at directing agents is reading how others do it. | |
| + | - **Audit-friendly.** Every node links back to its source event ID in your local transcript. | |
| + | ||
| + | ## What it does | |
| + | ||
| + | 1. **Discovers** Claude Code session files for your project (`~/.claude/projects/...`) - or imports any transcript via `--file` / `--stdin`. | |
| + | 2. **Extracts** the meaningful human prompts; tool noise, slash commands, "continue" nudges, and subagent chatter are filtered or folded. | |
| + | 3. **Classifies** each prompt: `⬢` root · `→` direction · `↩` correction · `⚑` scope change · `◆` checkpoint - and detects genuinely abandoned branches (`✗`) from real rewind topology, not guesswork. | |
| + | 4. **Gates** every export behind a secret scan. Nothing is written until each hit is resolved (`redact` / `keep` / `edit`). Outside a TTY, every hit is auto-redacted - treetrace **fails closed**. | |
| + | 5. **Exports** `PROMPT_TREE.md` (for humans, GitHub-ready), `.treetrace/tree.json` (open schema, [SCHEMA.md](SCHEMA.md)), and `--handoff` briefs (for agents). | |
| + | ||
| + | ## The redaction gate | |
| + | ||
| + | A privacy-positioned tool gets exactly one chance with your secrets, so this is the most engineered part of treetrace: | |
| + | ||
| + | - Curated provider rules (AWS, GitHub, GitLab, Anthropic, OpenAI, Slack, Stripe, npm, Tailscale, Google, SendGrid, Twilio, Telegram, Discord webhooks, JWTs, private key blocks, WireGuard, basic-auth URLs, bearer tokens, secret assignments) plus a high-entropy fallback. | |
| + | - Interactive review of every unique hit before anything is written. | |
| + | - A **shadow scan** re-checks the final rendered artifact; an unresolved hit aborts the write. | |
| + | - Your decisions persist in `.treetrace/redactions.json` as unsalted **hashes only** - the file never contains a secret and re-runs never re-ask. | |
| + | ||
| + | ## Usage | |
| + | ||
| + | ```bash | |
| + | npx treetrace # trace this project | |
| + | npx treetrace --handoff # agent-ready brief to stdout (pipe into your next agent) | |
| + | npx treetrace --handoff | claude -p "Read this handoff brief and continue the project" | |
| + | npx treetrace --file session.jsonl # specific transcript(s) | |
| + | npx treetrace --stdin < chat-export.txt # pasted transcript (User:/Assistant: markers) | |
| + | npx treetrace --titles-only # compact tree, no full prompt texts | |
| + | npx treetrace --redact-auto # redact every hit without prompting | |
| + | npx treetrace --since 2026-06-01 | |
| + | ``` | |
| + | ||
| + | ## Sources | |
| + | ||
| + | | Source | Status | | |
| + | |--------|--------| | |
| + | | Claude Code (`~/.claude/projects` JSONL) | ✅ built-in, zero-config | | |
| + | | Pasted / plain-text transcripts (`User:` / `Assistant:` markers) | ✅ built-in | | |
| + | | Codex CLI, Cursor, SpecStory, ChatGPT export | 🚧 importers welcome - [open an issue](https://github.com/zionboggan/treetrace/issues) | | |
| + | ||
| + | ## The format | |
| + | ||
| + | `PROMPT_TREE.md` is a convention, not a lock-in: commit it at your repo root the way you commit `AGENTS.md`. The machine-readable lineage (`.treetrace/tree.json`) uses an open nodes/edges schema documented in [SCHEMA.md](SCHEMA.md), designed to compose with the [Agent Trace](https://agent-trace.dev/) RFC - Agent Trace records that code was AI-attributed; treetrace records the conversation structure that shaped it. | |
| + | ||
| + | ## Privacy promises | |
| + | ||
| + | - Local-first: no network calls, no telemetry, no accounts. Ever. | |
| + | - Raw transcripts are read, never copied, never exported. | |
| + | - Prompt-only by default: assistant output stays out of your exports. | |
| + | - Fails closed: un-reviewed secrets cannot reach a written artifact. | |
| + | ||
| + | ## License | |
| + | ||
| + | MIT © Zion Boggan | |
| + | ||
| + | --- | |
| + | ||
| + | *This repository ships its own [PROMPT_TREE.md](PROMPT_TREE.md) - the prompt tree of the tool that makes prompt trees.* |
| @@ -0,0 +1,69 @@ | ||
| + | # treetrace lineage schema v0.1 | |
| + | ||
| + | `.treetrace/tree.json` is an open, vendor-neutral format for the **prompt lineage** of an AI-assisted project: the tree of human instructions - branches, corrections, scope changes, dead ends, and the accepted path - that produced a result. | |
| + | ||
| + | It deliberately occupies the layer existing standards leave open: | |
| + | ||
| + | | Layer | Standard | What it records | | |
| + | |-------|----------|-----------------| | |
| + | | Code attribution | [Agent Trace](https://agent-trace.dev/) | which lines were AI-generated, by which model, linked to which conversation | | |
| + | | Runtime telemetry | OpenTelemetry `gen_ai` | per-call spans for operators, ephemeral | | |
| + | | Build integrity | SLSA / in-toto | signed provenance of artifacts | | |
| + | | **Conversation structure** | **treetrace (this document)** | **the human prompt lineage: what was asked, in what order, what was corrected, what was abandoned** | | |
| + | ||
| + | ## Top-level shape | |
| + | ||
| + | ```jsonc | |
| + | { | |
| + | "schemaVersion": "0.1", | |
| + | "generator": { "name": "treetrace", "version": "0.1.0", "url": "..." }, | |
| + | "project": { "name": "...", "generatedAt": "ISO-8601", "sourceType": "claude-code-jsonl" }, | |
| + | "stats": { "prompts": 41, "sessions": 6, "days": 9, "corrections": 3, "...": "..." }, | |
| + | "sessions": [ { "id": "...", "title": "...", "firstTs": "...", "lastTs": "...", "promptCount": 7, "isContinuation": false } ], | |
| + | "nodes": [ /* PromptNode */ ], | |
| + | "edges": [ /* Edge */ ] | |
| + | } | |
| + | ``` | |
| + | ||
| + | ## PromptNode | |
| + | ||
| + | | Field | Type | Meaning | | |
| + | |-------|------|---------| | |
| + | | `id` | string | stable within the file (`node_001`…) | | |
| + | | `parentId` | string \| null | lineage parent (null = root) | | |
| + | | `role` | `"user"` | always `"user"` today; the field is reserved so future system/developer nodes can be added | |
| + | | `kind` | enum | `root` · `direction` · `correction` · `scope-change` · `checkpoint` · `question` | | |
| + | | `title` | string | first-sentence distillation | | |
| + | | `text` | string | full prompt text **after** redaction | | |
| + | | `status` | enum | `accepted` · `abandoned` (off the accepted path via real rewind topology) | | |
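| + | | `nudges` | number | folded "continue"-style acknowledgements | |
| + | | `reruns` | number | times this prompt was re-issued with small wording drift and collapsed into this node (0 = issued once) | |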
| + | | `session` | string | session id this prompt came from | | |
| + | | `timestamp` | string \| null | ISO-8601 | | |
| + | | `sourceEventIds` | string[] | record UUIDs inside the **local** source transcript (audit link; transcripts themselves are never exported) | | |
| + | ||
| + | ## Edge | |
| + | ||
| + | ```jsonc | |
| + | { "from": "node_001", "to": "node_002", "relationship": "refines" } | |
| + | ``` | |
| + | ||
| + | `relationship` is derived from the child's `kind`: `refines` (direction), `corrects` (correction), `expands` (scope-change), `checkpoints` (checkpoint), `asks` (question). | |
| + | ||
| + | ## Composing with Agent Trace | |
| + | ||
| + | An Agent Trace record attributes file/line ranges to a conversation URL or ID. A treetrace export can be referenced as that conversation's **structural summary**: | |
| + | ||
| + | - Agent Trace `conversation` → treetrace `sessions[].id` | |
| + | - Agent Trace line-range records → the work performed *between* two treetrace nodes (bounded by `sourceEventIds`) | |
| + | ||
| + | This keeps responsibilities clean: Agent Trace answers *"which code came from AI?"*; treetrace answers *"what was the human actually steering?"*. Emitting both gives line-level attribution **and** human-readable narrative. | |
| + | ||
| + | ## Mapping to W3C PROV | |
| + | ||
| + | For provenance tooling: each `PromptNode` is a `prov:Activity` (instruction issuance) by a `prov:Agent` (the human); edges are `prov:wasInformedBy`; exported artifacts are `prov:Entity` with `prov:wasGeneratedBy` the final checkpoint node. | |
| + | ||
| + | ## Stability | |
| + | ||
| + | - `schemaVersion` follows semver-minor for additive changes. | |
| + | - Consumers MUST ignore unknown fields. | |
| + | - `kind`/`status`/`relationship` enums may gain values; treat unknown values as `direction`/`accepted`/`refines`. |
| @@ -0,0 +1,58 @@ | ||
| + | name: 'treetrace' | |
| + | description: 'Generate or refresh PROMPT_TREE.md - the prompt lineage of this repository - from committed treetrace lineage data.' | |
| + | author: 'Zion Boggan' | |
| + | branding: | |
| + | icon: 'git-branch' | |
| + | color: 'green' | |
| + | ||
| + | inputs: | |
| + | source: | |
| + | description: > | |
| + | Path to a transcript file (.jsonl or plain text) committed or produced in | |
| + | CI. Session logs live on dev machines, not in CI - typical usage is | |
| + | regenerating PROMPT_TREE.md from a committed .treetrace/tree.json or an | |
| + | uploaded transcript artifact. | |
| + | required: false | |
| + | default: '' | |
| + | comment-pr: | |
| + | description: 'Post the tree summary as a PR comment (requires GITHUB_TOKEN).' | |
| + | required: false | |
| + | default: 'false' | |
| + | ||
| + | runs: | |
| + | using: 'composite' | |
| + | steps: | |
| + | - name: Generate prompt tree | |
| + | shell: bash | |
| + | env: | |
| + | # Pass the input through the environment instead of interpolating it into | |
| + | # the script body: a crafted `source` value could otherwise inject shell. | |
| + | TREETRACE_SOURCE: ${{ inputs.source }} | |
| + | run: | | |
| + | set -euo pipefail | |
| + | if [ -n "$TREETRACE_SOURCE" ]; then | |
| + | npx --yes treetrace --file "$TREETRACE_SOURCE" --redact-auto --quiet | |
| + | elif [ -f .treetrace/tree.json ]; then | |
| + | echo "::notice::Using committed .treetrace/tree.json" | |
| + | # Sanity-check the committed lineage (no transcripts in CI). This does not | |
| + | # re-render PROMPT_TREE.md; it only parses tree.json and reports the node count. | |
| + | node -e " | |
| + | const { readFileSync } = require('fs'); | |
| + | const data = JSON.parse(readFileSync('.treetrace/tree.json','utf8')); | |
| + | console.log('lineage present:', data.nodes.length, 'nodes - PROMPT_TREE.md should be committed alongside it'); | |
| + | " | |
| + | else | |
| + | echo "::warning::No source transcript or .treetrace/tree.json found - nothing to do." | |
| + | fi | |
| + | - name: Comment on PR | |
| + | # Runs only for pull_request events when the comment-pr input is 'true'. | |
| + | # Posts a heading, the first 4000 bytes of PROMPT_TREE.md and a footer as a | |
| + | # PR comment through the gh CLI, authenticated with the workflow token. | |
| + | # NOTE(review): `head -c` truncates by bytes, so the cut can land inside a | |
| + | # multi-byte UTF-8 character (the tree uses emoji/icons) - confirm the | |
| + | # comment still renders when PROMPT_TREE.md exceeds 4000 bytes. | |
| + | if: ${{ inputs.comment-pr == 'true' && github.event_name == 'pull_request' }} | |
| + | shell: bash | |
| + | env: | |
| + | GH_TOKEN: ${{ github.token }} | |
| + | run: | | |
| + | set -euo pipefail | |
| + | if [ -f PROMPT_TREE.md ]; then | |
| + | { | |
| + | echo "### 🌳 Prompt tree" | |
| + | echo "" | |
| + | head -c 4000 PROMPT_TREE.md | |
| + | echo "" | |
| + | echo "_Full lineage: PROMPT_TREE.md_" | |
| + | } > /tmp/tt-comment.md | |
| + | gh pr comment "${{ github.event.pull_request.number }}" --body-file /tmp/tt-comment.md | |
| + | fi |
| @@ -36,8 +36,16 @@ const QUESTION_ONLY = | ||
| /^(what|how|why|where|when|which|who|is|are|can|could|should|would|will|do|does|did)\b[^]*\?\s*$/i; | ||
| // Short acknowledgements that nudge the agent along but carry no direction. | ||
| + | // NB: bare numerals / "option B" are NOT here - they select from an offered | |
| + | // menu and steer the project (corpus ground truth), so they become nodes. | |
| const CONTINUATION_RE = | ||
| - | /^(y|yes|yep|yeah|ok|okay|k|sure|continue|cont|go|go ahead|do it|proceed|next|sounds good|looks good|lgtm|perfect|nice|good|great|approved?|yes please|please do|carry on|keep going|resume|finish|all good|that works|works|👍|do that|option \w|\d)[.! ]*$/i; | |
| + | /^(y|yes|yep|yeah|ok|okay|k|sure|continue|cont|go|go ahead|do it|proceed|next|sounds good|looks good|lgtm|perfect|nice|good|great|approved?|yes please|please do|carry on|keep going|resume|finish|all good|that works|works|👍|do that)[.! ]*$/i; | |
| + | ||
| + | // Menu selections: tiny text, real steering - titled specially. | |
| + | const SELECTION_RE = /^(?:option\s+)?([0-9]{1,2}|[a-d])[.)! ]*$/i; | |
| + | ||
| + | // Explicit self-described throwaways ("Test message. Ignore this.") | |
| + | const IGNORE_RE = /\bignore this\b/i; | |
| const MAX_NUDGE_WORDS = 4; | ||
| @@ -64,6 +72,15 @@ export function classifyPrompts(sessions) { | ||
| continue; | ||
| } | ||
| + | // Re-armed recurring prompts (/loop restarts, repeated dispatches with | |
| + | // small wording drift) collapse into one node with a re-run counter. | |
| + | if (prevNode && isRerunOf(prevNode.text, text)) { | |
| + | prevNode.reruns = (prevNode.reruns || 0) + 1; | |
| + | prevNode.text = text; // latest wording wins | |
| + | prevNode.title = makeTitle(text); | |
| + | continue; | |
| + | } | |
| + | ||
| // Fold pure nudges into the previous node instead of creating noise nodes. | ||
| if ( | ||
| prevNode && | ||
| @@ -74,7 +91,24 @@ export function classifyPrompts(sessions) { | ||
| continue; | ||
| } | ||
| - | const node = { | |
| + | // Self-described throwaways never become lineage. | |
| + | if (words.length <= 6 && IGNORE_RE.test(text)) continue; | |
| + | ||
| + | const selection = rootAssigned && SELECTION_RE.exec(text); | |
| + | const node = selection ? { | |
| + | id: null, | |
| + | uuid: prompt.uuid, | |
| + | parentUuid: prompt.parentUuid, | |
| + | sessionId: session.sessionId, | |
| + | ts: prompt.ts, | |
| + | text, | |
| + | title: `Chose option ${selection[1].toUpperCase()} from the proposed menu`, | |
| + | kind: KIND.DIRECTION, | |
| + | status: 'accepted', | |
| + | nudges: 0, | |
| + | afterInterruption: prompt.afterInterruption, | |
| + | chars: text.length, | |
| + | } : { | |
| id: null, // assigned by tree builder | ||
| uuid: prompt.uuid, | ||
| parentUuid: prompt.parentUuid, | ||
| @@ -108,6 +142,22 @@ function isDupOf(a, b) { | ||
| return long.startsWith(short.slice(0, short.length - 4)); | ||
| } | ||
| + | // Same recurring instruction re-issued with wording drift: identical opening | |
| + | // (command name / first words) plus high common-prefix overlap. | |
| + | // | |
| + | // a, b: prompt texts to compare (the classifier passes the previous node's | |
| + | // text and the incoming prompt text). Returns true when b should be treated | |
| + | // as a re-issue of a, false otherwise. | |
| + | function isRerunOf(a, b) { | |
| + | // Collapse whitespace runs so formatting differences do not affect matching. | |
| + | const na = a.replace(/\s+/g, ' ').trim(); | |
| + | const nb = b.replace(/\s+/g, ' ').trim(); | |
| + | // Prompts shorter than 40 characters are never treated as re-runs. | |
| + | if (na.length < 40 || nb.length < 40) return false; | |
| + | // The first 24 characters must match exactly. | |
| + | if (na.slice(0, 24) !== nb.slice(0, 24)) return false; | |
| + | // Command re-arms (/loop, /dispatch …): same command + same opening counts | |
| + | // as a re-issue even when the long arg body drifts. | |
| + | if (na.startsWith('/') && na.slice(0, 32) === nb.slice(0, 32)) return true; | |
| + | // Otherwise the shared prefix must cover at least half of the shorter text. | |
| + | const limit = Math.min(na.length, nb.length); | |
| + | let common = 0; | |
| + | while (common < limit && na[common] === nb[common]) common++; | |
| + | return common / limit >= 0.5; | |
| + | } | |
| + | ||
| function classifyOne(text, prompt, rootAssigned) { | ||
| if (!rootAssigned) return KIND.ROOT; | ||
| if (CORRECTION_STRONG_OPENERS.test(text) || CORRECTION_ANYWHERE.test(text)) return KIND.CORRECTION; |
| @@ -4,29 +4,37 @@ import { createInterface } from 'node:readline'; | ||
| /** | ||
| * Streaming parser for Claude Code session JSONL files. | ||
| * | ||
| - | * Design constraints: | |
| - | * - Session files reach 200MB+; never buffer the whole file. | |
| - | * - Keep a light index (uuid/parent/type/ts) for every conversation record so | |
| - | * branch topology can be reconstructed, but keep full text only for | |
| - | * candidate human prompts and small metadata records. | |
| - | * - Tolerate unknown record types and malformed lines: skip, never throw. | |
| + | * Built against a 579-file / ~195k-line corpus census (format versions | |
| + | * 2.1.133-2.1.173). Key realities encoded here: | |
| + | * - Records form a DAG per file; chains pass THROUGH system/attachment | |
| + | * nodes, so all addressable node types must be indexed. | |
| + | * - One API assistant message = N jsonl records sharing message.id, with | |
| + | * usage repeated on every split - merge or token stats inflate 2-4×. | |
| + | * - Compaction restarts the chain (parentUuid:null) but provides | |
| + | * logicalParentUuid to stitch through. | |
| + | * - userType is 'external' on every record including agent-authored ones - | |
| + | * never a human discriminator. Sidechains live in separate files. | |
| + | * - The last `last-prompt` record's leafUuid is the live branch tip. | |
| + | * - Session files reach 200MB+ (multi-MB base64 lines): stream, never buffer. | |
| */ | ||
| - | const TURN_TYPES = new Set(['user', 'assistant']); | |
| + | const DAG_TYPES = new Set(['user', 'assistant', 'system', 'attachment']); | |
| export async function parseSessionFile(path, sessionMeta = {}) { | ||
| const session = { | ||
| sessionId: sessionMeta.sessionId || null, | ||
| path, | ||
| title: null, | ||
| + | customTitle: null, | |
| version: null, | ||
| cwd: null, | ||
| gitBranch: null, | ||
| firstTs: null, | ||
| lastTs: null, | ||
| prompts: [], // candidate human prompts (full text retained) | ||
| - | index: new Map(), // uuid -> { parentUuid, type, ts } for all turn records | |
| - | leafUuid: null, // last turn uuid seen (chronological) | |
| + | index: new Map(), // uuid -> { parentUuid, type, ts } for all DAG records | |
| + | leafUuid: null, // last addressable record seen (fallback branch tip) | |
| + | activeLeafUuid: null, // from last `last-prompt` record (authoritative) | |
| stats: { | ||
| userLines: 0, | ||
| assistantLines: 0, | ||
| @@ -37,8 +45,9 @@ export async function parseSessionFile(path, sessionMeta = {}) { | ||
| outputTokens: 0, | ||
| interruptions: 0, | ||
| }, | ||
| - | isContinuation: false, // continued from a compacted previous session | |
| - | continuationOf: null, | |
| + | isContinuation: false, | |
| + | _usageByMsgId: new Map(), // assistant split merge: last record's usage wins | |
| + | _pendingInterruption: false, | |
| }; | ||
| const stream = createReadStream(path, { encoding: 'utf8' }); | ||
| @@ -50,12 +59,24 @@ export async function parseSessionFile(path, sessionMeta = {}) { | ||
| try { | ||
| rec = JSON.parse(line); | ||
| } catch { | ||
| - | continue; // truncated/corrupt line | |
| + | continue; // truncated/corrupt line (live files mutate mid-scan) | |
| + | } | |
| + | try { | |
| + | ingestRecord(session, rec); | |
| + | } catch { | |
| + | continue; // unknown shape - tolerate, never crash | |
| } | ||
| - | ingestRecord(session, rec); | |
| } | ||
| rl.close(); | ||
| + | // fold merged assistant usage into totals | |
| + | for (const usage of session._usageByMsgId.values()) { | |
| + | session.stats.inputTokens += usage.input_tokens || 0; | |
| + | session.stats.outputTokens += usage.output_tokens || 0; | |
| + | } | |
| + | session._usageByMsgId = null; | |
| + | ||
| + | if (session.customTitle) session.title = session.customTitle; | |
| session.stats.models = [...session.stats.models]; | ||
| session.stats.filesTouched = [...session.stats.filesTouched]; | ||
| return session; | ||
| @@ -69,17 +90,33 @@ function ingestRecord(session, rec) { | ||
| case 'assistant': | ||
| ingestAssistant(session, rec); | ||
| break; | ||
| - | case 'summary': | |
| - | // {type:"summary", summary, leafUuid} - Claude Code's own session title | |
| + | case 'system': | |
| + | indexDagNode(session, rec, { | |
| + | // compaction boundary restarts parentUuid; stitch the logical chain | |
| + | parentOverride: | |
| + | rec.subtype === 'compact_boundary' && rec.logicalParentUuid | |
| + | ? rec.logicalParentUuid | |
| + | : undefined, | |
| + | }); | |
| + | break; | |
| + | case 'attachment': | |
| + | indexDagNode(session, rec); // chains pass through attachments | |
| + | break; | |
| + | case 'summary': // legacy (<2.1.133) | |
| if (rec.summary && !session.title) session.title = rec.summary; | ||
| break; | ||
| - | case 'ai-title': | |
| - | if ((rec.title || rec.aiTitle) && !session.title) | |
| - | session.title = rec.title || rec.aiTitle; | |
| + | case 'ai-title': // last occurrence wins | |
| + | if (rec.aiTitle || rec.title) session.title = rec.aiTitle || rec.title; | |
| + | break; | |
| + | case 'custom-title': // user-set, beats ai-title | |
| + | if (rec.customTitle) session.customTitle = rec.customTitle; | |
| + | break; | |
| + | case 'last-prompt': // last occurrence's leafUuid = live branch tip | |
| + | if (rec.leafUuid) session.activeLeafUuid = rec.leafUuid; | |
| break; | ||
| default: | ||
| - | // mode, permission-mode, bridge-session, last-prompt, queue-operation, | |
| - | // file-history-snapshot, attachment, system, ... - not lineage material | |
| + | // mode, permission-mode, bridge-session, queue-operation, | |
| + | // file-history-snapshot, unknown future types - not lineage material | |
| break; | ||
| } | ||
| @@ -87,16 +124,16 @@ function ingestRecord(session, rec) { | ||
| if (!session.version && rec.version) session.version = rec.version; | ||
| if (!session.cwd && rec.cwd) session.cwd = rec.cwd; | ||
| if (!session.gitBranch && rec.gitBranch) session.gitBranch = rec.gitBranch; | ||
| - | if (rec.timestamp && TURN_TYPES.has(rec.type)) { | |
| + | if (rec.timestamp && DAG_TYPES.has(rec.type)) { | |
| if (!session.firstTs) session.firstTs = rec.timestamp; | ||
| session.lastTs = rec.timestamp; | ||
| } | ||
| } | ||
| - | function indexTurn(session, rec) { | |
| + | function indexDagNode(session, rec, { parentOverride } = {}) { | |
| if (!rec.uuid) return; | ||
| session.index.set(rec.uuid, { | ||
| - | parentUuid: rec.parentUuid || null, | |
| + | parentUuid: parentOverride !== undefined ? parentOverride : rec.parentUuid || null, | |
| type: rec.type, | ||
| ts: rec.timestamp || null, | ||
| }); | ||
| @@ -104,17 +141,29 @@ function indexTurn(session, rec) { | ||
| } | ||
| function ingestUser(session, rec) { | ||
| - | if (rec.isSidechain) return; // subagent traffic, not human | |
| - | indexTurn(session, rec); | |
| + | // Sidechain traffic is agent-authored even when it mimics human voice. | |
| + | // (Sidechains live in separate files; belt-and-suspenders for inline ones.) | |
| + | if (rec.isSidechain || rec.agentId) return; | |
| + | indexDagNode(session, rec); | |
| session.stats.userLines++; | ||
| - | const msg = rec.message || {}; | |
| - | const { text, hasToolResult, hasOnlyToolResult } = flattenUserContent(msg.content); | |
| + | // Tool plumbing: results echo back as user records (~90% of user lines), | |
| + | // marked by toolUseResult / sourceToolAssistantUUID even for string content. | |
| + | if (rec.toolUseResult !== undefined || rec.sourceToolAssistantUUID !== undefined) return; | |
| - | if (hasOnlyToolResult) return; // tool output echoed back as a user turn | |
| + | if (rec.isMeta) return; // caveats, skill-body injections | |
| + | if (rec.isCompactSummary) { | |
| + | session.isContinuation = true; | |
| + | return; | |
| + | } | |
| + | if (rec.promptSource === 'system' || rec.promptSource === 'sdk') return; | |
| + | if (rec.origin && rec.origin.kind === 'task-notification') return; | |
| - | const trimmed = (text || '').trim(); | |
| - | if (!trimmed) return; | |
| + | const msg = rec.message || {}; | |
| + | const { text, hasImage, hasToolResult, hasOnlyToolResult } = flattenUserContent(msg.content); | |
| + | if (hasOnlyToolResult) return; | |
| + | ||
| + | let trimmed = (text || '').trim(); | |
| if (/^\[Request interrupted by user/i.test(trimmed)) { | ||
| session.stats.interruptions++; | ||
| @@ -122,21 +171,30 @@ function ingestUser(session, rec) { | ||
| return; | ||
| } | ||
| - | // Slash command + local command wrappers, hook noise, harness reminders. | |
| const classification = classifySpecialUserText(trimmed); | ||
| - | if (classification === 'command') return; | |
| - | if (classification === 'meta' || rec.isMeta) return; | |
| + | if (classification === 'meta') return; | |
| if (classification === 'compact-continuation') { | ||
| session.isContinuation = true; | ||
| return; | ||
| } | ||
| + | if (classification === 'command') { | |
| + | // Slash-command wrappers are noise - unless the human packed real intent | |
| + | // into the args (e.g. `/loop <multi-line work focus>`). | |
| + | const invocation = extractCommandInvocation(trimmed); | |
| + | if (!invocation) return; | |
| + | trimmed = invocation; | |
| + | } | |
| + | ||
| + | // Image-only records are often screenshot feedback - meaningful, keep. | |
| + | if (!trimmed && hasImage) trimmed = '[image-only prompt: screenshot/annotated feedback]'; | |
| + | if (!trimmed) return; | |
| session.prompts.push({ | ||
| uuid: rec.uuid || null, | ||
| parentUuid: rec.parentUuid || null, | ||
| ts: rec.timestamp || null, | ||
| text: trimmed, | ||
| - | userType: rec.userType || null, | |
| + | hasImage, | |
| hadToolResultContext: hasToolResult, | ||
| afterInterruption: Boolean(session._pendingInterruption), | ||
| }); | ||
| @@ -144,16 +202,21 @@ function ingestUser(session, rec) { | ||
| } | ||
| function ingestAssistant(session, rec) { | ||
| - | if (rec.isSidechain) return; | |
| - | indexTurn(session, rec); | |
| + | if (rec.isSidechain || rec.agentId) return; | |
| + | indexDagNode(session, rec); | |
| session.stats.assistantLines++; | ||
| const msg = rec.message || {}; | ||
| - | if (msg.model) session.stats.models.add(msg.model); | |
| - | if (msg.usage) { | |
| - | session.stats.inputTokens += msg.usage.input_tokens || 0; | |
| - | session.stats.outputTokens += msg.usage.output_tokens || 0; | |
| + | const synthetic = msg.model === '<synthetic>' || rec.isApiErrorMessage; | |
| + | ||
| + | if (msg.model && !synthetic) session.stats.models.add(msg.model); | |
| + | // One API message = N split records sharing message.id, usage repeated on | |
| + | // each (main sessions) or present only on the last (subagent files): | |
| + | // keep the latest non-empty usage per id, sum after parsing. | |
| + | if (msg.usage && !synthetic && (msg.usage.input_tokens || msg.usage.output_tokens)) { | |
| + | session._usageByMsgId.set(msg.id || rec.uuid, msg.usage); | |
| } | ||
| + | ||
| const content = Array.isArray(msg.content) ? msg.content : []; | ||
| for (const block of content) { | ||
| if (block && block.type === 'tool_use') { | ||
| @@ -167,14 +230,15 @@ function ingestAssistant(session, rec) { | ||
| function flattenUserContent(content) { | ||
| if (typeof content === 'string') { | ||
| - | return { text: content, hasToolResult: false, hasOnlyToolResult: false }; | |
| + | return { text: content, hasImage: false, hasToolResult: false, hasOnlyToolResult: false }; | |
| } | ||
| if (!Array.isArray(content)) { | ||
| - | return { text: '', hasToolResult: false, hasOnlyToolResult: false }; | |
| + | return { text: '', hasImage: false, hasToolResult: false, hasOnlyToolResult: false }; | |
| } | ||
| let text = ''; | ||
| let toolResults = 0; | ||
| let others = 0; | ||
| + | let images = 0; | |
| for (const block of content) { | ||
| if (!block || typeof block !== 'object') continue; | ||
| if (block.type === 'text' && typeof block.text === 'string') { | ||
| @@ -182,14 +246,17 @@ function flattenUserContent(content) { | ||
| others++; | ||
| } else if (block.type === 'tool_result') { | ||
| toolResults++; | ||
| + | } else if (block.type === 'image') { | |
| + | images++; | |
| } else { | ||
| - | others++; // images, documents - count as non-tool content | |
| + | others++; // documents, future block types | |
| } | ||
| } | ||
| return { | ||
| text, | ||
| + | hasImage: images > 0, | |
| hasToolResult: toolResults > 0, | ||
| - | hasOnlyToolResult: toolResults > 0 && others === 0, | |
| + | hasOnlyToolResult: toolResults > 0 && others === 0 && images === 0, | |
| }; | ||
| } | ||
| @@ -198,7 +265,6 @@ const COMPACT_CONTINUATION_RE = | ||
| export function classifySpecialUserText(text) { | ||
| if (COMPACT_CONTINUATION_RE.test(text)) return 'compact-continuation'; | ||
| - | // /slash-command invocations and their stdout get wrapped in pseudo-XML | |
| if ( | ||
| text.startsWith('<command-name>') || | ||
| text.startsWith('<command-message>') || | ||
| @@ -220,6 +286,15 @@ export function classifySpecialUserText(text) { | ||
| return 'prompt'; | ||
| } | ||
| + | // `/loop de-swamp & polish ...` - wrapper noise, but non-empty <command-args> | |
| + | // is the human's actual instruction. Returns reconstructed text or null. | |
| + | // | |
| + | // text: wrapped user text containing <command-name> / <command-args> tags. | |
| + | // Returns "<name> <args>" (both trimmed), or null when the args tag is missing | |
| + | // or blank; a missing or empty name falls back to the literal "(command)". | |
| + | export function extractCommandInvocation(text) { | |
| + | const name = text.match(/<command-name>([^<]*)<\/command-name>/)?.[1]?.trim(); | |
| + | // Lazy match: capture up to the first closing </command-args> tag. | |
| + | const args = text.match(/<command-args>([\s\S]*?)<\/command-args>/)?.[1]?.trim(); | |
| + | if (!args) return null; | |
| + | return `${name || '(command)'} ${args}`; | |
| + | } | |
| + | ||
| /** | ||
| * Fallback importer: plain text / markdown transcripts (pasted exports from | ||
| * ChatGPT, Claude.ai, etc.). Recognizes common turn markers; returns a | ||
| @@ -270,9 +345,10 @@ export function parsePlainTranscript(text, label = 'pasted-transcript') { | ||
| gitBranch: null, | ||
| firstTs: null, | ||
| lastTs: null, | ||
| - | prompts: prompts.map((p) => ({ ...p, text: p.text.trim(), userType: 'external' })), | |
| + | prompts: prompts.map((p) => ({ ...p, text: p.text.trim() })), | |
| index: new Map(), | ||
| leafUuid: null, | ||
| + | activeLeafUuid: null, | |
| stats: { | ||
| userLines: prompts.length, | ||
| assistantLines: 0, | ||
| @@ -284,6 +360,5 @@ export function parsePlainTranscript(text, label = 'pasted-transcript') { | ||
| interruptions: 0, | ||
| }, | ||
| isContinuation: false, | ||
| - | continuationOf: null, | |
| }; | ||
| } |
| @@ -57,6 +57,7 @@ export function renderJson(tree, opts = {}) { | ||
| text: n.text, | ||
| status: n.status, | ||
| nudges: n.nudges || 0, | ||
| + | reruns: n.reruns || 0, | |
| session: n.sessionId, | ||
| timestamp: n.ts, | ||
| // source linkage for audit: the original record uuid inside the local |
| @@ -150,8 +150,9 @@ function emitNode(node, depth, lines, { titlesOnly }) { | ||
| const title = dead ? `~~${node.title}~~ ✗` : node.kind === 'root' ? `**${node.title}**` : node.title; | ||
| const session = node.sessionBoundary ? ` ${dim(`(new session${node.ts ? `, ${formatDay(node.ts)}` : ''})`)}` : ''; | ||
| const nudges = node.nudges > 1 ? ` ${dim(`(+${node.nudges} nudges)`)}` : ''; | ||
| + | const reruns = node.reruns ? ` ${dim(`(re-issued ×${node.reruns + 1})`)}` : ''; | |
| - | lines.push(`${indent}- \`${icon}\` ${title}${session}${nudges}`); | |
| + | lines.push(`${indent}- \`${icon}\` ${title}${session}${nudges}${reruns}`); | |
| if (!titlesOnly && node.text.replace(/\s+/g, ' ').trim().length > node.title.replace(/\.\.\.$/, '').length + 12) { | ||
| lines.push(`${indent} <details><summary>full prompt</summary>`); |
| @@ -12,10 +12,15 @@ export function buildTree(sessions, nodes) { | ||
| for (const node of nodes) if (node.uuid) byUuid.set(node.uuid, node); | ||
| // Per-session main-path sets (uuids of records that "made it" to the end). | ||
| + | // The last `last-prompt` record's leafUuid is the authoritative live-branch | |
| + | // tip; the last addressable record is the fallback. | |
| const mainPaths = new Map(); | ||
| for (const session of sessions) { | ||
| const main = new Set(); | ||
| - | let cur = session.leafUuid; | |
| + | let cur = | |
| + | (session.activeLeafUuid && session.index.has(session.activeLeafUuid) | |
| + | ? session.activeLeafUuid | |
| + | : session.leafUuid) || null; | |
| let guard = 0; | ||
| while (cur && guard++ < 1_000_000) { | ||
| main.add(cur); |