examples/rejections/.treetrace/failures.json

179 lines · json

{
  "schemaVersion": "0.3",
  "project": {
    "name": "rejections",
    "generatedAt": "2026-06-19T06:50:12.990Z"
  },
  "summary": {
    "totalFailureSignals": 9,
    "topFailureTypes": [
      {
        "type": "user_rejected_action",
        "count": 3
      },
      {
        "type": "model_refused",
        "count": 2
      },
      {
        "type": "misunderstood_goal",
        "count": 1
      },
      {
        "type": "permission_denied",
        "count": 1
      },
      {
        "type": "security_or_privacy_risk",
        "count": 1
      },
      {
        "type": "tool_execution_failed",
        "count": 1
      }
    ],
    "tierCounts": {
      "verified": 3,
      "high": 4,
      "confirmed": 1,
      "inferred": 1
    },
    "models": [
      "claude-3-opus"
    ],
    "thinkingBlocks": 0,
    "correctionChains": 1,
    "evalCandidates": 5,
    "lessons": 6
  },
  "failures": [
    {
      "id": "failure_001",
      "type": "user_rejected_action",
      "tier": "verified",
      "confidence": 1,
      "model": "claude-3-opus",
      "firstSeenNodeId": "node_001",
      "correctedByNodeId": null,
      "summary": "The user declined a proposed tool action near \"Build a thing in this repo.\".",
      "evidence": "user_declined_tool (tool_result): \"The user doesn't want to proceed with this tool use. The user wants you to answer a different question instead.\"",
      "lesson": "Future agents should not retry a tool action the user just declined without first explaining why the action is still worth taking. Specifically: user_declined_tool (tool_result): \"The user doesn't want to proceed with this tool use. The user wants you to answer a different question instead.\"",
      "evalCandidate": true
    },
    {
      "id": "failure_002",
      "type": "user_rejected_action",
      "tier": "verified",
      "confidence": 1,
      "model": "claude-3-opus",
      "firstSeenNodeId": "node_002",
      "correctedByNodeId": null,
      "summary": "The user interrupted the agent mid-response near \"Use the Edit tool to add a README instead.\".",
      "evidence": "user_interrupt (text): \"[Request interrupted by user]\"",
      "lesson": "Future agents should not retry a tool action the user just declined without first explaining why the action is still worth taking. Specifically: user_interrupt (text): \"[Request interrupted by user]\"",
      "evalCandidate": true
    },
    {
      "id": "failure_003",
      "type": "tool_execution_failed",
      "tier": "high",
      "confidence": 0.9,
      "model": "claude-3-opus",
      "firstSeenNodeId": "node_003",
      "correctedByNodeId": null,
      "summary": "A tool execution returned an error near \"Try writing a new file via the Write tool.\".",
      "evidence": "tool_execution_error (tool_result): \"mkdir: cannot create directory '/root/.config/forbidden': File exists\"",
      "lesson": "Future agents should validate command inputs and surface expected errors before running shell or write tools, instead of discovering failures after execution. Specifically: tool_execution_error (tool_result): \"mkdir: cannot create directory '/root/.config/forbidden': File exists\"",
      "evalCandidate": true
    },
    {
      "id": "failure_004",
      "type": "permission_denied",
      "tier": "high",
      "confidence": 0.85,
      "model": "claude-3-opus",
      "firstSeenNodeId": "node_003",
      "correctedByNodeId": null,
      "summary": "A tool action was denied by the environment (permission denied) near \"Try writing a new file via the Write tool.\".",
      "evidence": "permission_denied (tool_result): \"sudo: permission denied; user is not in the sudoers file. This incident will be reported.\"",
      "lesson": "Future agents should pre-flight check that required files, commands, or resources are accessible before attempting an action that needs them. Specifically: permission_denied (tool_result): \"sudo: permission denied; user is not in the sudoers file. This incident will be reported.\"",
      "evalCandidate": true
    },
    {
      "id": "failure_005",
      "type": "security_or_privacy_risk",
      "tier": "high",
      "confidence": 0.84,
      "model": "claude-3-opus",
      "firstSeenNodeId": "node_003",
      "correctedByNodeId": null,
      "summary": "An agent action touched auth, secrets, or access control near \"Try writing a new file via the Write tool.\".",
      "evidence": "Agent action touched risky-command [signals: risky command]: \"sudo rm -rf /root/.config/forbidden\"",
      "lesson": "Future agents should not weaken local-first privacy, redaction, or no-network guarantees without explicit approval. Specifically: Agent action touched risky-command [signals: risky command]: \"sudo rm -rf /root/.config/forbidden\"",
      "evalCandidate": true
    },
    {
      "id": "failure_006",
      "type": "user_rejected_action",
      "tier": "high",
      "confidence": 0.8,
      "model": null,
      "firstSeenNodeId": "node_004",
      "correctedByNodeId": null,
      "summary": "The user explicitly told the agent to stop or not proceed near \"stop, don't do that\".",
      "evidence": "user_text_decline (text): \"stop, don't do that\"",
      "lesson": "Future agents should not retry a tool action the user just declined without first explaining why the action is still worth taking. Specifically: user_text_decline (text): \"stop, don't do that\"",
      "evalCandidate": true
    },
    {
      "id": "failure_007",
      "type": "model_refused",
      "tier": "verified",
      "confidence": 0.95,
      "model": null,
      "firstSeenNodeId": "node_004",
      "correctedByNodeId": null,
      "summary": "The model refused to proceed near \"stop, don't do that\".",
      "evidence": "model_refusal (stop_reason)",
      "lesson": "Future agents should treat a refusal as a signal to rephrase or descope, not to retry the same request verbatim; if the user confirms the request is legitimate, surface the refusal reason. Specifically: model_refusal (stop_reason)",
      "evalCandidate": true
    },
    {
      "id": "failure_008",
      "type": "misunderstood_goal",
      "tier": "inferred",
      "confidence": 0.68,
      "model": "claude-3-opus",
      "firstSeenNodeId": "node_003",
      "correctedByNodeId": "node_004",
      "summary": "A possible misunderstood goal occurred near \"Try writing a new file via the Write tool.\"; corrected by \"stop, don't do that\".",
      "evidence": "User said: \"stop, don't do that\"",
      "lesson": "Future agents should restate and verify the goal before continuing after a correction. Specifically: User said: \"stop, don't do that\"",
      "evalCandidate": true
    },
    {
      "id": "failure_009",
      "type": "model_refused",
      "tier": "confirmed",
      "confidence": 0.7,
      "model": null,
      "firstSeenNodeId": "node_005",
      "correctedByNodeId": null,
      "summary": "The model refused to proceed near \"Can you at least tell me what would have happened?\".",
      "evidence": "model_refusal (text_heuristic): \"I can't help with that request. It would require me to describe how to bypass filesystem permissions, which I am programmed not to do.\"",
      "lesson": "Future agents should treat a refusal as a signal to rephrase or descope, not to retry the same request verbatim; if the user confirms the request is legitimate, surface the refusal reason. Specifically: model_refusal (text_heuristic): \"I can't help with that request. It would require me to describe how to bypass filesystem permissions, which I am programmed not to do.\"",
      "evalCandidate": true
    }
  ],
  "correctionChains": [
    {
      "id": "chain_001",
      "failureNodeId": "node_003",
      "correctionNodeId": "node_004",
      "resolvedNodeId": null,
      "failureType": "misunderstood_goal",
      "confidence": "medium",
      "summary": "A possible misunderstood goal occurred near \"Try writing a new file via the Write tool.\"; corrected by \"stop, don't do that\"."
    }
  ]
}

1	{
2	"schemaVersion": "0.3",
3	"project": {
4	"name": "rejections",
5	"generatedAt": "2026-06-19T06:50:12.990Z"
6	},
7	"summary": {
8	"totalFailureSignals": 9,
9	"topFailureTypes": [
10	{
11	"type": "user_rejected_action",
12	"count": 3
13	},
14	{
15	"type": "model_refused",
16	"count": 2
17	},
18	{
19	"type": "misunderstood_goal",
20	"count": 1
21	},
22	{
23	"type": "permission_denied",
24	"count": 1
25	},
26	{
27	"type": "security_or_privacy_risk",
28	"count": 1
29	},
30	{
31	"type": "tool_execution_failed",
32	"count": 1
33	}
34	],
35	"tierCounts": {
36	"verified": 3,
37	"high": 4,
38	"confirmed": 1,
39	"inferred": 1
40	},
41	"models": [
42	"claude-3-opus"
43	],
44	"thinkingBlocks": 0,
45	"correctionChains": 1,
46	"evalCandidates": 5,
47	"lessons": 6
48	},
49	"failures": [
50	{
51	"id": "failure_001",
52	"type": "user_rejected_action",
53	"tier": "verified",
54	"confidence": 1,
55	"model": "claude-3-opus",
56	"firstSeenNodeId": "node_001",
57	"correctedByNodeId": null,
58	"summary": "The user declined a proposed tool action near \"Build a thing in this repo.\".",
59	"evidence": "user_declined_tool (tool_result): \"The user doesn't want to proceed with this tool use. The user wants you to answer a different question instead.\"",
60	"lesson": "Future agents should not retry a tool action the user just declined without first explaining why the action is still worth taking. Specifically: user_declined_tool (tool_result): \"The user doesn't want to proceed with this tool use. The user wants you to answer a different question instead.\"",
61	"evalCandidate": true
62	},
63	{
64	"id": "failure_002",
65	"type": "user_rejected_action",
66	"tier": "verified",
67	"confidence": 1,
68	"model": "claude-3-opus",
69	"firstSeenNodeId": "node_002",
70	"correctedByNodeId": null,
71	"summary": "The user interrupted the agent mid-response near \"Use the Edit tool to add a README instead.\".",
72	"evidence": "user_interrupt (text): \"[Request interrupted by user]\"",
73	"lesson": "Future agents should not retry a tool action the user just declined without first explaining why the action is still worth taking. Specifically: user_interrupt (text): \"[Request interrupted by user]\"",
74	"evalCandidate": true
75	},
76	{
77	"id": "failure_003",
78	"type": "tool_execution_failed",
79	"tier": "high",
80	"confidence": 0.9,
81	"model": "claude-3-opus",
82	"firstSeenNodeId": "node_003",
83	"correctedByNodeId": null,
84	"summary": "A tool execution returned an error near \"Try writing a new file via the Write tool.\".",
85	"evidence": "tool_execution_error (tool_result): \"mkdir: cannot create directory '/root/.config/forbidden': File exists\"",
86	"lesson": "Future agents should validate command inputs and surface expected errors before running shell or write tools, instead of discovering failures after execution. Specifically: tool_execution_error (tool_result): \"mkdir: cannot create directory '/root/.config/forbidden': File exists\"",
87	"evalCandidate": true
88	},
89	{
90	"id": "failure_004",
91	"type": "permission_denied",
92	"tier": "high",
93	"confidence": 0.85,
94	"model": "claude-3-opus",
95	"firstSeenNodeId": "node_003",
96	"correctedByNodeId": null,
97	"summary": "A tool action was denied by the environment (permission denied) near \"Try writing a new file via the Write tool.\".",
98	"evidence": "permission_denied (tool_result): \"sudo: permission denied; user is not in the sudoers file. This incident will be reported.\"",
99	"lesson": "Future agents should pre-flight check that required files, commands, or resources are accessible before attempting an action that needs them. Specifically: permission_denied (tool_result): \"sudo: permission denied; user is not in the sudoers file. This incident will be reported.\"",
100	"evalCandidate": true
101	},
102	{
103	"id": "failure_005",
104	"type": "security_or_privacy_risk",
105	"tier": "high",
106	"confidence": 0.84,
107	"model": "claude-3-opus",
108	"firstSeenNodeId": "node_003",
109	"correctedByNodeId": null,
110	"summary": "An agent action touched auth, secrets, or access control near \"Try writing a new file via the Write tool.\".",
111	"evidence": "Agent action touched risky-command [signals: risky command]: \"sudo rm -rf /root/.config/forbidden\"",
112	"lesson": "Future agents should not weaken local-first privacy, redaction, or no-network guarantees without explicit approval. Specifically: Agent action touched risky-command [signals: risky command]: \"sudo rm -rf /root/.config/forbidden\"",
113	"evalCandidate": true
114	},
115	{
116	"id": "failure_006",
117	"type": "user_rejected_action",
118	"tier": "high",
119	"confidence": 0.8,
120	"model": null,
121	"firstSeenNodeId": "node_004",
122	"correctedByNodeId": null,
123	"summary": "The user explicitly told the agent to stop or not proceed near \"stop, don't do that\".",
124	"evidence": "user_text_decline (text): \"stop, don't do that\"",
125	"lesson": "Future agents should not retry a tool action the user just declined without first explaining why the action is still worth taking. Specifically: user_text_decline (text): \"stop, don't do that\"",
126	"evalCandidate": true
127	},
128	{
129	"id": "failure_007",
130	"type": "model_refused",
131	"tier": "verified",
132	"confidence": 0.95,
133	"model": null,
134	"firstSeenNodeId": "node_004",
135	"correctedByNodeId": null,
136	"summary": "The model refused to proceed near \"stop, don't do that\".",
137	"evidence": "model_refusal (stop_reason)",
138	"lesson": "Future agents should treat a refusal as a signal to rephrase or descope, not to retry the same request verbatim; if the user confirms the request is legitimate, surface the refusal reason. Specifically: model_refusal (stop_reason)",
139	"evalCandidate": true
140	},
141	{
142	"id": "failure_008",
143	"type": "misunderstood_goal",
144	"tier": "inferred",
145	"confidence": 0.68,
146	"model": "claude-3-opus",
147	"firstSeenNodeId": "node_003",
148	"correctedByNodeId": "node_004",
149	"summary": "A possible misunderstood goal occurred near \"Try writing a new file via the Write tool.\"; corrected by \"stop, don't do that\".",
150	"evidence": "User said: \"stop, don't do that\"",
151	"lesson": "Future agents should restate and verify the goal before continuing after a correction. Specifically: User said: \"stop, don't do that\"",
152	"evalCandidate": true
153	},
154	{
155	"id": "failure_009",
156	"type": "model_refused",
157	"tier": "confirmed",
158	"confidence": 0.7,
159	"model": null,
160	"firstSeenNodeId": "node_005",
161	"correctedByNodeId": null,
162	"summary": "The model refused to proceed near \"Can you at least tell me what would have happened?\".",
163	"evidence": "model_refusal (text_heuristic): \"I can't help with that request. It would require me to describe how to bypass filesystem permissions, which I am programmed not to do.\"",
164	"lesson": "Future agents should treat a refusal as a signal to rephrase or descope, not to retry the same request verbatim; if the user confirms the request is legitimate, surface the refusal reason. Specifically: model_refusal (text_heuristic): \"I can't help with that request. It would require me to describe how to bypass filesystem permissions, which I am programmed not to do.\"",
165	"evalCandidate": true
166	}
167	],
168	"correctionChains": [
169	{
170	"id": "chain_001",
171	"failureNodeId": "node_003",
172	"correctionNodeId": "node_004",
173	"resolvedNodeId": null,
174	"failureType": "misunderstood_goal",
175	"confidence": "medium",
176	"summary": "A possible misunderstood goal occurred near \"Try writing a new file via the Write tool.\"; corrected by \"stop, don't do that\"."
177	}
178	]
179	}