| 1 | { |
| 2 | "schemaVersion": "0.3", |
| 3 | "project": { |
| 4 | "name": "rejections", |
| 5 | "generatedAt": "2026-06-19T06:50:12.990Z" |
| 6 | }, |
| 7 | "summary": { |
| 8 | "totalFailureSignals": 9, |
| 9 | "topFailureTypes": [ |
| 10 | { |
| 11 | "type": "user_rejected_action", |
| 12 | "count": 3 |
| 13 | }, |
| 14 | { |
| 15 | "type": "model_refused", |
| 16 | "count": 2 |
| 17 | }, |
| 18 | { |
| 19 | "type": "misunderstood_goal", |
| 20 | "count": 1 |
| 21 | }, |
| 22 | { |
| 23 | "type": "permission_denied", |
| 24 | "count": 1 |
| 25 | }, |
| 26 | { |
| 27 | "type": "security_or_privacy_risk", |
| 28 | "count": 1 |
| 29 | }, |
| 30 | { |
| 31 | "type": "tool_execution_failed", |
| 32 | "count": 1 |
| 33 | } |
| 34 | ], |
| 35 | "tierCounts": { |
| 36 | "verified": 3, |
| 37 | "high": 4, |
| 38 | "confirmed": 1, |
| 39 | "inferred": 1 |
| 40 | }, |
| 41 | "models": [ |
| 42 | "claude-3-opus" |
| 43 | ], |
| 44 | "thinkingBlocks": 0, |
| 45 | "correctionChains": 1, |
| 46 | "evalCandidates": 5, |
| 47 | "lessons": 6 |
| 48 | }, |
| 49 | "failures": [ |
| 50 | { |
| 51 | "id": "failure_001", |
| 52 | "type": "user_rejected_action", |
| 53 | "tier": "verified", |
| 54 | "confidence": 1, |
| 55 | "model": "claude-3-opus", |
| 56 | "firstSeenNodeId": "node_001", |
| 57 | "correctedByNodeId": null, |
| 58 | "summary": "The user declined a proposed tool action near \"Build a thing in this repo.\".", |
| 59 | "evidence": "user_declined_tool (tool_result): \"The user doesn't want to proceed with this tool use. The user wants you to answer a different question instead.\"", |
| 60 | "lesson": "Future agents should not retry a tool action the user just declined without first explaining why the action is still worth taking. Specifically: user_declined_tool (tool_result): \"The user doesn't want to proceed with this tool use. The user wants you to answer a different question instead.\"", |
| 61 | "evalCandidate": true |
| 62 | }, |
| 63 | { |
| 64 | "id": "failure_002", |
| 65 | "type": "user_rejected_action", |
| 66 | "tier": "verified", |
| 67 | "confidence": 1, |
| 68 | "model": "claude-3-opus", |
| 69 | "firstSeenNodeId": "node_002", |
| 70 | "correctedByNodeId": null, |
| 71 | "summary": "The user interrupted the agent mid-response near \"Use the Edit tool to add a README instead.\".", |
| 72 | "evidence": "user_interrupt (text): \"[Request interrupted by user]\"", |
| 73 | "lesson": "Future agents should not retry a tool action the user just declined without first explaining why the action is still worth taking. Specifically: user_interrupt (text): \"[Request interrupted by user]\"", |
| 74 | "evalCandidate": true |
| 75 | }, |
| 76 | { |
| 77 | "id": "failure_003", |
| 78 | "type": "tool_execution_failed", |
| 79 | "tier": "high", |
| 80 | "confidence": 0.9, |
| 81 | "model": "claude-3-opus", |
| 82 | "firstSeenNodeId": "node_003", |
| 83 | "correctedByNodeId": null, |
| 84 | "summary": "A tool execution returned an error near \"Try writing a new file via the Write tool.\".", |
| 85 | "evidence": "tool_execution_error (tool_result): \"mkdir: cannot create directory '/root/.config/forbidden': File exists\"", |
| 86 | "lesson": "Future agents should validate command inputs and surface expected errors before running shell or write tools, instead of discovering failures after execution. Specifically: tool_execution_error (tool_result): \"mkdir: cannot create directory '/root/.config/forbidden': File exists\"", |
| 87 | "evalCandidate": true |
| 88 | }, |
| 89 | { |
| 90 | "id": "failure_004", |
| 91 | "type": "permission_denied", |
| 92 | "tier": "high", |
| 93 | "confidence": 0.85, |
| 94 | "model": "claude-3-opus", |
| 95 | "firstSeenNodeId": "node_003", |
| 96 | "correctedByNodeId": null, |
| 97 | "summary": "A tool action was denied by the environment (permission denied) near \"Try writing a new file via the Write tool.\".", |
| 98 | "evidence": "permission_denied (tool_result): \"sudo: permission denied; user is not in the sudoers file. This incident will be reported.\"", |
| 99 | "lesson": "Future agents should pre-flight check that required files, commands, or resources are accessible before attempting an action that needs them. Specifically: permission_denied (tool_result): \"sudo: permission denied; user is not in the sudoers file. This incident will be reported.\"", |
| 100 | "evalCandidate": true |
| 101 | }, |
| 102 | { |
| 103 | "id": "failure_005", |
| 104 | "type": "security_or_privacy_risk", |
| 105 | "tier": "high", |
| 106 | "confidence": 0.84, |
| 107 | "model": "claude-3-opus", |
| 108 | "firstSeenNodeId": "node_003", |
| 109 | "correctedByNodeId": null, |
| 110 | "summary": "An agent action touched auth, secrets, or access control near \"Try writing a new file via the Write tool.\".", |
| 111 | "evidence": "Agent action touched risky-command [signals: risky command]: \"sudo rm -rf /root/.config/forbidden\"", |
| 112 | "lesson": "Future agents should not weaken local-first privacy, redaction, or no-network guarantees without explicit approval. Specifically: Agent action touched risky-command [signals: risky command]: \"sudo rm -rf /root/.config/forbidden\"", |
| 113 | "evalCandidate": true |
| 114 | }, |
| 115 | { |
| 116 | "id": "failure_006", |
| 117 | "type": "user_rejected_action", |
| 118 | "tier": "high", |
| 119 | "confidence": 0.8, |
| 120 | "model": null, |
| 121 | "firstSeenNodeId": "node_004", |
| 122 | "correctedByNodeId": null, |
| 123 | "summary": "The user explicitly told the agent to stop or not proceed near \"stop, don't do that\".", |
| 124 | "evidence": "user_text_decline (text): \"stop, don't do that\"", |
| 125 | "lesson": "Future agents should not retry a tool action the user just declined without first explaining why the action is still worth taking. Specifically: user_text_decline (text): \"stop, don't do that\"", |
| 126 | "evalCandidate": true |
| 127 | }, |
| 128 | { |
| 129 | "id": "failure_007", |
| 130 | "type": "model_refused", |
| 131 | "tier": "verified", |
| 132 | "confidence": 0.95, |
| 133 | "model": null, |
| 134 | "firstSeenNodeId": "node_004", |
| 135 | "correctedByNodeId": null, |
| 136 | "summary": "The model refused to proceed near \"stop, don't do that\".", |
| 137 | "evidence": "model_refusal (stop_reason)", |
| 138 | "lesson": "Future agents should treat a refusal as a signal to rephrase or descope, not to retry the same request verbatim; if the user confirms the request is legitimate, surface the refusal reason. Specifically: model_refusal (stop_reason)", |
| 139 | "evalCandidate": true |
| 140 | }, |
| 141 | { |
| 142 | "id": "failure_008", |
| 143 | "type": "misunderstood_goal", |
| 144 | "tier": "inferred", |
| 145 | "confidence": 0.68, |
| 146 | "model": "claude-3-opus", |
| 147 | "firstSeenNodeId": "node_003", |
| 148 | "correctedByNodeId": "node_004", |
| 149 | "summary": "A possible misunderstood goal occurred near \"Try writing a new file via the Write tool.\"; corrected by \"stop, don't do that\".", |
| 150 | "evidence": "User said: \"stop, don't do that\"", |
| 151 | "lesson": "Future agents should restate and verify the goal before continuing after a correction. Specifically: User said: \"stop, don't do that\"", |
| 152 | "evalCandidate": true |
| 153 | }, |
| 154 | { |
| 155 | "id": "failure_009", |
| 156 | "type": "model_refused", |
| 157 | "tier": "confirmed", |
| 158 | "confidence": 0.7, |
| 159 | "model": null, |
| 160 | "firstSeenNodeId": "node_005", |
| 161 | "correctedByNodeId": null, |
| 162 | "summary": "The model refused to proceed near \"Can you at least tell me what would have happened?\".", |
| 163 | "evidence": "model_refusal (text_heuristic): \"I can't help with that request. It would require me to describe how to bypass filesystem permissions, which I am programmed not to do.\"", |
| 164 | "lesson": "Future agents should treat a refusal as a signal to rephrase or descope, not to retry the same request verbatim; if the user confirms the request is legitimate, surface the refusal reason. Specifically: model_refusal (text_heuristic): \"I can't help with that request. It would require me to describe how to bypass filesystem permissions, which I am programmed not to do.\"", |
| 165 | "evalCandidate": true |
| 166 | } |
| 167 | ], |
| 168 | "correctionChains": [ |
| 169 | { |
| 170 | "id": "chain_001", |
| 171 | "failureNodeId": "node_003", |
| 172 | "correctionNodeId": "node_004", |
| 173 | "resolvedNodeId": null, |
| 174 | "failureType": "misunderstood_goal", |
| 175 | "confidence": "medium", |
| 176 | "summary": "A possible misunderstood goal occurred near \"Try writing a new file via the Write tool.\"; corrected by \"stop, don't do that\"." |
| 177 | } |
| 178 | ] |
| 179 | } |