Zion Boggan zionboggan.com ↗

Validate Rust registry events against tlog leaves

Add a tlog leaf read path and extend Rust registry integrity validation to compare event rows against their indexed tlog payloads, including DNS sidecar fields.

Document the validation gate and opt main GitHub Actions workflows into the Node 24 runtime.

Co-authored-by: Codex (GPT-5.4) <noreply@openai.com>
f46bdef   Zion Boggan committed on May 24, 2026 (4 weeks ago)
.github/workflows/opsec.yml +3 -0
@@ -5,6 +5,9 @@ on:
push:
branches: [main]
+env:
+ FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: true
+
jobs:
scan:
runs-on: ubuntu-latest
.github/workflows/source-style.yml +3 -0
@@ -5,6 +5,9 @@ on:
push:
branches: [main]
+env:
+ FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: true
+
jobs:
comments:
runs-on: ubuntu-latest
CHANGELOG.md +5 -1
@@ -32,7 +32,11 @@
uniqueness so corrupted migrated evidence cannot look clean. Rust registry
writes now fail closed if the local transparency log cannot append, and
validation checks missing or out-of-range event tlog indexes against the
- on-disk tlog size.
+ on-disk tlog size. Validation also compares event rows to the corresponding
+ tlog leaf payload so an index cannot point at unrelated evidence and still
+ pass burn-in checks.
+- **GitHub Actions runtime hygiene.** Main CI workflows opt into the GitHub
+ Actions Node 24 runtime before the hosted runner default changes.
- **Rust policy test parity.** Fixed the `oversight-policy` crate's manifest
fixture after the v0.4.11 `Recipient.p256_pub` schema addition so the full
Rust workspace test suite compiles again.
README.md +2 -0
@@ -99,6 +99,8 @@ attribution rows, event metadata, corpus metadata, and tlog indexes without
treating the Python reference as a permanent production dependency.
Rust registry writes now fail closed if the local transparency log cannot
append, so new evidence rows cannot silently lose their audit trail.
+The validator also checks that event rows point at matching tlog leaf payloads,
+not just in-range indexes.
The next Rust-registry gate is operational burn-in: longer-running deployment
tests against real operator databases and a final wire-format stability
docs/REGISTRY_DEPLOYMENT.md +2 -1
@@ -136,7 +136,8 @@ The validation command prints JSON counts plus integrity failures for orphaned
beacons, watermarks, events, corpus rows, identity mismatches, malformed
event `extra` JSON, malformed corpus metadata JSON, duplicate or negative
tlog indexes, missing event tlog indexes, event tlog indexes outside the
-on-disk tlog size, malformed manifest JSON, invalid manifest signatures, and
+on-disk tlog size, event rows whose indexed tlog leaf carries unrelated
+evidence, malformed manifest JSON, invalid manifest signatures, and
manifest/file ID divergence. Keep the Python database as a rollback artifact
until validation, live conformance, and evidence-bundle checks pass against
the Rust service.
docs/ROADMAP.md +2 -0
@@ -246,6 +246,8 @@ manifest signatures, and manifest/file ID divergence. As of 2026-05-21, that
validation also covers event/corpus JSON sidecars and tlog index uniqueness.
As of 2026-05-22, registry writes fail closed when tlog append fails and
`--validate-db` compares event tlog indexes against the on-disk tlog size.
+As of 2026-05-24, validation also checks that each event's indexed tlog leaf
+matches the event row rather than unrelated evidence.
Remaining work: longer-running deployment tests and a wire-format stability
declaration before declaring v1.0 ready.
oversight-rust/oversight-registry/src/db.rs +131 -2
@@ -44,6 +44,7 @@ pub struct RegistryIntegrityReport {
pub negative_event_tlog_indexes: i64,
pub events_without_tlog_index: i64,
pub event_tlog_indexes_out_of_range: i64,
+ pub event_tlog_leaf_mismatches: i64,
pub tlog_size: Option<usize>,
pub malformed_manifest_json: i64,
pub invalid_manifest_signatures: i64,
@@ -232,8 +233,9 @@ pub async fn migrate_from_sqlite(
pub async fn validate_registry_integrity(
pool: &SqlitePool,
- tlog_size: Option<usize>,
+ tlog: Option<&oversight_tlog::TransparencyLog>,
) -> Result<RegistryIntegrityReport> {
+ let tlog_size = tlog.map(|log| log.size());
let counts = registry_counts(pool).await?;
let orphan_beacons = count_query(
pool,
@@ -315,6 +317,37 @@ pub async fn validate_registry_integrity(
.filter(|metadata| serde_json::from_str::<serde_json::Value>(metadata).is_err())
.count() as i64;
+ let event_rows: Vec<EventRow> = sqlx::query_as(
+ "SELECT id, token_id, file_id, recipient_id, issuer_id, kind, source_ip, user_agent, extra, timestamp, qualified_timestamp, tlog_index FROM events",
+ )
+ .fetch_all(pool)
+ .await?;
+ let mut event_tlog_leaf_mismatches = 0;
+ if let Some(log) = tlog {
+ for event in &event_rows {
+ let Some(idx) = event.tlog_index else {
+ continue;
+ };
+ if idx < 0 || idx as usize >= log.size() {
+ continue;
+ }
+ let Some(record) = log
+ .leaf_record(idx as usize)
+ .map_err(|e| RegistryError::Internal(format!("tlog leaf read failed: {e}")))?
+ else {
+ event_tlog_leaf_mismatches += 1;
+ continue;
+ };
+ let Ok(leaf) = serde_json::from_str::<serde_json::Value>(&record.leaf_data) else {
+ event_tlog_leaf_mismatches += 1;
+ continue;
+ };
+ if !event_matches_tlog_leaf(event, &leaf) {
+ event_tlog_leaf_mismatches += 1;
+ }
+ }
+ }
+
let mut malformed_manifest_json = 0;
let mut invalid_manifest_signatures = 0;
let mut mismatched_manifest_file_ids = 0;
@@ -352,6 +385,7 @@ pub async fn validate_registry_integrity(
&& negative_event_tlog_indexes == 0
&& events_without_tlog_index == 0
&& event_tlog_indexes_out_of_range == 0
+ && event_tlog_leaf_mismatches == 0
&& malformed_manifest_json == 0
&& invalid_manifest_signatures == 0
&& mismatched_manifest_file_ids == 0;
@@ -372,6 +406,7 @@ pub async fn validate_registry_integrity(
negative_event_tlog_indexes,
events_without_tlog_index,
event_tlog_indexes_out_of_range,
+ event_tlog_leaf_mismatches,
tlog_size,
malformed_manifest_json,
invalid_manifest_signatures,
@@ -379,6 +414,43 @@ pub async fn validate_registry_integrity(
})
}
+/// Reports whether a parsed tlog leaf describes the same beacon event as `event`.
+///
+/// The leaf must carry `"event": "beacon"` plus the row's `kind` and `token_id`.
+/// The optional columns `file_id`, `recipient_id` and `source_ip` are compared
+/// through [`json_opt_str`], and the leaf's `timestamp` is compared against the
+/// row's `qualified_timestamp`. `user_agent` is compared for every kind except
+/// `"dns"`; DNS rows instead have their `qname`/`qtype` sidecar checked by
+/// [`dns_extra_matches_tlog_leaf`].
+///
+/// The row's `id`, `issuer_id`, numeric `timestamp` and `tlog_index` are not
+/// part of the comparison.
+fn event_matches_tlog_leaf(event: &EventRow, leaf: &serde_json::Value) -> bool {
+    let leaf_str = |key: &str| leaf.get(key).and_then(|v| v.as_str());
+
+    // Mandatory identity fields: any disagreement is an immediate mismatch.
+    if leaf_str("event") != Some("beacon") {
+        return false;
+    }
+    if leaf_str("kind") != Some(event.kind.as_str()) {
+        return false;
+    }
+    if leaf_str("token_id") != Some(event.token_id.as_str()) {
+        return false;
+    }
+
+    // Nullable columns: a NULL column must be absent or null in the leaf.
+    let nullable_fields = [
+        ("file_id", event.file_id.as_deref()),
+        ("recipient_id", event.recipient_id.as_deref()),
+        ("source_ip", event.source_ip.as_deref()),
+        ("timestamp", event.qualified_timestamp.as_deref()),
+    ];
+    if !nullable_fields
+        .iter()
+        .all(|&(key, expected)| json_opt_str(leaf, key, expected))
+    {
+        return false;
+    }
+
+    // NOTE(review): DNS rows skip the user_agent comparison, presumably because
+    // DNS leaves carry no user agent while the row may store one; confirm.
+    if event.kind != "dns" && !json_opt_str(leaf, "user_agent", event.user_agent.as_deref()) {
+        return false;
+    }
+
+    dns_extra_matches_tlog_leaf(event, leaf)
+}
+
+/// Checks the DNS sidecar fields (`qname`, `qtype`) of `event` against `leaf`.
+///
+/// Rows whose `kind` is not `"dns"` always pass. For DNS rows the `extra`
+/// column is parsed as JSON; a missing or unparseable `extra` is treated as an
+/// empty object, so the leaf must then carry neither field (or carry them as
+/// null).
+fn dns_extra_matches_tlog_leaf(event: &EventRow, leaf: &serde_json::Value) -> bool {
+    if event.kind == "dns" {
+        let sidecar: serde_json::Value = match event.extra.as_deref() {
+            Some(raw) => serde_json::from_str(raw).unwrap_or_else(|_| serde_json::json!({})),
+            None => serde_json::json!({}),
+        };
+        ["qname", "qtype"].iter().all(|&field| {
+            json_opt_str(leaf, field, sidecar.get(field).and_then(|v| v.as_str()))
+        })
+    } else {
+        true
+    }
+}
+
+/// Compares the string stored under `key` in `value` with an optional expectation.
+///
+/// * `expected == Some(s)`: the field must be a JSON string equal to `s`.
+/// * `expected == None`: the field must be absent or JSON `null`.
+fn json_opt_str(value: &serde_json::Value, key: &str, expected: Option<&str>) -> bool {
+    let field = value.get(key);
+    if let Some(want) = expected {
+        return field.and_then(|v| v.as_str()) == Some(want);
+    }
+    field.map_or(true, |v| v.is_null())
+}
+
async fn registry_counts(pool: &SqlitePool) -> Result<RegistryCounts> {
Ok(RegistryCounts {
manifests: count_query(pool, "SELECT COUNT(*) FROM manifests").await?,
@@ -978,12 +1050,43 @@ mod tests {
assert_eq!(report.negative_event_tlog_indexes, 0);
assert_eq!(report.events_without_tlog_index, 0);
assert_eq!(report.event_tlog_indexes_out_of_range, 0);
+ assert_eq!(report.event_tlog_leaf_mismatches, 0);
assert_eq!(report.tlog_size, None);
pool.close().await;
let _ = std::fs::remove_dir_all(dir);
}
+    /// A DNS row with an empty `user_agent` must still match a leaf that has no
+    /// `user_agent` key at all, as long as every other compared field agrees.
+    #[test]
+    fn event_leaf_matching_accepts_dns_without_user_agent() {
+        let dns_row = EventRow {
+            id: 1,
+            token_id: "token-1".to_string(),
+            file_id: Some("file-1".to_string()),
+            recipient_id: Some("recipient-1".to_string()),
+            issuer_id: Some("issuer-1".to_string()),
+            kind: "dns".to_string(),
+            source_ip: Some("198.51.100.10".to_string()),
+            // Empty rather than NULL: the leaf below has no user_agent key.
+            user_agent: Some(String::new()),
+            extra: Some(r#"{"qname":"b.example","qtype":"A"}"#.to_string()),
+            timestamp: 1,
+            qualified_timestamp: Some("2026-05-24T00:00:00Z".to_string()),
+            tlog_index: Some(0),
+        };
+        // The leaf carries the DNS sidecar fields inline and omits user_agent.
+        let tlog_leaf = serde_json::json!({
+            "event": "beacon",
+            "kind": "dns",
+            "token_id": "token-1",
+            "file_id": "file-1",
+            "recipient_id": "recipient-1",
+            "source_ip": "198.51.100.10",
+            "qname": "b.example",
+            "qtype": "A",
+            "timestamp": "2026-05-24T00:00:00Z",
+        });
+
+        assert!(event_matches_tlog_leaf(&dns_row, &tlog_leaf));
+    }
+
#[tokio::test]
async fn validate_registry_integrity_reports_bad_rows() {
let dir = temp_dir("validate-bad");
@@ -992,6 +1095,13 @@ mod tests {
let pool = create_pool(&db_path).await.unwrap();
run_migrations(&pool).await.unwrap();
seed_source(&pool).await;
+ let tlog = oversight_tlog::TransparencyLog::open(dir.join("tlog")).unwrap();
+ tlog.append_event(&serde_json::json!({
+ "event": "beacon",
+ "kind": "dns",
+ "token_id": "different-token",
+ }))
+ .unwrap();
sqlx::query(
"INSERT INTO manifests (file_id, recipient_id, issuer_id, issuer_ed25519_pub, manifest_json, registered_at) VALUES (?, ?, ?, ?, ?, ?)",
@@ -1059,6 +1169,22 @@ mod tests {
)
.await
.unwrap();
+ insert_event(
+ &pool,
+ "token-mismatch",
+ Some("file-1"),
+ Some("recipient-1"),
+ Some("issuer-1"),
+ "dns",
+ Some("127.0.0.1"),
+ Some("agent"),
+ Some(r#"{"qtype":"A"}"#),
+ 24,
+ Some("2026-05-24T00:00:00Z"),
+ Some(0),
+ )
+ .await
+ .unwrap();
sqlx::query(
"INSERT INTO corpus (file_id, hash_kind, hash_value, metadata, registered_at) VALUES (?, ?, ?, ?, ?)",
)
@@ -1071,7 +1197,9 @@ mod tests {
.await
.unwrap();
- let report = validate_registry_integrity(&pool, Some(1)).await.unwrap();
+ let report = validate_registry_integrity(&pool, Some(&tlog))
+ .await
+ .unwrap();
assert!(!report.ok);
assert_eq!(report.orphan_beacons, 1);
assert_eq!(report.orphan_watermarks, 1);
@@ -1084,6 +1212,7 @@ mod tests {
assert_eq!(report.negative_event_tlog_indexes, 1);
assert_eq!(report.events_without_tlog_index, 1);
assert_eq!(report.event_tlog_indexes_out_of_range, 2);
+ assert_eq!(report.event_tlog_leaf_mismatches, 1);
assert_eq!(report.tlog_size, Some(1));
pool.close().await;
oversight-rust/oversight-registry/src/main.rs +1 -1
@@ -387,7 +387,7 @@ async fn main() -> anyhow::Result<()> {
if args.validate_db {
let tlog = TransparencyLog::open(data_dir.join("tlog"))
.map_err(|e| anyhow::anyhow!("tlog validation init: {e}"))?;
- let report = db::validate_registry_integrity(&pool, Some(tlog.size()))
+ let report = db::validate_registry_integrity(&pool, Some(&tlog))
.await
.map_err(|e| anyhow::anyhow!("registry integrity validation failed: {e}"))?;
println!("{}", serde_json::to_string_pretty(&report)?);
oversight-rust/oversight-tlog/src/lib.rs +37 -4
@@ -149,10 +149,10 @@ pub fn verify_inclusion_proof(
/// On-disk leaf record format.
#[derive(Debug, Clone, Serialize, Deserialize)]
-struct LeafRecord {
- index: usize,
- leaf_hash: String,
- leaf_data: String,
+pub struct LeafRecord {
+ pub index: usize,
+ pub leaf_hash: String,
+ pub leaf_data: String,
}
/// Signed tree head.
@@ -346,6 +346,25 @@ impl TransparencyLog {
})
}
+    /// Reads the on-disk leaf record whose `index` field equals `index`.
+    ///
+    /// Returns `Ok(None)` when `index` is at or beyond [`Self::size`], or when
+    /// no record in the leaves file carries that index.
+    ///
+    /// The leaves file is scanned from the start on every call, so the cost of
+    /// one lookup grows with the number of stored leaves.
+    ///
+    /// # Errors
+    ///
+    /// Fails if the leaves file cannot be opened or read, or if any non-blank
+    /// line reached before the requested record does not deserialize as a
+    /// [`LeafRecord`].
+    pub fn leaf_record(&self, index: usize) -> Result<Option<LeafRecord>> {
+        if index >= self.size() {
+            return Ok(None);
+        }
+        let mut reader = BufReader::new(File::open(&self.leaves_path)?);
+        // One buffer reused for every line; cleared after each record.
+        let mut line = String::new();
+        while reader.read_line(&mut line)? != 0 {
+            // Blank lines are skipped; the trailing newline kept by
+            // `read_line` is whitespace the JSON parser tolerates.
+            if !line.trim().is_empty() {
+                let record: LeafRecord = serde_json::from_str(&line)?;
+                if record.index == index {
+                    return Ok(Some(record));
+                }
+            }
+            line.clear();
+        }
+        Ok(None)
+    }
+
pub fn data_dir(&self) -> &Path {
&self.dir
}
@@ -481,4 +500,18 @@ mod tests {
let (_d, tl) = mktlog();
assert_eq!(tl.root(), [0u8; 32]);
}
+
+    /// Appending one event makes it readable at index 0 with its payload
+    /// intact, while the next index reports no record.
+    #[test]
+    fn leaf_record_reads_appended_payload() {
+        let (_dir, log) = mktlog();
+        let payload = serde_json::json!({"event": "beacon", "token_id": "token-1"});
+        log.append_event(&payload).unwrap();
+
+        let stored = log.leaf_record(0).unwrap().unwrap();
+        assert_eq!(stored.index, 0);
+
+        // Compare as parsed JSON so key order in the stored text is irrelevant.
+        let decoded: serde_json::Value = serde_json::from_str(&stored.leaf_data).unwrap();
+        assert_eq!(decoded, payload);
+
+        assert!(log.leaf_record(1).unwrap().is_none());
+    }
}