| @@ -1,5 +1,23 @@ | ||
| # Oversight CHANGELOG | ||
| + | ## Unreleased - 2026-04-20 security hardening | |
| + | ||
| + | - `oversight_core/container.py`: the `max_opens` counter now increments only | |
| + | after a successful decryption, and unsafe `seal_multi()` is disabled until the | |
| + | manifest format can honestly represent multiple recipients. | |
| + | - `oversight_core/policy.py`: `LOCAL_ONLY` counter locking now works on | |
| + | Windows, and `REGISTRY` / `HYBRID` fail closed instead of silently using | |
| + | local state. | |
| + | - `oversight_core/rekor.py`: offline verification now rejects DSSE envelopes | |
| + | whose subject digest does not match the expected content hash. | |
| + | - `registry/server.py`: Rekor attestations now use real watermark mark IDs | |
| + | and the manifest's actual `content_hash`. | |
| + | - `oversight_core/formats/text.py`: text adapter now applies L3 before L2/L1, | |
| + | matching the core watermark pipeline. | |
| + | - Added focused regression coverage in `tests/test_policy_unit.py`, | |
| + | `tests/test_registry_unit.py`, `tests/test_rekor_unit.py`, and | |
| + | `tests/test_text_format_unit.py`. | |
| + | ||
| ## v0.5.0 - 2026-04-19 | ||
| First release with public-Rekor attestations. Now hosted at |
| @@ -79,6 +79,10 @@ The seal command applies three watermark layers to the document, each targeting | ||
| Then it encrypts to the recipient's X25519 public key, timestamps via RFC 3161, logs to the Merkle tree, and writes the `.sealed` file plus a `.fingerprint.json` sidecar for the content fingerprint database. | ||
| + | Oversight currently emits one sealed file per recipient. Multi-recipient | |
| + | sealing is intentionally disabled until the manifest format can bind | |
| + | multiple recipients without weakening attribution evidence. | |
| + | ||
| ### What happens when you attribute | ||
| The attribute command runs a 5-phase pipeline: | ||
| @@ -99,6 +103,15 @@ The attribute command runs a 5-phase pipeline: | ||
| See `CHANGELOG.md` for full version history. | ||
| + | ## Security hardening | |
| + | ||
| + | - `max_opens` now counts only successful recipient decryptions, not failed key guesses. | |
| + | - `LOCAL_ONLY` open counters now work on Windows as well as POSIX hosts. | |
| + | - `REGISTRY` and `HYBRID` policy modes fail closed instead of silently falling back to local counters. | |
| + | - Rekor offline verification now checks the attested digest against the expected content hash. | |
| + | - Registry Rekor attestations now index by real watermark mark IDs and the manifest's actual `content_hash`. | |
| + | - Multi-recipient sealing is disabled until a recipient-honest manifest format lands. | |
| + | ||
| ## Repository layout | ||
| ``` | ||
| @@ -192,7 +205,9 @@ bash oversight-rust/tests/conformance_cross_lang.sh | ||
| - **Human paraphrasing defeats watermarks.** Someone who reads the document and rewrites it in their own words leaves no trace. Fundamental, not an engineering gap. | ||
| - **Beacons fire only when the reader has network access.** Airgapped readers leave no callback. L3 semantic watermarking is the attribution path for that case. | ||
| - | - **Our Merkle transparency log isn't RFC 6962 compliant** (uses promote-odd-trailing, not left-heavy split). Self-consistent but won't verify against Sigstore tooling. Planned migration to Rekor v2 in v0.4. | |
| + | - **The local Python Merkle transparency log is still not a full Sigstore-compatible substitute.** | |
| + | Public-log interoperability is now provided by Rekor DSSE attestations; the local log remains | |
| + | a lightweight registry integrity mechanism, not a drop-in replacement for Rekor. | |
| - **No independent security audit yet.** Planned for 2027. Until then: user-beware, cryptographer-review welcome. Open an issue. | ||
| - **Rust port is core-only.** ~1,500 LOC ported. The remaining ~5,500 LOC (semantic dictionary, format adapters, registry server, integrations) is multi-release scope. Python is still the canonical reference. | ||
| @@ -247,30 +247,16 @@ def seal_multi( | ||
| recipient_x25519_pubs: list[bytes], | ||
| ) -> bytes: | ||
| """ | ||
| - | Seal a single file for multiple recipients. Each recipient gets a unique | |
| - | wrap of the same DEK. See top-of-module docstring for attribution notes. | |
| + | Multi-recipient sealing is intentionally disabled. | |
| + | ||
| + | The v1 manifest binds a single recipient identity and public key into the | |
| + | issuer-signed metadata. Reusing that manifest across multiple recipient key | |
| + | slots produces containers that decrypt for several recipients while still | |
| + | claiming only one recipient in signed evidence, which is unsafe for | |
| + | attribution. Callers must currently emit one sealed file per recipient | |
| + | until the wire format grows an explicit multi-recipient manifest. | |
| """ | ||
| - | if manifest.content_hash != crypto.content_hash(plaintext): | |
| - | raise ValueError("manifest.content_hash does not match sha256(plaintext)") | |
| - | if manifest.size_bytes != len(plaintext): | |
| - | raise ValueError("manifest.size_bytes does not match len(plaintext)") | |
| - | if len(recipient_x25519_pubs) < 1: | |
| - | raise ValueError("need at least one recipient") | |
| - | if len(issuer_ed25519_priv) != 32: | |
| - | raise ValueError(f"issuer priv key must be 32 bytes, got {len(issuer_ed25519_priv)}") | |
| - | for i, pub in enumerate(recipient_x25519_pubs): | |
| - | if len(pub) != 32: | |
| - | raise ValueError(f"recipient[{i}] pubkey must be 32 bytes, got {len(pub)}") | |
| - | ||
| - | manifest.sign(issuer_ed25519_priv) | |
| - | dek = crypto.random_dek() | |
| - | slots = [crypto.wrap_dek_for_recipient(dek, pub) for pub in recipient_x25519_pubs] | |
| - | aad = manifest.content_hash.encode("ascii") | |
| - | nonce, ct = crypto.aead_encrypt(dek, plaintext, aad=aad) | |
| - | sf = SealedFile( | |
| - | manifest=manifest, | |
| - | wrapped_dek={"slots": slots}, | |
| - | aead_nonce=nonce, | |
| - | ciphertext=ct, | |
| + | raise ValueError( | |
| + | "seal_multi is disabled because the v1 manifest only supports a single " | |
| + | "recipient binding; seal one file per recipient instead" | |
| ) | ||
| - | return sf.to_bytes() |
| @@ -15,14 +15,18 @@ from .. import watermark, semantic | ||
| def apply(text: str, mark_id: bytes, layers: tuple[str, ...] = ("L1", "L2", "L3")) -> str: | ||
| - | """Apply all requested watermark layers to UTF-8 text.""" | |
| + | """Apply all requested watermark layers to UTF-8 text. | |
| + | ||
| + | Layer order matters: L3 rewrites visible words, so it must run before the | |
| + | L2/L1 steganographic layers that append whitespace and zero-width chars. | |
| + | """ | |
| t = text | ||
| - | if "L1" in layers: | |
| - | t = watermark.embed_zw(t, mark_id) | |
| - | if "L2" in layers: | |
| - | t = watermark.embed_ws(t, mark_id) | |
| if "L3" in layers: | ||
| t = semantic.apply_semantic(t, mark_id) | ||
| + | if "L2" in layers: | |
| + | t = watermark.embed_ws(t, mark_id) | |
| + | if "L1" in layers: | |
| + | t = watermark.embed_zw(t, mark_id) | |
| return t | ||
| @@ -9,7 +9,7 @@ Covers: | ||
| 4. PDF metadata marks | ||
| 5. DOCX metadata marks | ||
| 6. Seal + open (single recipient) | ||
| - | 7. Multi-recipient seal | |
| + | 7. Multi-recipient seal fails closed | |
| 8. Policy enforcement (not_after expired) | ||
| 9. Policy enforcement (max_opens counter) | ||
| 10. Semantic watermark verification (airgap-strip survivor) | ||
| @@ -209,26 +209,16 @@ def main(): | ||
| fail("recovered plaintext mismatch") | ||
| ok(f"seal/open round-trip OK ({len(blob)} bytes)") | ||
| - | banner("8. Multi-recipient seal") | |
| + | banner("8. Multi-recipient seal fails closed") | |
| m2 = Manifest.new("multi.txt", content_hash(plaintext), len(plaintext), "acme", issuer.ed25519_pub.hex(), rec, "http://localhost:8765", "text/plain") | ||
| - | multi_blob = seal_multi( | |
| - | plaintext, m2, issuer.ed25519_priv, | |
| - | [alice.x25519_pub, bob.x25519_pub, carol.x25519_pub], | |
| - | ) | |
| - | ok(f"multi-recipient blob = {len(multi_blob)} bytes") | |
| - | # Each recipient can decrypt | |
| - | for name, ident in [("alice", alice), ("bob", bob), ("carol", carol)]: | |
| - | pt, _ = open_sealed(multi_blob, ident.x25519_priv) | |
| - | if pt != plaintext: | |
| - | fail(f"multi-recipient decrypt FAILED for {name}") | |
| - | ok(f" {name} decrypts multi-recipient blob") | |
| - | # Stranger cannot | |
| - | stranger = ClassicIdentity.generate() | |
| try: | ||
| - | open_sealed(multi_blob, stranger.x25519_priv) | |
| - | fail("stranger should NOT have been able to decrypt multi-recipient blob") | |
| + | seal_multi( | |
| + | plaintext, m2, issuer.ed25519_priv, | |
| + | [alice.x25519_pub, bob.x25519_pub, carol.x25519_pub], | |
| + | ) | |
| + | fail("seal_multi should be disabled until the manifest can bind multiple recipients") | |
| except Exception as e: | ||
| - | ok(f"stranger correctly rejected: {type(e).__name__}") | |
| + | ok(f"multi-recipient seal correctly rejected: {type(e).__name__}") | |
| banner("9. Policy: not_after (expired)") | ||
| expired_m = Manifest.new("exp.txt", content_hash(plaintext), len(plaintext), "acme", issuer.ed25519_pub.hex(), rec, "http://localhost:8765") |
| @@ -0,0 +1,41 @@ | ||
| + | """ | |
| + | test_text_format_unit | |
| + | ===================== | |
| + | ||
| + | Focused checks for the text format adapter's layer ordering. | |
| + | """ | |
| + | from __future__ import annotations | |
| + | ||
| + | import os | |
| + | import sys | |
| + | ||
| + | ROOT = os.path.join(os.path.dirname(__file__), "..") | |
| + | sys.path.insert(0, ROOT) | |
| + | ||
| + | from oversight_core import watermark | |
| + | from oversight_core.formats import text as text_format | |
| + | ||
| + | ||
| + | def t1_text_adapter_matches_core_order(): | |
| + | original = ( | |
| + | "We begin to show how this is significant and we must help users find answers.\n" | |
| + | "A second paragraph helps the semantic watermark choose visible variants." | |
| + | ) | |
| + | mark_id = watermark.new_mark_id() | |
| + | via_adapter = text_format.apply(original, mark_id) | |
| + | via_core = watermark.apply_all(original, mark_id) | |
| + | assert via_adapter == via_core, "text adapter diverged from core watermark order" | |
| + | print(" [PASS] text adapter applies L3/L2/L1 in the same order as the core pipeline") | |
| + | ||
| + | ||
| + | def main(): | |
| + | print("=" * 60) | |
| + | print(" oversight_core.formats.text - focused unit tests") | |
| + | print("=" * 60) | |
| + | t1_text_adapter_matches_core_order() | |
| + | print() | |
| + | print(" ALL TESTS PASSED - 1/1") | |
| + | ||
| + | ||
| + | if __name__ == "__main__": | |
| + | main() |