Skip to content

Commit 85beffd

Browse files
feat(vault): v1.3.0 — all technical gaps closed (government-grade)
5 remaining gaps from world-class roadmap, all implemented:

GAP-7a: Membrane CORRELATE stage
- Cross-document contradiction detection
- Searches vault for trusted (CANONICAL) content, uses LLM to check if new content contradicts existing knowledge
- Prevents "poisoning by contradiction" attacks

GAP-7b: Membrane REMEMBER stage
- Attack pattern registry with SHA3-256 fingerprinting
- Learns from quarantined/flagged content automatically
- Fast pre-check before regex/LLM stages
- Export/import patterns for persistence
- Bounded registry with LRU eviction

GAP-7c: Membrane SURVEIL stage
- Query-time re-evaluation of search results
- Penalizes SUSPICIOUS resources (0.3x relevance)
- Adds explain metadata for verification status
- Applied after trust weighting, before return

GAP-8: Embedding dimension mismatch detection
- get_embedding_dimension() on StorageBackend Protocol + both backends
- _ensure_initialized() checks dimension consistency
- Raises VaultError if embedder dimensions don't match stored vectors

GAP-12: Resource version diff
- vault.diff(old_id, new_id) returns unified diff
- Counts additions/deletions, includes resource names
- Sync wrapper included

Pipeline now runs 5 stages: REMEMBER -> INNATE -> ADAPTIVE -> CORRELATE -> RELEASE
SURVEIL applied at search time (post-search filter).

18 new tests. Verified: ruff 0, mypy strict 0, 681 tests passing.
All 12 roadmap gaps: CLOSED.
1 parent cf83085 commit 85beffd

File tree

11 files changed

+759
-9
lines changed

11 files changed

+759
-9
lines changed

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
44

55
[project]
66
name = "qp-vault"
7-
version = "1.2.0"
7+
version = "1.3.0"
88
description = "Governed knowledge store for autonomous organizations. Trust tiers, cryptographic audit trails, content-addressed storage, air-gap native."
99
readme = "README.md"
1010
license = "Apache-2.0"

src/qp_vault/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@
2626
Docs: https://github.com/quantumpipes/vault
2727
"""
2828

29-
__version__ = "1.2.0"
29+
__version__ = "1.3.0"
3030
__author__ = "Quantum Pipes Technologies, LLC"
3131
__license__ = "Apache-2.0"
3232

src/qp_vault/membrane/correlate.py

Lines changed: 144 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,144 @@
1+
# Copyright 2026 Quantum Pipes Technologies, LLC
2+
# SPDX-License-Identifier: Apache-2.0
3+
4+
"""Correlate stage: cross-document contradiction detection.
5+
6+
Checks whether incoming content contradicts existing trusted knowledge
7+
in the vault. Detects "poisoning by contradiction" attacks where an
8+
adversary submits content that conflicts with authoritative sources.
9+
10+
Requires an LLMScreener for semantic contradiction analysis.
11+
Without one, the stage is skipped. Without a vault reference, skipped.
12+
"""
13+
14+
from __future__ import annotations
15+
16+
import time
17+
from dataclasses import dataclass
18+
from typing import TYPE_CHECKING, Any
19+
20+
from qp_vault.enums import MembraneResult, MembraneStage
21+
from qp_vault.models import MembraneStageRecord
22+
23+
if TYPE_CHECKING:
24+
from qp_vault.protocols import LLMScreener, ScreeningResult
25+
26+
# Prompt template that run_correlate() sends to the LLM screener.
# `{existing}` and `{new_content}` are filled in with str.format(); the doubled
# braces around the JSON example are str.format() escapes for literal braces.
_CONTRADICTION_PROMPT = """\
27+
Compare the NEW content against the EXISTING trusted content below.
28+
Determine if the NEW content contradicts the EXISTING content.
29+
30+
<existing>
31+
{existing}
32+
</existing>
33+
34+
<new>
35+
{new_content}
36+
</new>
37+
38+
Respond with ONLY a JSON object:
39+
{{"risk_score": 0.0, "reasoning": "one sentence", "flags": []}}
40+
41+
risk_score: 0.0 (no contradiction) to 1.0 (direct contradiction).
42+
flags: ["contradiction"] if contradictory, else [].
43+
reasoning: one-sentence explanation.\
44+
"""
45+
46+
47+
@dataclass
class CorrelateConfig:
    """Settings read by the correlate stage (``run_correlate``).

    Attributes:
        screener: Object whose ``screen(prompt)`` coroutine is awaited to score
            the contradiction prompt. The stage is skipped when this is None.
        vault: Object whose ``search(...)`` coroutine is awaited to find trusted
            content. The stage is skipped when this is None.
        max_trusted_docs: Passed to the vault search as ``top_k``.
        max_content_chars: Character cap applied to each trusted document and to
            the new content before they are placed in the prompt.
        risk_threshold: A screener ``risk_score`` at or above this value yields
            a FLAG result.
        tenant_id: Forwarded unchanged to the vault search.
    """

    # LLM screener used for the semantic comparison; None disables the stage.
    screener: LLMScreener | None = None
    # Vault handle used to look up trusted content; None disables the stage.
    vault: Any = None  # AsyncVault (avoid circular import)
    # Upper bound on search hits requested from the vault.
    max_trusted_docs: int = 5
    # Per-text truncation limit, in characters.
    max_content_chars: int = 2000
    # Inclusive lower bound of risk scores that are flagged.
    risk_threshold: float = 0.7
    # Tenant scope for the vault search, if any.
    tenant_id: str | None = None
57+
58+
59+
# Leading characters of the new content used as the vault search query.
_SEARCH_QUERY_CHARS = 500


def _neutralize_delimiters(text: str) -> str:
    """Rewrite literal ``<existing>``/``<new>`` tags (any case) to bracket form.

    The contradiction prompt uses these tags to separate trusted text from the
    submission; left intact inside interpolated text they could forge a section
    boundary.
    """
    import re  # function-scope import: the module does not import ``re``

    return re.sub(
        r"<\s*(/?)\s*(existing|new)\s*>",
        r"[\1\2]",
        text,
        flags=re.IGNORECASE,
    )


def _correlate_record(
    result: MembraneResult,
    started: float | None = None,
    **fields: Any,
) -> MembraneStageRecord:
    """Build a CORRELATE stage record, adding elapsed ms when ``started`` is given."""
    if started is not None:
        fields["duration_ms"] = int((time.monotonic() - started) * 1000)
    return MembraneStageRecord(stage=MembraneStage.CORRELATE, result=result, **fields)


async def run_correlate(
    content: str,
    config: CorrelateConfig | None = None,
) -> MembraneStageRecord:
    """Check if new content contradicts existing trusted knowledge.

    The first characters of ``content`` are used to search the vault for
    canonical-tier material; the hits and the new content are then placed in
    the contradiction prompt and scored by the configured screener.

    Args:
        content: The new text content to check.
        config: Correlate configuration (includes screener + vault ref).

    Returns:
        MembraneStageRecord for the CORRELATE stage:
        - SKIP when config, screener or vault is missing, or when the vault
          search or the screener raises (the exception type is named in
          ``reasoning``).
        - PASS when no trusted text is available for comparison, or when the
          screener's risk score is below ``config.risk_threshold``.
        - FLAG when the risk score is at or above ``config.risk_threshold``.
    """
    if config is None or config.screener is None or config.vault is None:
        return _correlate_record(
            MembraneResult.SKIP,
            reasoning="No screener or vault reference, stage skipped",
        )

    start = time.monotonic()

    # Search vault for related trusted content
    try:
        related = await config.vault.search(
            content[:_SEARCH_QUERY_CHARS],
            tenant_id=config.tenant_id,
            top_k=config.max_trusted_docs,
            min_trust_tier="canonical",
        )
    except Exception as e:
        # Best-effort stage: a search failure skips the check instead of
        # aborting the pipeline. Name the exception type, as the screener
        # error path does, so the cause is visible in the record.
        return _correlate_record(
            MembraneResult.SKIP,
            start,
            reasoning=f"Vault search failed during correlate: {type(e).__name__}",
        )

    # Drop hits without text: slicing a missing/None ``content`` would raise,
    # and an empty segment gives the screener nothing to compare against.
    trusted = [hit for hit in (related or []) if getattr(hit, "content", None)]

    if not trusted:
        return _correlate_record(
            MembraneResult.PASS,  # nosec B105
            start,
            reasoning="No trusted content found for comparison",
        )

    limit = config.max_content_chars

    # Build existing content summary from trusted docs
    existing_summary = "\n---\n".join(
        _neutralize_delimiters(hit.content[:limit]) for hit in trusted
    )

    # Ask LLM to check for contradictions. The submission is untrusted, so its
    # delimiter look-alikes are neutralized before interpolation.
    prompt = _CONTRADICTION_PROMPT.format(
        existing=existing_summary,
        new_content=_neutralize_delimiters(content[:limit]),
    )

    try:
        screening: ScreeningResult = await config.screener.screen(prompt)
    except Exception as e:
        return _correlate_record(
            MembraneResult.SKIP,
            start,
            reasoning=f"LLM screener error: {type(e).__name__}",
        )

    if screening.risk_score >= config.risk_threshold:
        # The screener scores the combined prompt; the names reported are the
        # first (up to three) trusted hits that were included in it.
        contradicted_names = [str(hit.resource_name) for hit in trusted[:3]]
        return _correlate_record(
            MembraneResult.FLAG,
            start,
            risk_score=screening.risk_score,
            reasoning=f"Contradicts trusted content: {', '.join(contradicted_names)}",
            matched_patterns=screening.flags or ["contradiction"],
        )

    return _correlate_record(
        MembraneResult.PASS,  # nosec B105
        start,
        risk_score=screening.risk_score,
        reasoning=screening.reasoning,
    )

src/qp_vault/membrane/pipeline.py

Lines changed: 33 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,11 @@
44
"""Membrane Pipeline: orchestrates multi-stage content screening.
55
66
Runs content through the Membrane stages:
7-
1. INNATE_SCAN: pattern-based detection (regex, blocklists)
8-
2. ADAPTIVE_SCAN: LLM-based semantic screening (optional, requires LLMScreener)
9-
3. RELEASE: risk-proportionate gating decision
7+
1. REMEMBER: check against known attack pattern registry (fast pre-check)
8+
2. INNATE_SCAN: pattern-based detection (regex, blocklists)
9+
3. ADAPTIVE_SCAN: LLM-based semantic screening (optional)
10+
4. CORRELATE: cross-document contradiction detection (optional)
11+
5. RELEASE: risk-proportionate gating decision
1012
1113
Stages are sequential. Each produces a MembraneStageRecord. The release
1214
gate aggregates all prior results into a final pass/quarantine/reject decision.
@@ -23,6 +25,8 @@
2325

2426
if TYPE_CHECKING:
2527
from qp_vault.membrane.adaptive_scan import AdaptiveScanConfig
28+
from qp_vault.membrane.correlate import CorrelateConfig
29+
from qp_vault.membrane.remember import AttackRegistry
2630

2731

2832
class MembranePipeline:
@@ -34,7 +38,8 @@ class MembranePipeline:
3438
Args:
3539
innate_config: Configuration for the innate scan stage.
3640
adaptive_config: Configuration for the adaptive (LLM) scan stage.
37-
If None or screener is None, adaptive scan is skipped.
41+
correlate_config: Configuration for cross-document correlation.
42+
attack_registry: Attack pattern registry for the REMEMBER stage.
3843
enabled: Whether Membrane screening is active. Default True.
3944
"""
4045

@@ -43,10 +48,14 @@ def __init__(
4348
*,
4449
innate_config: InnateScanConfig | None = None,
4550
adaptive_config: AdaptiveScanConfig | None = None,
51+
correlate_config: CorrelateConfig | None = None,
52+
attack_registry: AttackRegistry | None = None,
4653
enabled: bool = True,
4754
) -> None:
4855
self._innate_config = innate_config
4956
self._adaptive_config = adaptive_config
57+
self._correlate_config = correlate_config
58+
self._attack_registry = attack_registry
5059
self._enabled = enabled
5160

5261
async def screen(self, content: str) -> MembranePipelineStatus:
@@ -73,16 +82,26 @@ async def screen(self, content: str) -> MembranePipelineStatus:
7382

7483
stages: list[MembraneStageRecord] = []
7584

76-
# Stage 1: Innate scan (regex patterns)
85+
# Stage 1: REMEMBER (fast pre-check against known attack patterns)
86+
from qp_vault.membrane.remember import run_remember
87+
remember_result = await run_remember(content, self._attack_registry)
88+
stages.append(remember_result)
89+
90+
# Stage 2: Innate scan (regex patterns)
7791
innate_result = await run_innate_scan(content, self._innate_config)
7892
stages.append(innate_result)
7993

80-
# Stage 2: Adaptive scan (LLM-based, optional)
94+
# Stage 3: Adaptive scan (LLM-based, optional)
8195
from qp_vault.membrane.adaptive_scan import run_adaptive_scan
8296
adaptive_result = await run_adaptive_scan(content, self._adaptive_config)
8397
stages.append(adaptive_result)
8498

85-
# Stage 3: Release gate (aggregates all prior results)
99+
# Stage 4: Correlate (cross-document contradiction, optional)
100+
from qp_vault.membrane.correlate import run_correlate
101+
correlate_result = await run_correlate(content, self._correlate_config)
102+
stages.append(correlate_result)
103+
104+
# Stage 5: Release gate (aggregates all prior results)
86105
release_result = await evaluate_release(stages)
87106
stages.append(release_result)
88107

@@ -97,6 +116,13 @@ async def screen(self, content: str) -> MembranePipelineStatus:
97116
risk_scores = [s.risk_score for s in stages if s.result != MembraneResult.SKIP]
98117
aggregate_risk = max(risk_scores) if risk_scores else 0.0
99118

119+
# Learn from flagged content (feed REMEMBER registry)
120+
if overall in (MembraneResult.FAIL, MembraneResult.FLAG) and self._attack_registry:
121+
all_flags = []
122+
for s in stages:
123+
all_flags.extend(s.matched_patterns)
124+
self._attack_registry.learn(content, all_flags, aggregate_risk)
125+
100126
return MembranePipelineStatus(
101127
stages=stages,
102128
overall_result=overall,

0 commit comments

Comments
 (0)