From 75ab721a190914bce1f73af1137e46123b9e3211 Mon Sep 17 00:00:00 2001 From: Patrick Roland Date: Mon, 8 Jun 2026 19:42:53 -0500 Subject: [PATCH 01/24] feat(osint): add passive executor and resolver scoping --- docs/how-to/passive-osint-enrichment.md | 70 ++++ docs/rfcs/RFC-016-osint-layer.md | 24 +- mkdocs.yml | 1 + src/zettelforge/osint/__init__.py | 48 ++- .../infrastructure/bgp_collector.py | 9 +- src/zettelforge/osint/entity_resolver.py | 143 ++++++- src/zettelforge/osint/executor.py | 369 ++++++++++++++++++ tests/test_osint_entity_resolver.py | 79 ++++ tests/test_osint_executor.py | 197 ++++++++++ 9 files changed, 894 insertions(+), 46 deletions(-) create mode 100644 docs/how-to/passive-osint-enrichment.md create mode 100644 src/zettelforge/osint/executor.py create mode 100644 tests/test_osint_entity_resolver.py create mode 100644 tests/test_osint_executor.py diff --git a/docs/how-to/passive-osint-enrichment.md b/docs/how-to/passive-osint-enrichment.md new file mode 100644 index 0000000..9886756 --- /dev/null +++ b/docs/how-to/passive-osint-enrichment.md @@ -0,0 +1,70 @@ +--- +title: "Passive OSINT Enrichment" +description: "Run the RFC-016 OSINT executor, persist collector output into the knowledge graph, and keep active scanning gated behind explicit opt-in." +diataxis_type: "how-to" +audience: "Python developers and CTI operators using ZettelForge" +tags: [osint, enrichment, knowledge-graph, dns, whois, bgp, passive-recon] +last_updated: "2026-06-08" +version: "2.7.0" +--- + +# Passive OSINT Enrichment + +ZettelForge ships a passive OSINT executor that runs registered collectors, validates each tuple against the ontology, canonicalizes entity values, and persists nodes and edges into the knowledge graph. + +## Install + +Install the OSINT extra when you need DNS, WHOIS, or IP/ASN enrichment: + +```bash +pip install "zettelforge[osint]" +``` + +The base package already includes `httpx`, so passive BGPView lookups work with the standard install. 
The OSINT extra adds `dnspython`, `python-whois`, and `ipwhois` for the DNS and WHOIS collectors. + +## Run a passive enrichment + +```python +from zettelforge.osint import run_osint_collection + +result = run_osint_collection("DomainName", "Example.COM.") +print(result.canonical_input_value) # example.com +print(result.persisted_count) +print(result.error_count) +``` + +The executor accepts these seed types: `DomainName`, `IPv4Address`, `IPv6Address`, `ASNumber`, and `Netblock`. + +Common passive flows: + +- `DomainName` seeds drive DNS, WHOIS, and certificate transparency collectors. +- `IPv4Address` and `IPv6Address` seeds drive WHOIS enrichment. +- `ASNumber` seeds drive passive BGP prefix lookups. + +## Dry-run or narrow scope + +Pass `persist=False` to validate collector output without writing to the knowledge graph (collectors still run their lookups; only the graph writes are skipped): + +```python +from zettelforge.osint import run_osint_collection + +result = run_osint_collection("ASNumber", "AS15169", persist=False) +``` + +Use `collector_names=(...)` when you want to run only specific collectors from the registry. + +## Safety controls + +Active port scanning stays disabled unless the operator explicitly enables it: + +```bash +export ZETTELFORGE_OSINT_ACTIVE_SCAN=1 +``` + +Without that flag, the port scanner returns an empty result and no probe is sent. Keep that flag unset unless you own the target network or have explicit authorization to scan it. + +## What gets persisted + +The executor writes canonical entity values to the graph, so duplicate spellings collapse onto the same nodes. For example, `AS15169` and `15169` resolve to the same canonical ASN node, and alternate domain spellings register as aliases for the same canonical domain node. + +Errors are collected per collector or tuple. A single failing collector does not abort the run.
diff --git a/docs/rfcs/RFC-016-osint-layer.md b/docs/rfcs/RFC-016-osint-layer.md index 1219d19..3b5ffa5 100644 --- a/docs/rfcs/RFC-016-osint-layer.md +++ b/docs/rfcs/RFC-016-osint-layer.md @@ -1,8 +1,8 @@ # RFC-016: ZettelForge OSINT Layer -## Status (2026-04-28) +## Status (2026-06-08) -**Phase 1 (Infrastructure): functional. Phases 2-5: declared, collectors stubbed.** +**Phase 1 (Infrastructure): functional. Phase 1.5: executor, resolver wiring, and passive BGP lookup shipped. Phases 2-5 remain declared/stubbed.** Branch: `rfc/osint-layer-scaffold` (PR #147 amended). What ships: @@ -12,15 +12,17 @@ What ships: validates against. Auto-merged into the global ``ENTITY_TYPES`` / ``RELATION_TYPES`` at import time. - **Phase 1 collectors (functional)** — ``dns_collector`` (A/AAAA/NS/MX), - ``whois_collector`` (domain + IP RDAP), ``cert_collector`` (crt.sh). - All sync, all mocked in tests, no network at test time. -- **Phase 1.5 + 2-5 collectors (stubs)** — ``bgp_collector``, - ``port_scanner`` (gated behind ``ZETTELFORGE_OSINT_ACTIVE_SCAN``), - Phase 2-4 collectors (Hunter, Holehe, Namechk, Wappalyzer, BuiltWith, - Twitter, Hashtag, HIBP, Breach Directory). Each registers metadata so - discovery works; each returns ``[]`` until its API integration lands. -- **Tests** — 67 new mocked tests covering entity validation, edge - validation, canonicalization, and collector shape. + ``whois_collector`` (domain + IP RDAP), ``cert_collector`` (crt.sh), plus + the passive ``bgp_collector`` for ASN prefix lookups. All sync, all mocked + in tests, no network at test time. +- **Phase 1.5 executor + resolver wiring** — ``run_osint_collection`` drives + registry discovery, ontology validation, canonicalization, and KG + persistence through ``entity_resolver`` and ``KnowledgeGraph``. + ``port_scanner`` remains gated behind ``ZETTELFORGE_OSINT_ACTIVE_SCAN``; + later Phase 2-4 collectors still register as stubs until their API + integrations land. 
+- **Tests** — Focused mocked coverage for entity validation, edge + validation, canonicalization, executor ingest, and passive BGP lookup. - **Investigation / EntityResolver** — Phase 4 / 1.5 utility scaffolds. Three deviations from the literal text of this RFC are tracked in diff --git a/mkdocs.yml b/mkdocs.yml index 5926004..1f099d6 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -67,6 +67,7 @@ nav: - Query APT Tools: how-to/query-apt-tools.md - Reproduce Benchmarks: how-to/reproduce-benchmarks.md - Resolve Aliases: how-to/resolve-aliases.md + - Passive OSINT Enrichment: how-to/passive-osint-enrichment.md - Run Temporal Query: how-to/run-temporal-query.md - Set Up MCP Server: how-to/set-up-mcp-server.md - Store Threat Actor: how-to/store-threat-actor.md diff --git a/src/zettelforge/osint/__init__.py b/src/zettelforge/osint/__init__.py index 59ed5c3..5ef3356 100644 --- a/src/zettelforge/osint/__init__.py +++ b/src/zettelforge/osint/__init__.py @@ -6,22 +6,39 @@ ``TRANSFORM_REGISTRY`` at module load time. Phase 1 (Infrastructure) ships functional collectors (DNS, WHOIS, crt.sh) -plus stubs for BGP and port scanning. Phases 2-5 ship as graceful stubs: -each collector registers its metadata so callers can discover it, and -returns ``[]`` until the underlying API integration lands. +plus the passive BGP collector. Phase 1.5 adds the OSINT executor and +resolver wiring; active port scanning remains gated behind explicit +operator opt-in. Later collectors stay as graceful stubs until their +respective phases land. Public surface: -- ``OSINT_ENTITY_TYPES`` / ``OSINT_RELATION_TYPES`` / ``ONTOLOGY`` — additive +- ``OSINT_ENTITY_TYPES`` / ``OSINT_RELATION_TYPES`` / ``ONTOLOGY`` -- additive ontology declarations. -- ``TRANSFORM_REGISTRY`` — the singleton registry. -- ``CollectorTuple`` — collector return-row shape. -- ``TransformMetadata`` / ``TransformRegistry`` — types for adding new +- ``TRANSFORM_REGISTRY`` -- the singleton registry. 
+- ``CollectorTuple`` -- collector return-row shape. +- ``TransformMetadata`` / ``TransformRegistry`` -- types for adding new collectors. -- ``Investigation`` / ``EntityResolver`` — Phase 4 / Phase 1.5 utilities - (re-exported from their modules). +- ``add_resolved`` / ``canonicalise_value`` / ``resolve`` -- entity + resolver helpers. +- ``run_osint_collection`` / ``collect_osint`` -- the passive ingest API. """ +from zettelforge.osint.entity_resolver import ( + add_resolved, + canonicalise_organization, + canonicalise_value, + register_alias, + resolve, +) +from zettelforge.osint.executor import ( + SUPPORTED_SEED_TYPES, + OSINTCollectionResult, + OSINTExecutionError, + PersistedOSINTTuple, + collect_osint, + run_osint_collection, +) from zettelforge.osint.ontology import ( ONTOLOGY, OSINT_ENTITY_TYPES, @@ -46,7 +63,7 @@ ) # Merge OSINT types into the global ontology before any collector runs. -# Idempotent — safe under repeated imports (pytest, REPL re-imports, etc.). +# Idempotent -- safe under repeated imports (pytest, REPL re-imports, etc.). merge_into_global_ontology() # Trigger collector self-registration. 
Each subpackage's __init__ imports @@ -62,11 +79,18 @@ "ONTOLOGY", "OSINT_ENTITY_TYPES", "OSINT_RELATION_TYPES", + "SUPPORTED_SEED_TYPES", "TRANSFORM_REGISTRY", "CollectorFn", "CollectorTuple", + "OSINTCollectionResult", + "OSINTExecutionError", + "PersistedOSINTTuple", "TransformMetadata", "TransformRegistry", + "add_resolved", + "canonicalise_organization", + "canonicalise_value", "canonicalize_asn", "canonicalize_cidr", "canonicalize_domain", @@ -75,6 +99,10 @@ "canonicalize_port", "canonicalize_url", "canonicalize_web_title", + "collect_osint", "get_transform_registry", "merge_into_global_ontology", + "register_alias", + "resolve", + "run_osint_collection", ] diff --git a/src/zettelforge/osint/collectors/infrastructure/bgp_collector.py b/src/zettelforge/osint/collectors/infrastructure/bgp_collector.py index 05e69db..cb12e00 100644 --- a/src/zettelforge/osint/collectors/infrastructure/bgp_collector.py +++ b/src/zettelforge/osint/collectors/infrastructure/bgp_collector.py @@ -1,14 +1,13 @@ """ -BGP collector — Phase 1.5 stub (RFC-016 §5). +BGP collector — Phase 1.5 passive enrichment (RFC-016 §5). Fetches ASN / netblock data from BGPView's public JSON API. Sync; matches the rest of the codebase. Returns ``[]`` on any HTTP / parse failure or when ``httpx`` is unavailable; the agent sees an empty result, never an exception. -The Phase 1 PR ships this with the registration metadata and the live -BGPView call wired up but treated as best-effort. Hardening (retry / caching -/ rate-limit budget) lands with Phase 1.5. +This collector is intentionally passive and best-effort. Hardening (retry / +caching / rate-limit budget) is intentionally deferred. 
""" from __future__ import annotations @@ -114,7 +113,7 @@ def collect(input_entity_type: str, input_value: str) -> list[CollectorTuple]: _METADATA = TransformMetadata( name="bgp_collector", - description="BGPView lookup: enumerate netblocks announced by an ASN.", + description="Passive BGPView lookup: enumerate netblocks announced by an ASN.", input_types=("ASNumber",), output_types=(("Netblock", "part_of_as"),), api_dependencies=("bgpview",), diff --git a/src/zettelforge/osint/entity_resolver.py b/src/zettelforge/osint/entity_resolver.py index 2aeefdc..6c53907 100644 --- a/src/zettelforge/osint/entity_resolver.py +++ b/src/zettelforge/osint/entity_resolver.py @@ -1,15 +1,16 @@ """ -OSINT Entity Resolver — canonical key normalisation and alias index. +OSINT Entity Resolver -- canonical key normalisation and alias index. Phase 1.5 utility scaffold. Provides merge-when-duplicate semantics for the OSINT layer: - Canonical key: ``(entity_type, normalised_value)`` -- Alias index: alternate representations → canonical node ID +- Alias index: alternate representations -> canonical node ID - Merge strategy: LWW for properties, accumulate for edges -Phase 1 collectors don't yet route through this module; the Phase 1.5 -work that wires it in lives behind the same RFC-016 umbrella. +The passive OSINT executor wires these helpers into KG ingest; later +workflow orchestration can reuse the same canonicalisation and aliasing +logic. Note on canonical key conventions: this module mirrors the conventions in ``zettelforge.osint.ontology`` (``DomainName`` not ``Domain``, @@ -28,6 +29,10 @@ canonicalize_asn, canonicalize_cidr, canonicalize_domain, + canonicalize_mx, + canonicalize_port, + canonicalize_url, + canonicalize_web_title, ) if TYPE_CHECKING: @@ -86,7 +91,7 @@ def canonicalise_asn(value: str) -> str: Mirrors :func:`zettelforge.osint.ontology.canonicalize_asn`. Accepts ``AS12345``, ``as12345``, ``"12345"``, or an int. 
Raises - ``ValueError`` on hex (``0x3039``) or other non-decimal input — + ``ValueError`` on hex (``0x3039``) or other non-decimal input -- earlier scaffold versions silently stripped non-digits which turned ``0x3039`` into ``0339`` instead of the hex value 12345. """ @@ -98,28 +103,68 @@ def canonicalise_netblock(value: str) -> str: return canonicalize_cidr(value) -# Global alias index: canonical_key → node_id -_ALIAS_INDEX: dict[str, str] = {} -# Reverse: alternate representation → canonical key -_ALIAS_REVERSE: dict[str, str] = {} +def canonicalise_url(value: str) -> str: + """Return the canonical URL form used by ``URL`` and ``Website`` nodes.""" + return canonicalize_url(value) + + +def canonicalise_organization(value: str) -> str: + """Return a stable canonical form for organization names. + + WHOIS and RDAP sources vary in case and spacing. Normalising to a + lowercased, whitespace-collapsed form keeps duplicates from being + reinserted under cosmetic variations. + """ + return re.sub(r"\s+", " ", value).strip().casefold() -def resolve(entity_type: str, value: str) -> str | None: - """Return the canonical key for an entity if already indexed, else None.""" +# Global alias index: canonical_key -> node_id. This bucket is the default +# scope used when callers do not pass a KnowledgeGraph explicitly. 
+_ALIAS_INDEX: dict[str, str] = {} +# Reverse: alternate representation -> canonical key +_ALIAS_REVERSE: dict[str, str] = {} +_KG_ALIAS_INDEX_ATTR = "_osint_alias_index" +_KG_ALIAS_REVERSE_ATTR = "_osint_alias_reverse" + + +def _alias_maps(kg: KnowledgeGraph | None) -> tuple[dict[str, str], dict[str, str]]: + if kg is None: + return _ALIAS_INDEX, _ALIAS_REVERSE + index = getattr(kg, _KG_ALIAS_INDEX_ATTR, None) + if index is None: + index = {} + setattr(kg, _KG_ALIAS_INDEX_ATTR, index) + reverse = getattr(kg, _KG_ALIAS_REVERSE_ATTR, None) + if reverse is None: + reverse = {} + setattr(kg, _KG_ALIAS_REVERSE_ATTR, reverse) + return index, reverse + + +def resolve(entity_type: str, value: str, *, kg: KnowledgeGraph | None = None) -> str | None: + """Return the canonical node ID for an entity if already indexed, else None.""" canonical = _canonical_key(entity_type, value) - return _ALIAS_INDEX.get(canonical) + alias_index, _ = _alias_maps(kg) + return alias_index.get(canonical) -def register_alias(canonical_key: str, alternate: str, node_id: str) -> None: +def register_alias( + canonical_key: str, + alternate: str, + node_id: str, + *, + kg: KnowledgeGraph | None = None, +) -> None: """Record an alternate representation that maps to the canonical node.""" - _ALIAS_REVERSE[alternate] = canonical_key - _ALIAS_INDEX[canonical_key] = node_id + alias_index, alias_reverse = _alias_maps(kg) + alias_reverse[alternate] = canonical_key + alias_index[canonical_key] = node_id def _canonical_key(entity_type: str, value: str) -> str: """Build a normalised ``":"`` string. - The entity type names match the global ``ENTITY_TYPES`` table — + The entity type names match the global ``ENTITY_TYPES`` table -- ``DomainName`` and not ``Domain``. Unknown types fall through to a whitespace-trimmed value so callers don't have to special-case every possible type. 
@@ -128,17 +173,54 @@ def _canonical_key(entity_type: str, value: str) -> str: return f"{entity_type}:{canonicalise_ipv4(value)}" if entity_type == "IPv6Address": return f"{entity_type}:{ipaddress.IPv6Address(value.strip())}" - if entity_type in ("DomainName", "Website"): + if entity_type == "DomainName": return f"{entity_type}:{canonicalise_domain(value)}" + if entity_type in ("URL", "Website"): + return f"{entity_type}:{canonicalise_url(value)}" if entity_type == "PhoneNumber": return f"{entity_type}:{canonicalise_phone(value)}" if entity_type == "ASNumber": return f"{entity_type}:{canonicalise_asn(value)}" if entity_type == "Netblock": return f"{entity_type}:{canonicalise_netblock(value)}" + if entity_type == "NSRecord": + return f"{entity_type}:{canonicalise_domain(value)}" + if entity_type == "Organization": + return f"{entity_type}:{canonicalise_organization(value)}" + if entity_type == "MXRecord": + raw = value.strip() + if " " not in raw: + raise ValueError(f"MXRecord must be 'priority exchange', got {value!r}") + priority, exchange = raw.split(None, 1) + return f"{entity_type}:{canonicalize_mx(priority, exchange)}" + if entity_type == "Port": + raw = value.strip() + if "/" not in raw: + raise ValueError(f"Port must be 'number/protocol', got {value!r}") + number, protocol = raw.split("/", 1) + return f"{entity_type}:{canonicalize_port(number, protocol)}" + if entity_type == "WebTitle": + raw = value.strip() + if "::" not in raw: + raise ValueError(f"WebTitle must be 'url::title', got {value!r}") + url, title = raw.split("::", 1) + return f"{entity_type}:{canonicalize_web_title(url, title)}" return f"{entity_type}:{value.strip()}" +def canonicalise_value(entity_type: str, value: str) -> str: + """Return the canonical value portion for an entity type. + + This is the public counterpart to ``_canonical_key`` for callers that + need to pass the same value to ``KnowledgeGraph.add_node()`` and + ``KnowledgeGraph.add_edge()``. 
Keeping KG values canonical avoids creating + duplicate nodes for equivalent OSINT entities such as ``AS15169`` and + ``15169``. + """ + canonical = _canonical_key(entity_type, value) + return canonical.split(":", 1)[1] + + def add_resolved( kg: KnowledgeGraph, entity_type: str, @@ -151,14 +233,35 @@ def add_resolved( on properties. """ canonical = _canonical_key(entity_type, entity_value) - existing_id = _ALIAS_INDEX.get(canonical) + canonical_value = canonical.split(":", 1)[1] + alias_index, alias_reverse = _alias_maps(kg) + existing_id = alias_index.get(canonical) if existing_id: node = kg.get_node_by_id(existing_id) if node and properties: node["properties"].update(properties) node["updated_at"] = datetime.now().isoformat() + if canonical_value != entity_value: + alias_reverse[entity_value] = canonical return existing_id, False - node_id = kg.add_node(entity_type, entity_value, properties) - _ALIAS_INDEX[canonical] = node_id + node_id = kg.add_node(entity_type, canonical_value, properties) + alias_index[canonical] = node_id + if canonical_value != entity_value: + alias_reverse[entity_value] = canonical return node_id, True + + +__all__ = [ + "add_resolved", + "canonicalise_asn", + "canonicalise_domain", + "canonicalise_ipv4", + "canonicalise_netblock", + "canonicalise_organization", + "canonicalise_phone", + "canonicalise_url", + "canonicalise_value", + "register_alias", + "resolve", +] diff --git a/src/zettelforge/osint/executor.py b/src/zettelforge/osint/executor.py new file mode 100644 index 0000000..67e7d58 --- /dev/null +++ b/src/zettelforge/osint/executor.py @@ -0,0 +1,369 @@ +""" +OSINT collector executor and KG ingestion path (RFC-016 Phase 1.5). + +This module turns registered collector functions into an end-to-end passive +OSINT enrichment API: + +1. Resolve matching collectors from ``TRANSFORM_REGISTRY``. +2. Run them fail-closed. +3. Validate emitted ``CollectorTuple`` rows against the ontology. +4. 
Canonicalize/dedupe entities via ``entity_resolver``. +5. Persist nodes and edges through ``KnowledgeGraph.add_node`` / ``add_edge``. + +Collectors remain synchronous and directly testable. The executor owns the +cross-cutting concerns that would otherwise be duplicated by every caller. +""" + +from __future__ import annotations + +from collections.abc import Sequence +from dataclasses import dataclass, field +from datetime import datetime +from typing import Any + +from zettelforge.knowledge_graph import KnowledgeGraph, get_knowledge_graph +from zettelforge.log import get_logger +from zettelforge.ontology import OntologyValidator +from zettelforge.osint.entity_resolver import add_resolved, canonicalise_value +from zettelforge.osint.ontology import merge_into_global_ontology +from zettelforge.osint.transform_registry import ( + CollectorTuple, + TransformMetadata, + TransformRegistry, + get_transform_registry, +) + +_logger = get_logger("zettelforge.osint.executor") + +SUPPORTED_SEED_TYPES = ("DomainName", "IPv4Address", "IPv6Address", "ASNumber", "Netblock") + + +@dataclass(frozen=True) +class OSINTExecutionError: + """Non-fatal executor error for a collector or tuple.""" + + collector_name: str + message: str + tuple_index: int | None = None + + +@dataclass(frozen=True) +class PersistedOSINTTuple: + """A validated collector tuple after KG persistence.""" + + collector_name: str + output_entity_type: str + output_value: str + output_node_id: str + edge_id: str + from_entity_type: str + from_value: str + to_entity_type: str + to_value: str + edge_type: str + + +@dataclass +class OSINTCollectionResult: + """Structured return value from ``run_osint_collection``.""" + + input_entity_type: str + input_value: str + canonical_input_value: str + seed_node_id: str | None + collectors_run: list[str] = field(default_factory=list) + tuples_collected: int = 0 + persisted: list[PersistedOSINTTuple] = field(default_factory=list) + errors: list[OSINTExecutionError] = 
field(default_factory=list) + started_at: str = field(default_factory=lambda: datetime.now().isoformat()) + finished_at: str | None = None + + @property + def persisted_count(self) -> int: + return len(self.persisted) + + @property + def error_count(self) -> int: + return len(self.errors) + + +_ENDPOINT_PROP_KEYS: dict[str, tuple[str, ...]] = { + "ASNumber": ("asn", "number"), + "DomainName": ("domain", "value"), + "IPv4Address": ("ip", "address", "value"), + "IPv6Address": ("ip", "address", "value"), + "MXRecord": ("mx", "value", "exchange"), + "NSRecord": ("ns", "nsdname", "value"), + "Netblock": ("cidr", "netblock", "prefix"), + "Organization": ("organization", "org", "name"), + "Port": ("port", "value"), + "Website": ("url", "website", "value"), +} + + +def run_osint_collection( + input_entity_type: str, + input_value: str, + *, + kg: KnowledgeGraph | None = None, + registry: TransformRegistry | None = None, + validator: OntologyValidator | None = None, + collector_names: Sequence[str] | None = None, + persist: bool = True, +) -> OSINTCollectionResult: + """Run registered OSINT collectors for one seed entity. + + Parameters + ---------- + input_entity_type: + Seed type. Phase 1.5 supports DomainName, IPv4Address, IPv6Address, + ASNumber, and Netblock. + input_value: + Seed value. It is canonicalized before KG writes. + kg: + Optional ``KnowledgeGraph`` instance. Defaults to the global KG. + registry: + Optional collector registry. Defaults to ``TRANSFORM_REGISTRY``. + validator: + Optional ontology validator. + collector_names: + Optional allow-list of collector names to run for this seed. + persist: + When false, collectors and validation run but no KG writes occur. 
+ """ + merge_into_global_ontology() + + if input_entity_type not in SUPPORTED_SEED_TYPES: + raise ValueError( + f"unsupported OSINT seed type {input_entity_type!r}; " + f"expected one of {', '.join(SUPPORTED_SEED_TYPES)}" + ) + + registry = registry or get_transform_registry() + validator = validator or OntologyValidator() + kg = kg or get_knowledge_graph() + allowed_collectors = None if collector_names is None else set(collector_names) + + canonical_input_value = canonicalise_value(input_entity_type, input_value) + seed_props = _entity_properties(input_entity_type, canonical_input_value) + _validate_entity_or_raise(validator, input_entity_type, seed_props) + + seed_node_id: str | None = None + if persist: + seed_node_id, _ = add_resolved(kg, input_entity_type, canonical_input_value, seed_props) + + result = OSINTCollectionResult( + input_entity_type=input_entity_type, + input_value=input_value, + canonical_input_value=canonical_input_value, + seed_node_id=seed_node_id, + ) + + matches = registry.find_by_input(input_entity_type) + if allowed_collectors is not None: + matches = [(meta, fn) for meta, fn in matches if meta.name in allowed_collectors] + + for meta, fn in matches: + result.collectors_run.append(meta.name) + try: + tuples = fn(input_entity_type, canonical_input_value) + except Exception as exc: # fail-closed; one bad collector cannot abort the run + _logger.warning("osint_collector_failed", collector=meta.name, error=str(exc)) + result.errors.append(OSINTExecutionError(meta.name, str(exc))) + continue + + result.tuples_collected += len(tuples) + for index, tup in enumerate(tuples): + try: + _validate_tuple(meta, tup, validator) + if persist: + persisted = _persist_tuple( + kg=kg, + validator=validator, + collector=meta, + tup=tup, + input_entity_type=input_entity_type, + canonical_input_value=canonical_input_value, + ) + result.persisted.append(persisted) + except ValueError as exc: + result.errors.append(OSINTExecutionError(meta.name, str(exc), 
tuple_index=index)) + + result.finished_at = datetime.now().isoformat() + return result + + +def collect_osint(*args: Any, **kwargs: Any) -> OSINTCollectionResult: + """Compatibility alias for agents that prefer verb-first naming.""" + return run_osint_collection(*args, **kwargs) + + +def _validate_tuple( + collector: TransformMetadata, + tup: CollectorTuple, + validator: OntologyValidator, +) -> None: + output_props = _entity_properties(tup.output_entity_type, tup.output_value, tup.output_props) + _validate_entity_or_raise(validator, tup.output_entity_type, output_props) + + ok, errors = validator.validate_relation( + tup.from_entity_type, tup.edge_type, tup.to_entity_type + ) + if not ok: + raise ValueError( + f"{collector.name} emitted invalid relation " + f"{tup.from_entity_type} -[{tup.edge_type}]-> {tup.to_entity_type}: " + + "; ".join(errors) + ) + + +def _persist_tuple( + *, + kg: KnowledgeGraph, + validator: OntologyValidator, + collector: TransformMetadata, + tup: CollectorTuple, + input_entity_type: str, + canonical_input_value: str, +) -> PersistedOSINTTuple: + output_value = canonicalise_value(tup.output_entity_type, tup.output_value) + output_props = _entity_properties(tup.output_entity_type, output_value, tup.output_props) + + from_value = _derive_endpoint_value(tup, "from", input_entity_type, canonical_input_value) + to_value = _derive_endpoint_value(tup, "to", input_entity_type, canonical_input_value) + + from_props = _endpoint_properties(tup.from_entity_type, from_value, tup, input_entity_type) + to_props = _endpoint_properties(tup.to_entity_type, to_value, tup, input_entity_type) + _validate_entity_or_raise(validator, tup.from_entity_type, from_props) + _validate_entity_or_raise(validator, tup.to_entity_type, to_props) + + output_node_id, _ = add_resolved(kg, tup.output_entity_type, output_value, output_props) + add_resolved(kg, tup.from_entity_type, from_value, from_props) + add_resolved(kg, tup.to_entity_type, to_value, to_props) + + edge_props 
= dict(tup.edge_props) + edge_props.setdefault("collector", collector.name) + edge_props.setdefault("source", collector.name) + edge_props.setdefault("osint", True) + edge_props.setdefault("edge_type", "osint") + + edge_id = kg.add_edge( + tup.from_entity_type, + from_value, + tup.to_entity_type, + to_value, + tup.edge_type, + edge_props, + ) + + return PersistedOSINTTuple( + collector_name=collector.name, + output_entity_type=tup.output_entity_type, + output_value=output_value, + output_node_id=output_node_id, + edge_id=edge_id, + from_entity_type=tup.from_entity_type, + from_value=from_value, + to_entity_type=tup.to_entity_type, + to_value=to_value, + edge_type=tup.edge_type, + ) + + +def _derive_endpoint_value( + tup: CollectorTuple, + side: str, + input_entity_type: str, + canonical_input_value: str, +) -> str: + if side == "from": + endpoint_type = tup.from_entity_type + if endpoint_type == input_entity_type: + return canonical_input_value + if endpoint_type == tup.output_entity_type: + return canonicalise_value(endpoint_type, tup.output_value) + elif side == "to": + endpoint_type = tup.to_entity_type + if endpoint_type == tup.output_entity_type: + return canonicalise_value(endpoint_type, tup.output_value) + if endpoint_type == input_entity_type: + return canonical_input_value + else: + raise ValueError(f"unknown endpoint side {side!r}") + + for key in _ENDPOINT_PROP_KEYS.get(endpoint_type, ("value",)): + raw = tup.edge_props.get(key) or tup.output_props.get(key) + if raw not in (None, ""): + return canonicalise_value(endpoint_type, str(raw)) + + raise ValueError( + f"cannot derive {side} endpoint value for {endpoint_type} from collector tuple " + f"{tup!r}; add an explicit edge property such as cidr/asn/value" + ) + + +def _endpoint_properties( + entity_type: str, + value: str, + tup: CollectorTuple, + input_entity_type: str, +) -> dict[str, Any]: + if entity_type == tup.output_entity_type: + return _entity_properties(entity_type, value, tup.output_props) + 
return _entity_properties(entity_type, value) + + +def _entity_properties( + entity_type: str, + value: str, + incoming: dict[str, Any] | None = None, +) -> dict[str, Any]: + props = dict(incoming or {}) + canonical = canonicalise_value(entity_type, value) + + if entity_type in ("DomainName", "IPv4Address", "IPv6Address", "URL"): + props.setdefault("value", canonical) + elif entity_type == "ASNumber": + props.setdefault("number", int(canonical)) + elif entity_type == "Netblock": + props.setdefault("cidr", canonical) + elif entity_type == "Organization": + props.setdefault("name", canonical) + elif entity_type == "NSRecord": + props.setdefault("nsdname", canonical) + elif entity_type == "MXRecord": + if "priority" not in props or "exchange" not in props: + priority, _, exchange = canonical.partition(" ") + if priority and exchange: + props.setdefault("priority", int(priority)) + props.setdefault("exchange", exchange) + elif entity_type == "Port": + if "number" not in props or "protocol" not in props: + number, _, protocol = canonical.partition("/") + if number and protocol: + props.setdefault("number", int(number)) + props.setdefault("protocol", protocol) + elif entity_type == "Website": + props.setdefault("url", canonical) + + return props + + +def _validate_entity_or_raise( + validator: OntologyValidator, + entity_type: str, + properties: dict[str, Any], +) -> None: + ok, errors = validator.validate_entity(entity_type, properties) + if not ok: + raise ValueError(f"invalid {entity_type} properties: " + "; ".join(errors)) + + +__all__ = [ + "SUPPORTED_SEED_TYPES", + "OSINTCollectionResult", + "OSINTExecutionError", + "PersistedOSINTTuple", + "collect_osint", + "run_osint_collection", +] diff --git a/tests/test_osint_entity_resolver.py b/tests/test_osint_entity_resolver.py new file mode 100644 index 0000000..ef67d88 --- /dev/null +++ b/tests/test_osint_entity_resolver.py @@ -0,0 +1,79 @@ +# ruff: noqa: S101 + +from __future__ import annotations + +import pytest + 
+from zettelforge import osint as _osint # noqa: F401 -- side effects +from zettelforge.knowledge_graph import KnowledgeGraph +from zettelforge.osint import entity_resolver + + +def test_add_resolved_registers_alias_for_existing_node(tmp_path, monkeypatch) -> None: + monkeypatch.setattr(entity_resolver, '_ALIAS_INDEX', {}) + monkeypatch.setattr(entity_resolver, '_ALIAS_REVERSE', {}) + + kg = KnowledgeGraph(data_dir=str(tmp_path)) + node_id, created = entity_resolver.add_resolved( + kg, + 'DomainName', + 'example.com', + {'value': 'example.com'}, + ) + assert created is True + + resolved_id, created_again = entity_resolver.add_resolved( + kg, + 'DomainName', + 'Example.COM.', + {'value': 'example.com', 'source': 'unit-test'}, + ) + assert resolved_id == node_id + assert created_again is False + assert kg._osint_alias_reverse['Example.COM.'] == 'DomainName:example.com' + assert entity_resolver.resolve('DomainName', 'Example.COM.', kg=kg) == node_id + + node = kg.get_node('DomainName', 'example.com') + assert node is not None + assert node['properties']['value'] == 'example.com' + assert node['properties']['source'] == 'unit-test' + + +def test_canonicalise_organization_normalizes_case_and_whitespace() -> None: + assert entity_resolver.canonicalise_value('Organization', ' Example Corp ') == 'example corp' + + +@pytest.mark.parametrize( + ('entity_type', 'raw', 'expected'), + [ + ('URL', 'HTTPS://Example.com/path', 'https://example.com/path'), + ('Website', 'HTTP://Example.com', 'http://example.com/'), + ('NSRecord', 'NS1.Example.com.', 'ns1.example.com'), + ('MXRecord', '10 MX.Example.com.', '10 mx.example.com'), + ('Port', '443/TCp', '443/tcp'), + ('WebTitle', 'HTTPS://Example.com/:: Title ', 'https://example.com/::Title'), + ], +) +def test_canonicalise_value_covers_common_osint_node_shapes( + entity_type: str, + raw: str, + expected: str, +) -> None: + assert entity_resolver.canonicalise_value(entity_type, raw) == expected + + +def 
test_add_resolved_scopes_aliases_to_each_knowledge_graph(tmp_path, monkeypatch) -> None: + monkeypatch.setattr(entity_resolver, '_ALIAS_INDEX', {}) + monkeypatch.setattr(entity_resolver, '_ALIAS_REVERSE', {}) + + kg_one = KnowledgeGraph(data_dir=str(tmp_path / 'one')) + kg_two = KnowledgeGraph(data_dir=str(tmp_path / 'two')) + + node_one, created_one = entity_resolver.add_resolved(kg_one, 'DomainName', 'example.com') + node_two, created_two = entity_resolver.add_resolved(kg_two, 'DomainName', 'example.com') + + assert created_one is True + assert created_two is True + assert node_one != node_two + assert entity_resolver.resolve('DomainName', 'example.com', kg=kg_one) == node_one + assert entity_resolver.resolve('DomainName', 'example.com', kg=kg_two) == node_two diff --git a/tests/test_osint_executor.py b/tests/test_osint_executor.py new file mode 100644 index 0000000..c606719 --- /dev/null +++ b/tests/test_osint_executor.py @@ -0,0 +1,197 @@ +# ruff: noqa: S101 + +from __future__ import annotations + +from unittest.mock import patch + +from zettelforge import osint as _osint # noqa: F401 -- side effects +from zettelforge.knowledge_graph import KnowledgeGraph +from zettelforge.osint import entity_resolver +from zettelforge.osint.collectors.infrastructure import bgp_collector +from zettelforge.osint.executor import run_osint_collection +from zettelforge.osint.transform_registry import ( + CollectorTuple, + TransformMetadata, + TransformRegistry, +) + + +def _fake_dns_collect(input_entity_type: str, input_value: str) -> list[CollectorTuple]: + assert input_entity_type == 'DomainName' + assert input_value == 'example.com' + return [ + CollectorTuple( + output_entity_type='IPv4Address', + output_value='1.2.3.4', + edge_type='resolves_to', + from_entity_type='DomainName', + to_entity_type='IPv4Address', + output_props={'value': '1.2.3.4'}, + edge_props={'source': 'unit-test'}, + ) + ] + + +def _boom_collect(input_entity_type: str, input_value: str) -> 
list[CollectorTuple]: + raise RuntimeError('collector boom') + + +def test_run_osint_collection_persists_nodes_and_edges(tmp_path, monkeypatch) -> None: + monkeypatch.setattr(entity_resolver, '_ALIAS_INDEX', {}) + monkeypatch.setattr(entity_resolver, '_ALIAS_REVERSE', {}) + + kg = KnowledgeGraph(data_dir=str(tmp_path)) + registry = TransformRegistry() + registry.register( + TransformMetadata( + name='fake_dns', + description='Fake DNS collector for executor tests.', + input_types=('DomainName',), + output_types=(('IPv4Address', 'resolves_to'),), + ), + _fake_dns_collect, + ) + + result = run_osint_collection('DomainName', 'Example.COM.', kg=kg, registry=registry) + + assert result.collectors_run == ['fake_dns'] + assert result.canonical_input_value == 'example.com' + assert result.error_count == 0 + assert result.persisted_count == 1 + assert result.seed_node_id is not None + assert result.persisted[0].output_value == '1.2.3.4' + + seed = kg.get_node('DomainName', 'example.com') + target = kg.get_node('IPv4Address', '1.2.3.4') + assert seed is not None + assert target is not None + + neighbors = kg.get_neighbors('DomainName', 'example.com', 'resolves_to') + assert [item['node']['entity_value'] for item in neighbors] == ['1.2.3.4'] + + +def test_run_osint_collection_records_nonfatal_collector_errors(tmp_path, monkeypatch) -> None: + monkeypatch.setattr(entity_resolver, '_ALIAS_INDEX', {}) + monkeypatch.setattr(entity_resolver, '_ALIAS_REVERSE', {}) + + kg = KnowledgeGraph(data_dir=str(tmp_path)) + registry = TransformRegistry() + registry.register( + TransformMetadata( + name='boom', + description='Failing collector used to verify fail-closed handling.', + input_types=('DomainName',), + output_types=(('IPv4Address', 'resolves_to'),), + ), + _boom_collect, + ) + registry.register( + TransformMetadata( + name='fake_dns', + description='Fake DNS collector for executor tests.', + input_types=('DomainName',), + output_types=(('IPv4Address', 'resolves_to'),), + ), + 
_fake_dns_collect, + ) + + result = run_osint_collection('DomainName', 'example.com', kg=kg, registry=registry) + + assert result.collectors_run == ['boom', 'fake_dns'] + assert result.error_count == 1 + assert result.errors[0].collector_name == 'boom' + assert result.persisted_count == 1 + assert result.tuples_collected == 1 + + +def test_run_osint_collection_respects_empty_allowlist(tmp_path, monkeypatch) -> None: + monkeypatch.setattr(entity_resolver, '_ALIAS_INDEX', {}) + monkeypatch.setattr(entity_resolver, '_ALIAS_REVERSE', {}) + + kg = KnowledgeGraph(data_dir=str(tmp_path)) + registry = TransformRegistry() + registry.register( + TransformMetadata( + name='fake_dns', + description='Fake DNS collector for executor tests.', + input_types=('DomainName',), + output_types=(('IPv4Address', 'resolves_to'),), + ), + _fake_dns_collect, + ) + + result = run_osint_collection( + 'DomainName', + 'example.com', + kg=kg, + registry=registry, + collector_names=[], + ) + + assert result.collectors_run == [] + assert result.tuples_collected == 0 + assert result.error_count == 0 + assert result.persisted_count == 0 + assert result.seed_node_id is not None + assert kg.get_neighbors('DomainName', 'example.com', 'resolves_to') == [] + + +def _invalid_endpoint_collect(input_entity_type: str, input_value: str) -> list[CollectorTuple]: + return [ + CollectorTuple( + output_entity_type='Organization', + output_value='Example Corp', + edge_type='owned_by', + from_entity_type='Netblock', + to_entity_type='Organization', + output_props={'name': 'Example Corp'}, + edge_props={'cidr': 'not-a-cidr'}, + ) + ] + + +def test_run_osint_collection_does_not_persist_partial_tuple_on_invalid_endpoint( + tmp_path, + monkeypatch, +) -> None: + monkeypatch.setattr(entity_resolver, '_ALIAS_INDEX', {}) + monkeypatch.setattr(entity_resolver, '_ALIAS_REVERSE', {}) + + kg = KnowledgeGraph(data_dir=str(tmp_path)) + registry = TransformRegistry() + registry.register( + TransformMetadata( + 
name='invalid_endpoint', + description='Collector that emits an invalid endpoint payload.', + input_types=('ASNumber',), + output_types=(('Organization', 'owned_by'),), + ), + _invalid_endpoint_collect, + ) + + result = run_osint_collection('ASNumber', 'AS15169', kg=kg, registry=registry) + + assert result.collectors_run == ['invalid_endpoint'] + assert result.error_count == 1 + assert result.persisted_count == 0 # seed writes are not counted in the result + assert result.seed_node_id is not None + assert kg.get_node('Organization', 'example corp') is None + assert kg.get_neighbors('ASNumber', '15169', 'owned_by') == [] + + +def test_bgp_collector_emits_netblocks_from_asn() -> None: + payload = { + 'owner': {'name': 'Google LLC'}, + 'prefixes': [ + {'prefix': '8.8.8.0/24'}, + {'prefix': '8.8.4.0/24'}, + {'prefix': 'invalid'}, + ], + } + with patch.object(bgp_collector, '_bgpview_get', return_value=payload): + out = bgp_collector.collect('ASNumber', 'AS15169') + + assert [item.output_value for item in out] == ['8.8.8.0/24', '8.8.4.0/24'] + assert all(item.from_entity_type == 'Netblock' for item in out) + assert all(item.to_entity_type == 'ASNumber' for item in out) + assert all(item.output_props['org'] == 'Google LLC' for item in out) From 41d36ca117472cb3a7535d5ea9cc5ba4f092e94b Mon Sep 17 00:00:00 2001 From: Patrick Roland <48327651+rolandpg@users.noreply.github.com> Date: Mon, 8 Jun 2026 21:00:27 -0500 Subject: [PATCH 02/24] Potential fix for pull request finding Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com> Signed-off-by: Patrick Roland <48327651+rolandpg@users.noreply.github.com> --- src/zettelforge/osint/executor.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/zettelforge/osint/executor.py b/src/zettelforge/osint/executor.py index 67e7d58..6d00ba3 100644 --- a/src/zettelforge/osint/executor.py +++ b/src/zettelforge/osint/executor.py @@ -186,6 +186,15 @@ def run_osint_collection( 
canonical_input_value=canonical_input_value, ) result.persisted.append(persisted) + else: + from_value = _derive_endpoint_value( + tup, "from", input_entity_type, canonical_input_value + ) + to_value = _derive_endpoint_value(tup, "to", input_entity_type, canonical_input_value) + from_props = _endpoint_properties(tup.from_entity_type, from_value, tup, input_entity_type) + to_props = _endpoint_properties(tup.to_entity_type, to_value, tup, input_entity_type) + _validate_entity_or_raise(validator, tup.from_entity_type, from_props) + _validate_entity_or_raise(validator, tup.to_entity_type, to_props) except ValueError as exc: result.errors.append(OSINTExecutionError(meta.name, str(exc), tuple_index=index)) From f7fa81f0a85aeb425d459cb141d90b0d96d135ed Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 9 Jun 2026 02:16:18 +0000 Subject: [PATCH 03/24] Persist existing-node updates in OSINT resolver --- src/zettelforge/osint/entity_resolver.py | 4 +--- tests/test_osint_entity_resolver.py | 5 +++++ 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/src/zettelforge/osint/entity_resolver.py b/src/zettelforge/osint/entity_resolver.py index 6c53907..bff3e24 100644 --- a/src/zettelforge/osint/entity_resolver.py +++ b/src/zettelforge/osint/entity_resolver.py @@ -22,7 +22,6 @@ import ipaddress import re -from datetime import datetime from typing import TYPE_CHECKING from zettelforge.osint.ontology import ( @@ -239,8 +238,7 @@ def add_resolved( if existing_id: node = kg.get_node_by_id(existing_id) if node and properties: - node["properties"].update(properties) - node["updated_at"] = datetime.now().isoformat() + kg.add_node(entity_type, canonical_value, properties) if canonical_value != entity_value: alias_reverse[entity_value] = canonical return existing_id, False diff --git a/tests/test_osint_entity_resolver.py b/tests/test_osint_entity_resolver.py index ef67d88..ab5c458 100644 --- 
a/tests/test_osint_entity_resolver.py +++ b/tests/test_osint_entity_resolver.py @@ -38,6 +38,11 @@ def test_add_resolved_registers_alias_for_existing_node(tmp_path, monkeypatch) - assert node['properties']['value'] == 'example.com' assert node['properties']['source'] == 'unit-test' + reloaded = KnowledgeGraph(data_dir=str(tmp_path)) + reloaded_node = reloaded.get_node('DomainName', 'example.com') + assert reloaded_node is not None + assert reloaded_node['properties']['source'] == 'unit-test' + def test_canonicalise_organization_normalizes_case_and_whitespace() -> None: assert entity_resolver.canonicalise_value('Organization', ' Example Corp ') == 'example corp' From b399716d0cba8bdfb4b9f38c72d35feccdc869e3 Mon Sep 17 00:00:00 2001 From: Patrick Roland Date: Mon, 8 Jun 2026 21:32:26 -0500 Subject: [PATCH 04/24] fix(osint): enforce tuple validation and persist updates --- src/zettelforge/osint/entity_resolver.py | 3 +- src/zettelforge/osint/executor.py | 76 +++++++++++++++++++---- tests/test_osint_executor.py | 77 ++++++++++++++++++++++++ 3 files changed, 143 insertions(+), 13 deletions(-) diff --git a/src/zettelforge/osint/entity_resolver.py b/src/zettelforge/osint/entity_resolver.py index bff3e24..e542345 100644 --- a/src/zettelforge/osint/entity_resolver.py +++ b/src/zettelforge/osint/entity_resolver.py @@ -236,8 +236,7 @@ def add_resolved( alias_index, alias_reverse = _alias_maps(kg) existing_id = alias_index.get(canonical) if existing_id: - node = kg.get_node_by_id(existing_id) - if node and properties: + if properties: kg.add_node(entity_type, canonical_value, properties) if canonical_value != entity_value: alias_reverse[entity_value] = canonical diff --git a/src/zettelforge/osint/executor.py b/src/zettelforge/osint/executor.py index 6d00ba3..841493c 100644 --- a/src/zettelforge/osint/executor.py +++ b/src/zettelforge/osint/executor.py @@ -23,7 +23,7 @@ from zettelforge.knowledge_graph import KnowledgeGraph, get_knowledge_graph from zettelforge.log import 
get_logger -from zettelforge.ontology import OntologyValidator +from zettelforge.ontology import RELATION_TYPES, OntologyValidator from zettelforge.osint.entity_resolver import add_resolved, canonicalise_value from zettelforge.osint.ontology import merge_into_global_ontology from zettelforge.osint.transform_registry import ( @@ -175,7 +175,13 @@ def run_osint_collection( result.tuples_collected += len(tuples) for index, tup in enumerate(tuples): try: - _validate_tuple(meta, tup, validator) + _validate_tuple( + meta, + tup, + validator, + input_entity_type, + canonical_input_value, + ) if persist: persisted = _persist_tuple( kg=kg, @@ -190,9 +196,15 @@ def run_osint_collection( from_value = _derive_endpoint_value( tup, "from", input_entity_type, canonical_input_value ) - to_value = _derive_endpoint_value(tup, "to", input_entity_type, canonical_input_value) - from_props = _endpoint_properties(tup.from_entity_type, from_value, tup, input_entity_type) - to_props = _endpoint_properties(tup.to_entity_type, to_value, tup, input_entity_type) + to_value = _derive_endpoint_value( + tup, "to", input_entity_type, canonical_input_value + ) + from_props = _endpoint_properties( + tup.from_entity_type, from_value, tup, input_entity_type + ) + to_props = _endpoint_properties( + tup.to_entity_type, to_value, tup, input_entity_type + ) _validate_entity_or_raise(validator, tup.from_entity_type, from_props) _validate_entity_or_raise(validator, tup.to_entity_type, to_props) except ValueError as exc: @@ -211,10 +223,28 @@ def _validate_tuple( collector: TransformMetadata, tup: CollectorTuple, validator: OntologyValidator, + input_entity_type: str, + canonical_input_value: str, ) -> None: output_props = _entity_properties(tup.output_entity_type, tup.output_value, tup.output_props) _validate_entity_or_raise(validator, tup.output_entity_type, output_props) + if tup.edge_type not in RELATION_TYPES and tup.edge_type not in validator.custom_relations: + raise ValueError( + f"{collector.name} 
emitted unregistered relation " + f"{tup.from_entity_type} -[{tup.edge_type}]-> {tup.to_entity_type}" + ) + + if (tup.output_entity_type, tup.edge_type) not in collector.output_types: + allowed = ( + ", ".join(f"{entity}:{edge}" for entity, edge in collector.output_types) or "" + ) + raise ValueError( + f"{collector.name} emitted undeclared tuple " + f"{tup.output_entity_type} -[{tup.edge_type}]-> {tup.to_entity_type}; " + f"allowed outputs: {allowed}" + ) + ok, errors = validator.validate_relation( tup.from_entity_type, tup.edge_type, tup.to_entity_type ) @@ -225,6 +255,13 @@ def _validate_tuple( + "; ".join(errors) ) + _validate_tuple_endpoints( + tup=tup, + validator=validator, + input_entity_type=input_entity_type, + canonical_input_value=canonical_input_value, + ) + def _persist_tuple( *, @@ -237,14 +274,14 @@ def _persist_tuple( ) -> PersistedOSINTTuple: output_value = canonicalise_value(tup.output_entity_type, tup.output_value) output_props = _entity_properties(tup.output_entity_type, output_value, tup.output_props) - - from_value = _derive_endpoint_value(tup, "from", input_entity_type, canonical_input_value) - to_value = _derive_endpoint_value(tup, "to", input_entity_type, canonical_input_value) - + from_value, to_value = _validate_tuple_endpoints( + tup=tup, + validator=validator, + input_entity_type=input_entity_type, + canonical_input_value=canonical_input_value, + ) from_props = _endpoint_properties(tup.from_entity_type, from_value, tup, input_entity_type) to_props = _endpoint_properties(tup.to_entity_type, to_value, tup, input_entity_type) - _validate_entity_or_raise(validator, tup.from_entity_type, from_props) - _validate_entity_or_raise(validator, tup.to_entity_type, to_props) output_node_id, _ = add_resolved(kg, tup.output_entity_type, output_value, output_props) add_resolved(kg, tup.from_entity_type, from_value, from_props) @@ -279,6 +316,23 @@ def _persist_tuple( ) +def _validate_tuple_endpoints( + *, + tup: CollectorTuple, + validator: 
OntologyValidator, + input_entity_type: str, + canonical_input_value: str, +) -> tuple[str, str]: + from_value = _derive_endpoint_value(tup, "from", input_entity_type, canonical_input_value) + to_value = _derive_endpoint_value(tup, "to", input_entity_type, canonical_input_value) + + from_props = _endpoint_properties(tup.from_entity_type, from_value, tup, input_entity_type) + to_props = _endpoint_properties(tup.to_entity_type, to_value, tup, input_entity_type) + _validate_entity_or_raise(validator, tup.from_entity_type, from_props) + _validate_entity_or_raise(validator, tup.to_entity_type, to_props) + return from_value, to_value + + def _derive_endpoint_value( tup: CollectorTuple, side: str, diff --git a/tests/test_osint_executor.py b/tests/test_osint_executor.py index c606719..b232b14 100644 --- a/tests/test_osint_executor.py +++ b/tests/test_osint_executor.py @@ -150,6 +150,20 @@ def _invalid_endpoint_collect(input_entity_type: str, input_value: str) -> list[ ] +def _typo_relation_collect(input_entity_type: str, input_value: str) -> list[CollectorTuple]: + return [ + CollectorTuple( + output_entity_type='IPv4Address', + output_value='1.2.3.4', + edge_type='reslove_to', + from_entity_type='DomainName', + to_entity_type='IPv4Address', + output_props={'value': '1.2.3.4'}, + edge_props={}, + ) + ] + + def test_run_osint_collection_does_not_persist_partial_tuple_on_invalid_endpoint( tmp_path, monkeypatch, @@ -179,6 +193,69 @@ def test_run_osint_collection_does_not_persist_partial_tuple_on_invalid_endpoint assert kg.get_neighbors('ASNumber', '15169', 'owned_by') == [] +def test_run_osint_collection_validates_endpoints_in_dry_run( + tmp_path, + monkeypatch, +) -> None: + monkeypatch.setattr(entity_resolver, '_ALIAS_INDEX', {}) + monkeypatch.setattr(entity_resolver, '_ALIAS_REVERSE', {}) + + kg = KnowledgeGraph(data_dir=str(tmp_path)) + registry = TransformRegistry() + registry.register( + TransformMetadata( + name='invalid_endpoint', + description='Collector that emits 
an invalid endpoint payload.', + input_types=('ASNumber',), + output_types=(('Organization', 'owned_by'),), + ), + _invalid_endpoint_collect, + ) + + result = run_osint_collection( + 'ASNumber', + 'AS15169', + kg=kg, + registry=registry, + persist=False, + ) + + assert result.collectors_run == ['invalid_endpoint'] + assert result.error_count == 1 + assert result.persisted_count == 0 + assert result.seed_node_id is None + assert kg.get_node('Organization', 'example corp') is None + assert kg.get_neighbors('ASNumber', '15169', 'owned_by') == [] + + +def test_run_osint_collection_rejects_unregistered_relation_even_when_metadata_matches( + tmp_path, + monkeypatch, +) -> None: + monkeypatch.setattr(entity_resolver, '_ALIAS_INDEX', {}) + monkeypatch.setattr(entity_resolver, '_ALIAS_REVERSE', {}) + + kg = KnowledgeGraph(data_dir=str(tmp_path)) + registry = TransformRegistry() + registry.register( + TransformMetadata( + name='typo_relation', + description='Collector that emits a typo relation.', + input_types=('DomainName',), + output_types=(('IPv4Address', 'reslove_to'),), + ), + _typo_relation_collect, + ) + + result = run_osint_collection('DomainName', 'example.com', kg=kg, registry=registry) + + assert result.collectors_run == ['typo_relation'] + assert result.error_count == 1 + assert result.persisted_count == 0 + assert result.seed_node_id is not None + assert kg.get_neighbors('DomainName', 'example.com', 'reslove_to') == [] + + def test_bgp_collector_emits_netblocks_from_asn() -> None: payload = { 'owner': {'name': 'Google LLC'}, From a6b7fd0a9bcafbb42c68204d594ae12e40639b29 Mon Sep 17 00:00:00 2001 From: Patrick Roland Date: Tue, 9 Jun 2026 12:59:40 -0500 Subject: [PATCH 05/24] feat(config): add enrichment off-switch for deterministic benchmarks Restores the benchmark isolation the removed disable_enrichment kwarg provided. ZETTELFORGE_ENRICHMENT_ENABLED=false gates causal extraction, LLM NER, and neighbor evolution dispatch. 
LoCoMo harness repaired (dead kwarg removed) and pinned to deterministic ingestion. Co-Authored-By: Claude Opus 4.8 (1M context) --- benchmarks/locomo_benchmark.py | 5 +- src/zettelforge/config.py | 10 ++++ src/zettelforge/memory_manager.py | 82 ++++++++++++++++--------------- tests/test_enrichment_switch.py | 70 ++++++++++++++++++++++++++ 4 files changed, 127 insertions(+), 40 deletions(-) create mode 100644 tests/test_enrichment_switch.py diff --git a/benchmarks/locomo_benchmark.py b/benchmarks/locomo_benchmark.py index 9719b9f..ae9072f 100644 --- a/benchmarks/locomo_benchmark.py +++ b/benchmarks/locomo_benchmark.py @@ -32,6 +32,10 @@ from typing import List, Dict, Optional, Tuple from datetime import datetime +# Must be set before any zettelforge import resolves the config singleton: +# benchmark ingestion is deterministic, no background LLM enrichment. +os.environ.setdefault("ZETTELFORGE_ENRICHMENT_ENABLED", "false") + from zettelforge import MemoryManager @@ -443,7 +447,6 @@ def run_benchmark( mm = MemoryManager( jsonl_path=f"{tmpdir}/notes.jsonl", lance_path=f"{tmpdir}/vectordb", - disable_enrichment=True, ) # Ingest diff --git a/src/zettelforge/config.py b/src/zettelforge/config.py index 6182be2..8c39366 100644 --- a/src/zettelforge/config.py +++ b/src/zettelforge/config.py @@ -176,6 +176,11 @@ class LLMNerConfig: enabled: bool = True # Always-on LLM NER via background enrichment queue +@dataclass +class EnrichmentConfig: + enabled: bool = True # Master switch for background enrichment dispatch + + @dataclass class ExtractionConfig: max_facts: int = 5 @@ -337,6 +342,7 @@ class ZettelForgeConfig: embedding: EmbeddingConfig = field(default_factory=EmbeddingConfig) llm: LLMConfig = field(default_factory=LLMConfig) llm_ner: LLMNerConfig = field(default_factory=LLMNerConfig) + enrichment: EnrichmentConfig = field(default_factory=EnrichmentConfig) extraction: ExtractionConfig = field(default_factory=ExtractionConfig) retrieval: RetrievalConfig = 
field(default_factory=RetrievalConfig) synthesis: SynthesisConfig = field(default_factory=SynthesisConfig) @@ -611,6 +617,10 @@ def _apply_env(cfg: ZettelForgeConfig): if v := os.environ.get("ZETTELFORGE_LLM_NER_ENABLED"): cfg.llm_ner.enabled = v.lower() in ("true", "1", "yes") + # Background enrichment master switch (benchmarks, offline ingestion) + if v := os.environ.get("ZETTELFORGE_ENRICHMENT_ENABLED"): + cfg.enrichment.enabled = v.lower() in ("true", "1", "yes") + # RFC-013: PII detection via Presidio if v := os.environ.get("ZETTELFORGE_PII_ENABLED"): cfg.governance.pii.enabled = v.lower() in ("true", "1", "yes") diff --git a/src/zettelforge/memory_manager.py b/src/zettelforge/memory_manager.py index 5630b23..9a663b9 100644 --- a/src/zettelforge/memory_manager.py +++ b/src/zettelforge/memory_manager.py @@ -401,56 +401,60 @@ def _remember_inner( # overhead only. In sync=True mode the LLM work runs inline and is intentionally # EXCLUDED from this bucket — mixing LLM latency into "dispatch" would corrupt # the Phase 0.5 attribution. sync=True is retained for tests/debug. + # The whole block is gated by config.enrichment.enabled + # (ZETTELFORGE_ENRICHMENT_ENABLED) so benchmarks and offline ingestion + # get deterministic writes with no LLM dispatch. 
dispatch_start = time.perf_counter() if not sync else None - job = _EnrichmentJob( - note_id=note.id, - domain=domain, - content_len=len(content), - resolved_entities=resolved_entities, - ) - if sync: - self._run_enrichment(job) - else: - try: - self._enrichment_queue.put_nowait(job) - self._pending_enrichment.add(note.id) - except queue.Full: - self._logger.warning("enrichment_queue_full", note_id=note.id) - - # Phase 6c: LLM NER enrichment (always-on, background — RFC-001 amendment) - if get_config().llm_ner.enabled: - ner_job = _EnrichmentJob( + if get_config().enrichment.enabled: + job = _EnrichmentJob( note_id=note.id, domain=domain, content_len=len(content), resolved_entities=resolved_entities, - job_type="llm_ner", ) if sync: - self._run_llm_ner(ner_job) + self._run_enrichment(job) else: try: - self._enrichment_queue.put_nowait(ner_job) - except queue.Full: - self._logger.warning("llm_ner_queue_full", note_id=note.id) - - # Phase 6d: Neighbor evolution (A-Mem inspired — background worker) - # Skip if fewer than 3 notes exist — not enough neighbors to evolve against - if self.store.count_notes() >= 3: - evolution_job = _EnrichmentJob( - note_id=note.id, - domain=domain, - content_len=len(content), - job_type="neighbor_evolution", - ) - if sync: - self._run_evolution(evolution_job) - else: - try: - self._enrichment_queue.put_nowait(evolution_job) + self._enrichment_queue.put_nowait(job) self._pending_enrichment.add(note.id) except queue.Full: - self._logger.warning("evolution_queue_full", note_id=note.id) + self._logger.warning("enrichment_queue_full", note_id=note.id) + + # Phase 6c: LLM NER enrichment (always-on, background — RFC-001 amendment) + if get_config().llm_ner.enabled: + ner_job = _EnrichmentJob( + note_id=note.id, + domain=domain, + content_len=len(content), + resolved_entities=resolved_entities, + job_type="llm_ner", + ) + if sync: + self._run_llm_ner(ner_job) + else: + try: + self._enrichment_queue.put_nowait(ner_job) + except queue.Full: + 
self._logger.warning("llm_ner_queue_full", note_id=note.id) + + # Phase 6d: Neighbor evolution (A-Mem inspired — background worker) + # Skip if fewer than 3 notes exist — not enough neighbors to evolve against + if self.store.count_notes() >= 3: + evolution_job = _EnrichmentJob( + note_id=note.id, + domain=domain, + content_len=len(content), + job_type="neighbor_evolution", + ) + if sync: + self._run_evolution(evolution_job) + else: + try: + self._enrichment_queue.put_nowait(evolution_job) + self._pending_enrichment.add(note.id) + except queue.Full: + self._logger.warning("evolution_queue_full", note_id=note.id) if dispatch_start is not None: phase_timings_ms["enrichment_dispatch"] = (time.perf_counter() - dispatch_start) * 1000 diff --git a/tests/test_enrichment_switch.py b/tests/test_enrichment_switch.py new file mode 100644 index 0000000..5df87fe --- /dev/null +++ b/tests/test_enrichment_switch.py @@ -0,0 +1,70 @@ +"""Enrichment off-switch: ZETTELFORGE_ENRICHMENT_ENABLED gates all background jobs. + +Benchmarks and offline ingestion need deterministic writes with no LLM +enrichment dispatch (causal extraction, LLM NER, neighbor evolution). 
+""" + +import pytest + +from zettelforge.config import get_config, reload_config + + +@pytest.fixture(autouse=True) +def _restore_config(): + yield + reload_config() + + +def test_enrichment_config_default_enabled(monkeypatch): + monkeypatch.delenv('ZETTELFORGE_ENRICHMENT_ENABLED', raising=False) + cfg = reload_config() + assert cfg.enrichment.enabled is True + + +def test_enrichment_env_override(monkeypatch): + monkeypatch.setenv('ZETTELFORGE_ENRICHMENT_ENABLED', 'false') + cfg = reload_config() + assert cfg.enrichment.enabled is False + + +def test_remember_dispatches_nothing_when_disabled(tmp_path, monkeypatch): + monkeypatch.setenv('ZETTELFORGE_ENRICHMENT_ENABLED', 'false') + reload_config() + from zettelforge.memory_manager import MemoryManager + + mm = MemoryManager( + jsonl_path=str(tmp_path / 'notes.jsonl'), + lance_path=str(tmp_path / 'vec'), + ) + for i in range(4): + mm.remember( + f'APT28 used DROPBEAR in campaign {i}.', + source_type='threat_report', + source_ref=f'r{i}', + domain='cti', + ) + assert mm._enrichment_queue.qsize() == 0 + assert len(mm._pending_enrichment) == 0 + + +def test_remember_dispatches_jobs_when_enabled(tmp_path, monkeypatch): + monkeypatch.delenv('ZETTELFORGE_ENRICHMENT_ENABLED', raising=False) + reload_config() + assert get_config().enrichment.enabled is True + from zettelforge.memory_manager import MemoryManager + + mm = MemoryManager( + jsonl_path=str(tmp_path / 'notes.jsonl'), + lance_path=str(tmp_path / 'vec'), + ) + # Count dispatches without letting the background worker consume them + # (avoids racing the worker and avoids real LLM calls). 
+ dispatched = [] + monkeypatch.setattr(mm._enrichment_queue, 'put_nowait', dispatched.append) + mm.remember( + 'APT28 used DROPBEAR in a campaign.', + source_type='threat_report', + source_ref='r0', + domain='cti', + ) + assert len(dispatched) > 0 From 9318096f436e6ad8f522ab132d01f1d07689d82b Mon Sep 17 00:00:00 2001 From: Patrick Roland Date: Tue, 9 Jun 2026 13:01:21 -0500 Subject: [PATCH 06/24] feat(memory): restore remember_chunked for chunked ingestion Sentence-boundary greedy packing to chunk_size with ordinal source_ref provenance. Unblocks the CTI benchmark chunked_800 strategy and the MemPalace-granularity LoCoMo experiment. CTI harness pinned to deterministic (enrichment-off) ingestion. Co-Authored-By: Claude Opus 4.8 (1M context) --- benchmarks/cti_retrieval_benchmark.py | 2 + src/zettelforge/memory_manager.py | 64 ++++++++++++++++++++++++ tests/test_remember_chunked.py | 72 +++++++++++++++++++++++++++ 3 files changed, 138 insertions(+) create mode 100644 tests/test_remember_chunked.py diff --git a/benchmarks/cti_retrieval_benchmark.py b/benchmarks/cti_retrieval_benchmark.py index 9336839..33027ee 100644 --- a/benchmarks/cti_retrieval_benchmark.py +++ b/benchmarks/cti_retrieval_benchmark.py @@ -22,6 +22,8 @@ from typing import List, Dict, Tuple os.environ["ZETTELFORGE_BACKEND"] = "jsonl" +# Deterministic ingestion: no background LLM enrichment during benchmarks. 
+os.environ.setdefault("ZETTELFORGE_ENRICHMENT_ENABLED", "false") from zettelforge import MemoryManager diff --git a/src/zettelforge/memory_manager.py b/src/zettelforge/memory_manager.py index 9a663b9..be51aeb 100644 --- a/src/zettelforge/memory_manager.py +++ b/src/zettelforge/memory_manager.py @@ -11,6 +11,7 @@ import atexit import concurrent.futures import queue +import re import threading import time import uuid @@ -471,6 +472,69 @@ def _remember_inner( ) return note, "created" + def remember_chunked( + self, + content: str, + source_type: str = "conversation", + source_ref: str = "", + domain: str = "general", + chunk_size: int = 800, + sync: bool = False, + ) -> list[MemoryNote]: + """ + Split long content on sentence boundaries and store each chunk as its + own note via remember(). + + Smaller chunks give retrieval finer granularity on long conversational + sessions (MemPalace-style 800-char chunks). Content at or under + chunk_size is stored as a single note. Chunked notes carry an ordinal + source_ref suffix ("{source_ref}#c{i}") so provenance survives the + split. + + Args: + content: Raw text to store. + source_type: Origin type (conversation, threat_report, etc.). + source_ref: Source identifier; chunks get "#c{i}" appended. + domain: Memory domain (cti, general, etc.). + chunk_size: Greedy sentence-packing target in characters. A single + sentence longer than chunk_size becomes its own chunk. + sync: Passed through to remember(). + + Returns: list of stored MemoryNote, in document order. 
+ """ + text = content.strip() + if len(text) <= chunk_size: + note, _ = self.remember( + text, source_type=source_type, source_ref=source_ref, domain=domain, sync=sync + ) + return [note] + + sentences = re.split(r"(?<=[.!?])\s+", text) + chunks: list[str] = [] + current: list[str] = [] + current_len = 0 + for sentence in sentences: + if current and current_len + len(sentence) + 1 > chunk_size: + chunks.append(" ".join(current)) + current = [] + current_len = 0 + current.append(sentence) + current_len += len(sentence) + 1 + if current: + chunks.append(" ".join(current)) + + notes: list[MemoryNote] = [] + for i, chunk in enumerate(chunks): + note, _ = self.remember( + chunk, + source_type=source_type, + source_ref=f"{source_ref}#c{i}", + domain=domain, + sync=sync, + ) + notes.append(note) + return notes + def remember_with_extraction( self, content: str, diff --git a/tests/test_remember_chunked.py b/tests/test_remember_chunked.py new file mode 100644 index 0000000..a950c32 --- /dev/null +++ b/tests/test_remember_chunked.py @@ -0,0 +1,72 @@ +"""remember_chunked splits long content on sentence boundaries into bounded chunks. + +Restores the chunked-ingestion API the CTI benchmark exercises and the +MemPalace comparison identified as the conversational-granularity lever. +""" + +import pytest + +from zettelforge.config import reload_config + + +@pytest.fixture(autouse=True) +def _no_enrichment(monkeypatch): + monkeypatch.setenv('ZETTELFORGE_ENRICHMENT_ENABLED', 'false') + reload_config() + yield + reload_config() + + +def _manager(tmp_path): + from zettelforge.memory_manager import MemoryManager + + return MemoryManager( + jsonl_path=str(tmp_path / 'notes.jsonl'), + lance_path=str(tmp_path / 'vec'), + ) + + +def test_remember_chunked_splits_and_stores(tmp_path): + mm = _manager(tmp_path) + content = ' '.join(f'Sentence number {i} about APT28 operations.' 
for i in range(60)) + notes = mm.remember_chunked( + content, + source_type='threat_report', + source_ref='r1', + domain='cti', + chunk_size=800, + ) + assert len(notes) >= 2 + assert all(len(n.content.raw) <= 900 for n in notes) + assert mm.store.count_notes() == len(notes) + # Chunks carry an ordinal source_ref so provenance survives the split + refs = [n.content.source_ref for n in notes] + assert refs == [f'r1#c{i}' for i in range(len(notes))] + + +def test_remember_chunked_short_content_single_note(tmp_path): + mm = _manager(tmp_path) + notes = mm.remember_chunked( + 'Short note.', + source_type='threat_report', + source_ref='r1', + domain='cti', + chunk_size=800, + ) + assert len(notes) == 1 + assert notes[0].content.source_ref == 'r1' + + +def test_remember_chunked_never_drops_text(tmp_path): + mm = _manager(tmp_path) + content = ' '.join(f'Fact {i} is recorded here.' for i in range(120)) + notes = mm.remember_chunked( + content, + source_type='conversation', + source_ref='s1', + domain='general', + chunk_size=400, + ) + rebuilt = ' '.join(n.content.raw for n in notes) + for i in range(120): + assert f'Fact {i} is recorded here.' in rebuilt From 9fceb3dc7d917592b4a346adee3181413fe853d3 Mon Sep 17 00:00:00 2001 From: Patrick Roland Date: Tue, 9 Jun 2026 13:13:17 -0500 Subject: [PATCH 07/24] fix(recall): graph stage reads the per-store KG, not the global JSONL KG _update_knowledge_graph writes MENTIONED_IN edges to the manager's storage backend, but _recall_inner traversed the process-global JSONL KG (~109MB on this host). Isolated stores saw up to ~2000 phantom note IDs per entity query (each a wasted SQLite lookup) and never saw their own graph, so the graph signal was dead in any custom-data-dir deployment. Adds StorageBackend.get_kg_edges_from and a StoreGraphSource adapter; GraphRetriever now accepts any GraphSource. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- src/zettelforge/graph_retriever.py | 35 ++++++++++- src/zettelforge/memory_manager.py | 12 ++-- src/zettelforge/sqlite_backend.py | 23 +++++++ src/zettelforge/storage_backend.py | 11 ++++ tests/test_graph_scoping.py | 96 ++++++++++++++++++++++++++++++ 5 files changed, 170 insertions(+), 7 deletions(-) create mode 100644 tests/test_graph_scoping.py diff --git a/src/zettelforge/graph_retriever.py b/src/zettelforge/graph_retriever.py index 9c01a77..9f2fbaa 100644 --- a/src/zettelforge/graph_retriever.py +++ b/src/zettelforge/graph_retriever.py @@ -7,8 +7,39 @@ """ from dataclasses import dataclass, field +from typing import Protocol -from zettelforge.knowledge_graph import KnowledgeGraph + +class GraphSource(Protocol): + """Read interface the traversal needs from any graph store.""" + + def get_node(self, entity_type: str, entity_value: str) -> dict | None: ... + + def get_node_by_id(self, node_id: str) -> dict | None: ... + + def get_outgoing_edges(self, node_id: str) -> list[dict]: ... + + +class StoreGraphSource: + """Adapts a StorageBackend's scoped KG tables to the traversal interface. + + Recall's graph stage must read the same per-store graph that + _update_knowledge_graph writes. The process-global JSONL KG mixes every + store on the machine and grows without bound, so traversing it from an + isolated store yields phantom note IDs and unbounded BFS cost. 
+ """ + + def __init__(self, store) -> None: + self._store = store + + def get_node(self, entity_type: str, entity_value: str) -> dict | None: + return self._store.get_kg_node(entity_type, entity_value) + + def get_node_by_id(self, node_id: str) -> dict | None: + return self._store.get_kg_node_by_id(node_id) + + def get_outgoing_edges(self, node_id: str) -> list[dict]: + return self._store.get_kg_edges_from(node_id) @dataclass @@ -24,7 +55,7 @@ class ScoredResult: class GraphRetriever: """Retrieve notes by traversing the knowledge graph from query entities.""" - def __init__(self, knowledge_graph: KnowledgeGraph): + def __init__(self, knowledge_graph: GraphSource): self.kg = knowledge_graph def retrieve_note_ids( diff --git a/src/zettelforge/memory_manager.py b/src/zettelforge/memory_manager.py index be51aeb..3bf7c1d 100644 --- a/src/zettelforge/memory_manager.py +++ b/src/zettelforge/memory_manager.py @@ -799,13 +799,15 @@ def _recall_inner( rest.append((note, score)) vector_scored = boosted + rest - # Blended retrieval: combine vector similarity with graph traversal + # Blended retrieval: combine vector similarity with graph traversal. + # The graph stage reads the per-store KG (same graph the write path + # populates), not the process-global JSONL KG: the global graph mixes + # every store on the machine, so traversing it from an isolated store + # returns phantom note IDs and unbounded BFS cost. 
from zettelforge.blended_retriever import BlendedRetriever - from zettelforge.graph_retriever import GraphRetriever - from zettelforge.knowledge_graph import get_knowledge_graph + from zettelforge.graph_retriever import GraphRetriever, StoreGraphSource - kg = get_knowledge_graph() - graph_retriever = GraphRetriever(kg) + graph_retriever = GraphRetriever(StoreGraphSource(self.store)) _graph_start = time.perf_counter() graph_results = graph_retriever.retrieve_note_ids(query_entities=resolved, max_depth=2) _graph_latency_ms = (time.perf_counter() - _graph_start) * 1000 diff --git a/src/zettelforge/sqlite_backend.py b/src/zettelforge/sqlite_backend.py index fdb3b7b..9b0cf43 100644 --- a/src/zettelforge/sqlite_backend.py +++ b/src/zettelforge/sqlite_backend.py @@ -591,6 +591,29 @@ def add_kg_edge( self._conn.commit() return edge_id + def get_kg_edges_from(self, node_id: str) -> list[dict]: + """Outgoing KG edges from a node (scoped graph traversal read path).""" + with self._write_lock: + self._check_open() + cur = self._conn.execute( + "SELECT edge_id, from_node_id, to_node_id, relationship, edge_type, " + "note_id, properties FROM kg_edges WHERE from_node_id = ?", + (node_id,), + ) + rows = cur.fetchall() + return [ + { + "edge_id": row["edge_id"], + "from_node_id": row["from_node_id"], + "to_node_id": row["to_node_id"], + "relationship": row["relationship"], + "edge_type": row["edge_type"], + "note_id": row["note_id"], + "properties": json.loads(row["properties"] or "{}"), + } + for row in rows + ] + def get_kg_neighbors( self, entity_type: str, diff --git a/src/zettelforge/storage_backend.py b/src/zettelforge/storage_backend.py index 6543c43..4e2027c 100644 --- a/src/zettelforge/storage_backend.py +++ b/src/zettelforge/storage_backend.py @@ -173,6 +173,17 @@ def get_kg_node_by_id(self, node_id: str) -> dict | None: """ ... + def get_kg_edges_from(self, node_id: str) -> list[dict]: + """Outgoing KG edges from a node, by internal node_id. 
+ + Read path for scoped graph traversal (GraphRetriever via + StoreGraphSource). Concrete backends must override; the default + fails loud rather than silently returning an empty graph. + """ + raise NotImplementedError( + f"{self.__class__.__name__} does not implement get_kg_edges_from()" + ) + def add_temporal_edge( self, from_type: str, diff --git a/tests/test_graph_scoping.py b/tests/test_graph_scoping.py new file mode 100644 index 0000000..ef543ce --- /dev/null +++ b/tests/test_graph_scoping.py @@ -0,0 +1,96 @@ +"""Recall's graph stage must read the per-store KG, not the process-global one. + +_update_knowledge_graph writes MENTIONED_IN edges to the manager's storage +backend (scoped SQLite). Before this fix, _recall_inner traversed the +process-global JSONL KG instead: isolated stores saw thousands of phantom +note nodes from other stores (latency) and never saw their own graph +(dead graph signal). +""" + +import pytest + +from zettelforge.config import reload_config + + +@pytest.fixture(autouse=True) +def _no_enrichment(monkeypatch): + monkeypatch.setenv('ZETTELFORGE_ENRICHMENT_ENABLED', 'false') + reload_config() + yield + reload_config() + + +def _manager(tmp_path, sub): + from zettelforge.memory_manager import MemoryManager + + d = tmp_path / sub + d.mkdir() + return MemoryManager(jsonl_path=str(d / 'notes.jsonl'), lance_path=str(d / 'vec')) + + +def test_backend_get_kg_edges_from(tmp_path): + mm = _manager(tmp_path, 'a') + mm.store.add_kg_edge('actor', 'APT28', 'note', 'n1', 'MENTIONED_IN') + node = mm.store.get_kg_node('actor', 'APT28') + assert node is not None + edges = mm.store.get_kg_edges_from(node['node_id']) + assert len(edges) == 1 + target = mm.store.get_kg_node_by_id(edges[0]['to_node_id']) + assert target['entity_type'] == 'note' + assert target['entity_value'] == 'n1' + + +def test_graph_retriever_sees_own_store_writes(tmp_path): + from zettelforge.graph_retriever import GraphRetriever, StoreGraphSource + + mm = _manager(tmp_path, 'a') + 
note, _ = mm.remember( + 'APT28 used the DROPBEAR backdoor to target NATO members.', + source_type='threat_report', + source_ref='r1', + domain='cti', + ) + # Mirror _recall_inner's entity resolution for the query + query_entities = mm.indexer.extractor.extract_all('What does APT28 use?') + resolved = { + etype: [mm.resolver.resolve(etype, e) for e in elist] + for etype, elist in query_entities.items() + } + assert any(resolved.values()), 'extractor should find APT28 in the query' + + retriever = GraphRetriever(StoreGraphSource(mm.store)) + results = retriever.retrieve_note_ids(query_entities=resolved, max_depth=2) + assert any(r.note_id == note.id for r in results) + + +def test_recall_graph_isolated_between_stores(tmp_path): + mm_a = _manager(tmp_path, 'a') + mm_a.remember( + 'APT28 used the DROPBEAR backdoor to target NATO members.', + source_type='threat_report', + source_ref='r1', + domain='cti', + ) + + mm_b = _manager(tmp_path, 'b') + mm_b.remember( + 'The weather in Toronto stayed mild through October.', + source_type='conversation', + source_ref='s1', + domain='general', + ) + + lookups = {'n': 0} + orig = mm_b.store.get_note_by_id + + def counting(nid): + lookups['n'] += 1 + return orig(nid) + + mm_b.store.get_note_by_id = counting + results = mm_b.recall('What does APT28 use?', k=10, exclude_superseded=False) + + # Store B has one note; the graph stage must not import thousands of + # phantom candidates from store A or the global KG. + assert lookups['n'] <= 10 + assert all('APT28' not in n.content.raw for n in results) From 810f26b8ce1abb58895e95f311b0e7264bc0e0c7 Mon Sep 17 00:00:00 2001 From: Patrick Roland Date: Tue, 9 Jun 2026 13:17:04 -0500 Subject: [PATCH 08/24] perf(recall): bound cross-encoder rerank cost with config policy rerank_enabled / rerank_max_candidates / rerank_doc_chars on RetrievalConfig plus ZETTELFORGE_RERANK_ENABLED kill switch. Only the head of the blended ranking is reranked; the tail keeps blended order. 
Defaults preserve prior behavior pending benchmark-tuned values. Co-Authored-By: Claude Opus 4.8 (1M context) --- src/zettelforge/config.py | 10 +++ src/zettelforge/memory_manager.py | 19 ++++-- tests/test_rerank_policy.py | 103 ++++++++++++++++++++++++++++++ 3 files changed, 125 insertions(+), 7 deletions(-) create mode 100644 tests/test_rerank_policy.py diff --git a/src/zettelforge/config.py b/src/zettelforge/config.py index 8c39366..0153d3a 100644 --- a/src/zettelforge/config.py +++ b/src/zettelforge/config.py @@ -193,6 +193,12 @@ class RetrievalConfig: similarity_threshold: float = 0.25 entity_boost: float = 2.5 max_graph_depth: int = 2 + # Cross-encoder rerank policy: the reranker is the dominant read-path + # cost (ONNX on CPU), so its work is bounded. Defaults preserve prior + # behavior; benchmark-tuned values are set in config.default.yaml. + rerank_enabled: bool = True + rerank_max_candidates: int = 50 + rerank_doc_chars: int = 512 @dataclass @@ -621,6 +627,10 @@ def _apply_env(cfg: ZettelForgeConfig): if v := os.environ.get("ZETTELFORGE_ENRICHMENT_ENABLED"): cfg.enrichment.enabled = v.lower() in ("true", "1", "yes") + # Cross-encoder rerank kill switch + if v := os.environ.get("ZETTELFORGE_RERANK_ENABLED"): + cfg.retrieval.rerank_enabled = v.lower() in ("true", "1", "yes") + # RFC-013: PII detection via Presidio if v := os.environ.get("ZETTELFORGE_PII_ENABLED"): cfg.governance.pii.enabled = v.lower() in ("true", "1", "yes") diff --git a/src/zettelforge/memory_manager.py b/src/zettelforge/memory_manager.py index 3bf7c1d..b412364 100644 --- a/src/zettelforge/memory_manager.py +++ b/src/zettelforge/memory_manager.py @@ -870,23 +870,28 @@ def _recall_inner( results.append(en) result_ids.add(en.id) - # ── Enterprise: Cross-encoder reranking ───────────────────────────── - if len(results) > 1: + # ── Cross-encoder reranking (policy-bounded) ──────────────────────── + # The reranker is the dominant read-path cost. 
Only the head of the + # blended ranking is reranked; the tail keeps its blended order. + retrieval_cfg = get_config().retrieval + if retrieval_cfg.rerank_enabled and len(results) > 1: try: - reranker = _get_reranker() # Returns None in Community + reranker = _get_reranker() if reranker is not None: - docs = [n.content.raw[:512] for n in results] + head = results[: retrieval_cfg.rerank_max_candidates] + tail = results[len(head) :] + docs = [n.content.raw[: retrieval_cfg.rerank_doc_chars] for n in head] scores = list(reranker.rerank(query, docs)) - # B905: strict=True — scores and results have identical + # B905: strict=True — scores and docs have identical # length by construction (one score per doc), so a length # mismatch would be a programming error, not a silent # truncation bug. paired = sorted( - zip(scores, results, strict=True), + zip(scores, head, strict=True), key=lambda x: x[0], reverse=True, ) - results = [note for _, note in paired] + results = [note for _, note in paired] + tail except Exception: self._logger.warning("reranking_failed_using_original_order", exc_info=True) diff --git a/tests/test_rerank_policy.py b/tests/test_rerank_policy.py new file mode 100644 index 0000000..a413132 --- /dev/null +++ b/tests/test_rerank_policy.py @@ -0,0 +1,103 @@ +"""Cross-encoder rerank policy: bounded candidates, bounded doc length, kill switch. + +The reranker is the dominant read-path cost (ONNX cross-encoder on CPU). +These knobs bound its work without changing the blended order of the +unreranked tail. 
+""" + +import pytest + +from zettelforge.config import get_config, reload_config + + +@pytest.fixture(autouse=True) +def _no_enrichment(monkeypatch): + monkeypatch.setenv('ZETTELFORGE_ENRICHMENT_ENABLED', 'false') + reload_config() + yield + reload_config() + + +class _FakeReranker: + def __init__(self): + self.calls = [] + + def rerank(self, query, docs): + self.calls.append((query, list(docs))) + # Reverse order: last doc gets the highest score + return [float(i) for i in range(len(docs))] + + +def _corpus_manager(tmp_path): + from zettelforge.memory_manager import MemoryManager + + mm = MemoryManager( + jsonl_path=str(tmp_path / 'notes.jsonl'), + lance_path=str(tmp_path / 'vec'), + ) + for i in range(8): + mm.remember( + f'Report {i}: threat actor activity involving infrastructure item {i}. ' * 8, + source_type='threat_report', + source_ref=f'r{i}', + domain='cti', + ) + return mm + + +def test_rerank_receives_bounded_candidates_and_doc_chars(tmp_path, monkeypatch): + import zettelforge.memory_manager as mmod + + fake = _FakeReranker() + monkeypatch.setattr(mmod, '_get_reranker', lambda: fake) + cfg = get_config() + monkeypatch.setattr(cfg.retrieval, 'rerank_max_candidates', 3) + monkeypatch.setattr(cfg.retrieval, 'rerank_doc_chars', 100) + + mm = _corpus_manager(tmp_path) + mm.recall('threat actor infrastructure', k=8, exclude_superseded=False) + + assert fake.calls, 'reranker should have been invoked' + _, docs = fake.calls[-1] + assert len(docs) <= 3 + assert all(len(d) <= 100 for d in docs) + + +def test_rerank_disabled_skips_reranker(tmp_path, monkeypatch): + import zettelforge.memory_manager as mmod + + fake = _FakeReranker() + monkeypatch.setattr(mmod, '_get_reranker', lambda: fake) + monkeypatch.setattr(get_config().retrieval, 'rerank_enabled', False) + + mm = _corpus_manager(tmp_path) + results = mm.recall('threat actor infrastructure', k=8, exclude_superseded=False) + + assert fake.calls == [] + assert results, 'recall still returns blended results' + 
+ +def test_rerank_tail_preserves_blended_order(tmp_path, monkeypatch): + import zettelforge.memory_manager as mmod + + fake = _FakeReranker() + monkeypatch.setattr(mmod, '_get_reranker', lambda: fake) + cfg = get_config() + monkeypatch.setattr(cfg.retrieval, 'rerank_max_candidates', 2) + + mm = _corpus_manager(tmp_path) + results = mm.recall('threat actor infrastructure', k=8, exclude_superseded=False) + + assert len(results) >= 3 + # Head (first 2) was reranked: fake scores reverse their relative order. + # Tail (3rd onward) must match the no-rerank ordering for the same query. + monkeypatch.setattr(cfg.retrieval, 'rerank_enabled', False) + unreranked = mm.recall('threat actor infrastructure', k=8, exclude_superseded=False) + assert [n.id for n in results[:2]] == [n.id for n in reversed(unreranked[:2])] + assert [n.id for n in results[2:]] == [n.id for n in unreranked[2:]] + + +def test_env_kill_switch(monkeypatch): + monkeypatch.setenv('ZETTELFORGE_RERANK_ENABLED', 'false') + cfg = reload_config() + assert cfg.retrieval.rerank_enabled is False From 9ff6cf2c934bf09012431b75ad0f6b8c192c096b Mon Sep 17 00:00:00 2001 From: Patrick Roland Date: Tue, 9 Jun 2026 13:17:04 -0500 Subject: [PATCH 09/24] perf(embedding): LRU+TTL cache for repeated embedding requests SmartCache (config.cache sizing) keyed by (model, sha256(text)) in front of embedding compute. First integration of the previously dormant cache.py module. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- src/zettelforge/vector_memory.py | 44 +++++++++++++++++++- tests/test_embedding_cache.py | 71 ++++++++++++++++++++++++++++++++ 2 files changed, 114 insertions(+), 1 deletion(-) create mode 100644 tests/test_embedding_cache.py diff --git a/src/zettelforge/vector_memory.py b/src/zettelforge/vector_memory.py index 489a94c..5e92a8c 100644 --- a/src/zettelforge/vector_memory.py +++ b/src/zettelforge/vector_memory.py @@ -97,9 +97,51 @@ def preload_embedding_model() -> None: # ── Embedding ──────────────────────────────────────────────────────────────── +# (model, text)-keyed LRU+TTL cache in front of embedding compute. Agents +# re-ask the same queries; recomputing the vector each time is pure waste. +_embedding_cache = None +_embedding_cache_lock = threading.Lock() + + +def _get_embedding_cache(): + global _embedding_cache + if _embedding_cache is None: + with _embedding_cache_lock: + if _embedding_cache is None: + from zettelforge.cache import SmartCache + from zettelforge.config import get_config + + cache_cfg = get_config().cache + _embedding_cache = SmartCache( + maxsize=cache_cfg.max_entries, + ttl_seconds=cache_cfg.ttl_seconds, + ) + return _embedding_cache + + +def reset_embedding_cache_for_tests() -> None: + """Drop the embedding cache so tests see a cold state.""" + global _embedding_cache + with _embedding_cache_lock: + _embedding_cache = None + def get_embedding(text: str, model: str | None = None) -> list[float]: - """Generate embedding. Uses fastembed (in-process) by default, ollama/HTTP as fallback.""" + """Generate embedding, cached by (model, text). 
Compute is delegated to + _compute_embedding (fastembed in-process by default, ollama/HTTP fallback).""" + cache = _get_embedding_cache() + key_model = model or get_embedding_model() + key = f"{key_model}:{hashlib.sha256(text.encode()).hexdigest()}" + cached = cache.get(key) + if cached is not None: + return cached + embedding = _compute_embedding(text, model) + cache.set(key, embedding) + return embedding + + +def _compute_embedding(text: str, model: str | None = None) -> list[float]: + """Compute an embedding. Uses fastembed (in-process) by default, ollama/HTTP as fallback.""" provider = get_embedding_provider() if provider == "fastembed": diff --git a/tests/test_embedding_cache.py b/tests/test_embedding_cache.py new file mode 100644 index 0000000..5306bd9 --- /dev/null +++ b/tests/test_embedding_cache.py @@ -0,0 +1,71 @@ +"""Query-embedding LRU cache: repeated texts hit the model once. + +Embedding is the second-largest read-path cost; agents re-ask the same +queries, so a (model, text)-keyed cache pays for itself immediately. 
+""" + +import pytest + +from zettelforge.config import reload_config + + +@pytest.fixture(autouse=True) +def _fresh_cache(): + import zettelforge.vector_memory as vm + + vm.reset_embedding_cache_for_tests() + yield + vm.reset_embedding_cache_for_tests() + reload_config() + + +def test_repeated_text_computes_once(monkeypatch): + import zettelforge.vector_memory as vm + + calls = {'n': 0} + orig = vm._compute_embedding + + def counting(text, model=None): + calls['n'] += 1 + return orig(text, model) + + monkeypatch.setattr(vm, '_compute_embedding', counting) + + e1 = vm.get_embedding('What tools does APT28 use?') + e2 = vm.get_embedding('What tools does APT28 use?') + assert calls['n'] == 1 + assert e1 == e2 + + +def test_distinct_texts_compute_separately(monkeypatch): + import zettelforge.vector_memory as vm + + calls = {'n': 0} + orig = vm._compute_embedding + + def counting(text, model=None): + calls['n'] += 1 + return orig(text, model) + + monkeypatch.setattr(vm, '_compute_embedding', counting) + + vm.get_embedding('first query') + vm.get_embedding('second query') + assert calls['n'] == 2 + + +def test_cache_keyed_by_model(monkeypatch): + import zettelforge.vector_memory as vm + + calls = {'n': 0} + orig = vm._compute_embedding + + def counting(text, model=None): + calls['n'] += 1 + return orig(text, model) + + monkeypatch.setattr(vm, '_compute_embedding', counting) + + vm.get_embedding('same text', model='model-a') + vm.get_embedding('same text', model='model-b') + assert calls['n'] == 2 From 6bae64ca5f73cb327dbca92ecf5354a97e1bab4f Mon Sep 17 00:00:00 2001 From: Patrick Roland Date: Tue, 9 Jun 2026 13:28:22 -0500 Subject: [PATCH 10/24] perf(defense): vectorize MemSAD gate, cache n-gram counters, bound reference fetch The gate was 93% of remember() latency at 50 references (1.1s/ingest on LoCoMo): leave-one-out calibration ran O(n^2) pure-Python 768-dim cosines and rebuilt every reference's n-gram Counter n times per ingest, and the call site fetched the entire 
domain per write. - numpy pairwise cosine + one-shot leave-one-out JSD over a shared vocabulary (Counter subtraction from the pooled total is exact) - content-hash keyed n-gram counter cache - get_recent_notes_by_domain bounded SQL fetch (4x overfetch window) - pure-Python originals retained as degenerate-shape fallbacks Characterization tests pin score/threshold/flag equivalence to 1e-9 against the verbatim original math. Warm-path calibration: 75ms -> 1.6ms on synthetic 50x700-word references; full evaluate ~3.4ms. Co-Authored-By: Claude Opus 4.8 (1M context) --- src/zettelforge/memory_defense.py | 132 +++++++++++++- src/zettelforge/memory_manager.py | 8 +- src/zettelforge/sqlite_backend.py | 10 + src/zettelforge/storage_backend.py | 10 + tests/test_defense_reference_window.py | 60 ++++++ tests/test_memory_defense_equivalence.py | 223 +++++++++++++++++++++++ 6 files changed, 439 insertions(+), 4 deletions(-) create mode 100644 tests/test_defense_reference_window.py create mode 100644 tests/test_memory_defense_equivalence.py diff --git a/src/zettelforge/memory_defense.py b/src/zettelforge/memory_defense.py index 1cd6529..d3f39f8 100644 --- a/src/zettelforge/memory_defense.py +++ b/src/zettelforge/memory_defense.py @@ -19,6 +19,8 @@ from pathlib import Path from typing import Any +import numpy as np + from zettelforge.config import MemoryDefenseConfig, get_config from zettelforge.log import get_logger from zettelforge.memory_store import get_default_data_dir @@ -26,6 +28,28 @@ _logger = get_logger("zettelforge.memory_defense") _VALID_MODES = {"audit", "block", "quarantine"} +# Content-hash keyed n-gram counter cache. The gate recounts the same +# reference texts on every ingest; counts are pure functions of +# (text, ngram_size), so caching is exact. 
+_NGRAM_CACHE_MAX = 8192 +_ngram_cache: dict[tuple[str, int], Counter[str]] = {} + + +def reset_defense_caches_for_tests() -> None: + """Drop module caches so tests see a cold state.""" + _ngram_cache.clear() + + +def _cached_ngram_counts(text: str, ngram_size: int) -> Counter[str]: + key = (hashlib.sha1(text.encode(), usedforsecurity=False).hexdigest(), int(ngram_size)) + cached = _ngram_cache.get(key) + if cached is None: + if len(_ngram_cache) >= _NGRAM_CACHE_MAX: + _ngram_cache.clear() + cached = _ngram_counts(text, ngram_size) + _ngram_cache[key] = cached + return cached + class MemoryAnomalyError(RuntimeError): """Raised when memory defense blocks or quarantines a write.""" @@ -281,6 +305,42 @@ def _select_reference_notes(candidate: Any, notes: list[Any], limit: int) -> lis def _calibration_scores(notes: list[Any], cfg: MemoryDefenseConfig) -> list[float]: + """Leave-one-out anomaly scores over the reference set. + + Vectorized with numpy: the previous pure-Python loop (O(n^2) cosines + plus n full n-gram recounts of every other note) dominated remember() + latency (~1.1s at 50 references). Falls back to the original loop when + vectors are missing or dimensions are mixed. 
+ """ + if len(notes) < 2: + return _calibration_scores_py(notes, cfg) + + vectors = [_note_vector(note) for note in notes] + dims = {len(v) for v in vectors} + if len(dims) != 1 or 0 in dims: + return _calibration_scores_py(notes, cfg) + + matrix = np.asarray(vectors, dtype=np.float64) + norms = np.linalg.norm(matrix, axis=1) + safe_norms = np.where(norms == 0.0, 1.0, norms) + normalized = matrix / safe_norms[:, None] + normalized[norms == 0.0] = 0.0 + sims = normalized @ normalized.T + np.fill_diagonal(sims, -np.inf) + max_sim = sims.max(axis=1) + np.fill_diagonal(sims, 0.0) + mean_sim = sims.sum(axis=1) / (len(notes) - 1) + memsad = 0.5 * max_sim + 0.5 * mean_sim + + jsd = _leave_one_out_jsd([_note_text(note) for note in notes], cfg.ngram_size) + blended = memsad + cfg.lexical_weight * np.asarray(jsd, dtype=np.float64) + # Plain Python floats at the boundary: numpy scalars would leak into + # decision fields (np.bool_ flags, unserializable np.float64). + return [float(score) for score in blended] + + +def _calibration_scores_py(notes: list[Any], cfg: MemoryDefenseConfig) -> list[float]: + """Original pure-Python scoring; retained as the degenerate-shape fallback.""" scores: list[float] = [] for i, note in enumerate(notes): refs = notes[:i] + notes[i + 1 :] @@ -294,7 +354,73 @@ def _calibration_scores(notes: list[Any], cfg: MemoryDefenseConfig) -> list[floa return scores +def _leave_one_out_jsd(texts: list[str], ngram_size: int) -> list[float]: + """JSD(note_i, all-others) for every i, over one shared vocabulary. + + Counter subtraction from the pooled total is exact (integer counts), so + each row reproduces _jensen_shannon(c_i, sum-of-others) bit-for-bit up + to float summation order. 
+ """ + counters = [_cached_ngram_counts(text, ngram_size) for text in texts] + total: Counter[str] = Counter() + for counter in counters: + total.update(counter) + vocab = {key: idx for idx, key in enumerate(total)} + n, v = len(counters), len(vocab) + + counts = np.zeros((n, max(v, 1)), dtype=np.float64) + for i, counter in enumerate(counters): + for key, value in counter.items(): + counts[i, vocab[key]] = float(value) + row_tot = counts.sum(axis=1) + total_vec = counts.sum(axis=0) + rest = total_vec[None, :] - counts + rest_tot = row_tot.sum() - row_tot + + results = np.zeros(n, dtype=np.float64) + both_empty = (row_tot == 0.0) & (rest_tot == 0.0) + one_empty = ((row_tot == 0.0) | (rest_tot == 0.0)) & ~both_empty + results[one_empty] = 1.0 + + regular = ~(both_empty | one_empty) + if regular.any(): + p = counts[regular] / row_tot[regular, None] + q = rest[regular] / rest_tot[regular, None] + m = 0.5 * (p + q) + with np.errstate(divide="ignore", invalid="ignore"): + p_term = np.where(p > 0.0, p * np.log2(np.where(p > 0.0, p / m, 1.0)), 0.0) + q_term = np.where(q > 0.0, q * np.log2(np.where(q > 0.0, q / m, 1.0)), 0.0) + results[regular] = np.clip(0.5 * (p_term.sum(axis=1) + q_term.sum(axis=1)), 0.0, 1.0) + return list(results) + + def _memsad_score(candidate_vector: list[float], refs: list[Any]) -> tuple[float, float, float]: + if not refs: + return 0.0, 0.0, 0.0 + ref_vectors = [_note_vector(ref) for ref in refs] + dim = len(candidate_vector) + if dim == 0 or any(len(v) != dim for v in ref_vectors): + return _memsad_score_py(candidate_vector, refs) + + matrix = np.asarray(ref_vectors, dtype=np.float64) + candidate = np.asarray(candidate_vector, dtype=np.float64) + cand_norm = float(np.linalg.norm(candidate)) + if cand_norm == 0.0: + similarities = np.zeros(len(refs), dtype=np.float64) + else: + norms = np.linalg.norm(matrix, axis=1) + safe_norms = np.where(norms == 0.0, 1.0, norms) + similarities = (matrix @ candidate) / (safe_norms * cand_norm) + 
similarities[norms == 0.0] = 0.0 + max_similarity = float(similarities.max()) + mean_similarity = float(similarities.mean()) + return 0.5 * max_similarity + 0.5 * mean_similarity, max_similarity, mean_similarity + + +def _memsad_score_py( + candidate_vector: list[float], refs: list[Any] +) -> tuple[float, float, float]: + """Original pure-Python scoring; retained as the degenerate-shape fallback.""" similarities = [_cosine(candidate_vector, _note_vector(ref)) for ref in refs] if not similarities: return 0.0, 0.0, 0.0 @@ -322,10 +448,10 @@ def _stddev(values: list[float], mean: float) -> float: def _lexical_jsd(text: str, reference_texts: list[str], ngram_size: int) -> float: - candidate = _ngram_counts(text, ngram_size) - reference = Counter() + candidate = _cached_ngram_counts(text, ngram_size) + reference: Counter[str] = Counter() for ref_text in reference_texts: - reference.update(_ngram_counts(ref_text, ngram_size)) + reference.update(_cached_ngram_counts(ref_text, ngram_size)) return _jensen_shannon(candidate, reference) diff --git a/src/zettelforge/memory_manager.py b/src/zettelforge/memory_manager.py index b412364..1e6c1a7 100644 --- a/src/zettelforge/memory_manager.py +++ b/src/zettelforge/memory_manager.py @@ -294,7 +294,13 @@ def _remember_inner( _p = time.perf_counter() try: - reference_notes = self.store.get_notes_by_domain(domain) + # Bounded reference window: the gate keeps the most recent + # max_reference_notes valid-vector notes, so fetching the whole + # domain (O(n) rows + Pydantic parses per ingest) is waste. 4x + # overfetch leaves margin for notes the gate filters out. 
+ _defense_cfg = get_config().governance.memory_defense + _fetch_limit = max(200, 4 * _defense_cfg.max_reference_notes) + reference_notes = self.store.get_recent_notes_by_domain(domain, _fetch_limit) self.memory_defense.enforce( note, reference_notes, diff --git a/src/zettelforge/sqlite_backend.py b/src/zettelforge/sqlite_backend.py index 9b0cf43..a08afa2 100644 --- a/src/zettelforge/sqlite_backend.py +++ b/src/zettelforge/sqlite_backend.py @@ -424,6 +424,16 @@ def get_recent_notes(self, limit: int = 10) -> list[MemoryNote]: rows = cur.fetchall() return [_row_to_note(r) for r in rows] + def get_recent_notes_by_domain(self, domain: str, limit: int) -> list[MemoryNote]: + with self._write_lock: + self._check_open() + cur = self._conn.execute( + "SELECT * FROM notes WHERE domain = ? ORDER BY created_at DESC LIMIT ?", + (domain, limit), + ) + rows = cur.fetchall() + return [_row_to_note(r) for r in rows] + def count_notes(self) -> int: with self._write_lock: self._check_open() diff --git a/src/zettelforge/storage_backend.py b/src/zettelforge/storage_backend.py index 4e2027c..faed865 100644 --- a/src/zettelforge/storage_backend.py +++ b/src/zettelforge/storage_backend.py @@ -173,6 +173,16 @@ def get_kg_node_by_id(self, node_id: str) -> dict | None: """ ... + def get_recent_notes_by_domain(self, domain: str, limit: int) -> list[MemoryNote]: + """Most recent notes in a domain (newest first), capped at limit. + + Concrete default delegates to get_notes_by_domain; backends with + indexed storage should override with a bounded query. + """ + notes = self.get_notes_by_domain(domain) + notes.sort(key=lambda n: getattr(n, "created_at", "") or "", reverse=True) + return notes[: max(0, int(limit))] + def get_kg_edges_from(self, node_id: str) -> list[dict]: """Outgoing KG edges from a node, by internal node_id. 
diff --git a/tests/test_defense_reference_window.py b/tests/test_defense_reference_window.py new file mode 100644 index 0000000..443c583 --- /dev/null +++ b/tests/test_defense_reference_window.py @@ -0,0 +1,60 @@ +"""Bounded reference fetch for the memory defense gate. + +The gate only keeps the most recent max_reference_notes; fetching the +whole domain per ingest was O(n) rows + Pydantic parses. +""" + +import pytest + +from zettelforge.config import reload_config + + +@pytest.fixture(autouse=True) +def _no_enrichment(monkeypatch): + monkeypatch.setenv('ZETTELFORGE_ENRICHMENT_ENABLED', 'false') + reload_config() + yield + reload_config() + + +def test_recent_notes_by_domain_orders_and_limits(tmp_path): + from zettelforge.memory_manager import MemoryManager + + mm = MemoryManager( + jsonl_path=str(tmp_path / 'n.jsonl'), lance_path=str(tmp_path / 'v') + ) + for i in range(5): + mm.remember(f'cti note {i}', source_type='threat_report', + source_ref=f'c{i}', domain='cti') + for i in range(3): + mm.remember(f'general note {i}', source_type='conversation', + source_ref=f'g{i}', domain='general') + + recent = mm.store.get_recent_notes_by_domain('cti', 3) + assert len(recent) == 3 + assert all(n.metadata.domain == 'cti' for n in recent) + timestamps = [n.created_at for n in recent] + assert timestamps == sorted(timestamps, reverse=True) + + +def test_defense_gate_receives_bounded_reference_set(tmp_path, monkeypatch): + from zettelforge.memory_manager import MemoryManager + + mm = MemoryManager( + jsonl_path=str(tmp_path / 'n.jsonl'), lance_path=str(tmp_path / 'v') + ) + seen = {'sizes': []} + orig = mm.memory_defense.enforce + + def recording(note, reference_notes, **kwargs): + seen['sizes'].append(len(reference_notes)) + return orig(note, reference_notes, **kwargs) + + monkeypatch.setattr(mm.memory_defense, 'enforce', recording) + for i in range(6): + mm.remember(f'note {i}', source_type='conversation', + source_ref=f's{i}', domain='general') + + # Window is max(200, 4 
* max_reference_notes); with 6 notes the gate + # sees at most the existing store, never more than the window. + assert seen['sizes'] == [min(i, 200) for i in range(6)] diff --git a/tests/test_memory_defense_equivalence.py b/tests/test_memory_defense_equivalence.py new file mode 100644 index 0000000..21bee72 --- /dev/null +++ b/tests/test_memory_defense_equivalence.py @@ -0,0 +1,223 @@ +"""MemSAD vectorization must be numerically equivalent to the original math. + +The gate ran leave-one-out calibration in pure Python (~1.1s per ingest at +50 references; 93% of remember() latency). The numpy rewrite must produce +identical scores, thresholds, and flag decisions. Oracle functions below are +verbatim copies of the pre-vectorization implementation. +""" + +import math +import random +from collections import Counter +from types import SimpleNamespace + +import pytest + +from zettelforge.config import get_config, reload_config + + +# ── Oracle: verbatim pre-vectorization implementation ────────────────────── + +def _oracle_cosine(a, b): + if not a or not b or len(a) != len(b): + return 0.0 + dot = sum(x * y for x, y in zip(a, b, strict=True)) + norm_a = math.sqrt(sum(x * x for x in a)) + norm_b = math.sqrt(sum(y * y for y in b)) + if norm_a == 0.0 or norm_b == 0.0: + return 0.0 + return dot / (norm_a * norm_b) + + +def _oracle_ngram_counts(text, ngram_size): + normalized = " ".join(text.lower().split()) + if not normalized: + return Counter() + n = max(1, int(ngram_size)) + if len(normalized) <= n: + return Counter([normalized]) + return Counter(normalized[i : i + n] for i in range(0, len(normalized) - n + 1)) + + +def _oracle_jensen_shannon(left, right): + if not left and not right: + return 0.0 + if not left or not right: + return 1.0 + left_total = sum(left.values()) + right_total = sum(right.values()) + keys = set(left) | set(right) + divergence = 0.0 + for key in keys: + p = left[key] / left_total + q = right[key] / right_total + m = 0.5 * (p + q) + if p: + 
divergence += 0.5 * p * math.log2(p / m) + if q: + divergence += 0.5 * q * math.log2(q / m) + return min(1.0, max(0.0, divergence)) + + +def _oracle_lexical_jsd(text, reference_texts, ngram_size): + candidate = _oracle_ngram_counts(text, ngram_size) + reference = Counter() + for ref_text in reference_texts: + reference.update(_oracle_ngram_counts(ref_text, ngram_size)) + return _oracle_jensen_shannon(candidate, reference) + + +def _oracle_memsad_score(candidate_vector, refs): + similarities = [_oracle_cosine(candidate_vector, r) for r in refs] + if not similarities: + return 0.0, 0.0, 0.0 + max_similarity = max(similarities) + mean_similarity = sum(similarities) / len(similarities) + return 0.5 * max_similarity + 0.5 * mean_similarity, max_similarity, mean_similarity + + +def _oracle_calibration_scores(vectors, texts, cfg_lexical_weight, cfg_ngram_size): + scores = [] + for i in range(len(vectors)): + ref_vecs = vectors[:i] + vectors[i + 1 :] + if not ref_vecs: + continue + memsad, _, _ = _oracle_memsad_score(vectors[i], ref_vecs) + jsd = _oracle_lexical_jsd(texts[i], texts[:i] + texts[i + 1 :], cfg_ngram_size) + scores.append(memsad + cfg_lexical_weight * jsd) + return scores + + +# ── Fixtures ──────────────────────────────────────────────────────────────── + +def _make_note(i, dim=64, text=None, seed=None): + rng = random.Random(seed if seed is not None else i) + vec = [rng.uniform(-1, 1) for _ in range(dim)] + body = text if text is not None else ( + f"Session {i}: " + " ".join(f"token{(i * 7 + j) % 23}" for j in range(120)) + ) + return SimpleNamespace( + id=f"n{i}", + content=SimpleNamespace(raw=body), + embedding=SimpleNamespace(vector=vec), + created_at=f"2026-06-09T{10 + i // 60:02d}:{i % 60:02d}:00", + ) + + +@pytest.fixture(autouse=True) +def _fresh(monkeypatch): + import zettelforge.memory_defense as md + + monkeypatch.setenv('ZETTELFORGE_ENRICHMENT_ENABLED', 'false') + reload_config() + md.reset_defense_caches_for_tests() + yield + 
md.reset_defense_caches_for_tests() + reload_config() + + +def test_memsad_score_matches_oracle(): + import zettelforge.memory_defense as md + + notes = [_make_note(i) for i in range(12)] + candidate = _make_note(99, seed=4242) + got = md._memsad_score(candidate.embedding.vector, notes) + want = _oracle_memsad_score( + candidate.embedding.vector, [n.embedding.vector for n in notes] + ) + assert got == pytest.approx(want, abs=1e-9) + + +def test_calibration_scores_match_oracle(): + import zettelforge.memory_defense as md + + cfg = get_config().governance.memory_defense + notes = [_make_note(i) for i in range(12)] + got = md._calibration_scores(notes, cfg) + want = _oracle_calibration_scores( + [n.embedding.vector for n in notes], + [n.content.raw for n in notes], + cfg.lexical_weight, + cfg.ngram_size, + ) + assert got == pytest.approx(want, abs=1e-9) + + +def test_calibration_handles_empty_text_note(): + import zettelforge.memory_defense as md + + cfg = get_config().governance.memory_defense + notes = [_make_note(i) for i in range(6)] + notes[2] = _make_note(2, text=" ") + got = md._calibration_scores(notes, cfg) + want = _oracle_calibration_scores( + [n.embedding.vector for n in notes], + [n.content.raw for n in notes], + cfg.lexical_weight, + cfg.ngram_size, + ) + assert got == pytest.approx(want, abs=1e-9) + + +def test_lexical_jsd_matches_oracle(): + import zettelforge.memory_defense as md + + cfg = get_config().governance.memory_defense + notes = [_make_note(i) for i in range(8)] + candidate_text = "A brand new memory about painting classes in Toronto." 
+ got = md._lexical_jsd(candidate_text, [n.content.raw for n in notes], cfg.ngram_size) + want = _oracle_lexical_jsd(candidate_text, [n.content.raw for n in notes], cfg.ngram_size) + assert got == pytest.approx(want, abs=1e-9) + + +def test_evaluate_decision_matches_oracle_fields(): + import zettelforge.memory_defense as md + + notes = [_make_note(i) for i in range(60)] + candidate = _make_note(99, seed=31337) + gate = md.MemoryAnomalyGate() + decision = gate.evaluate(candidate, notes, domain="cti") + assert decision.score is not None, f"early-out decision: {decision.reason}" + + cfg = get_config().governance.memory_defense + refs = md._select_reference_notes(candidate, notes, cfg.max_reference_notes) + want_cal = _oracle_calibration_scores( + [n.embedding.vector for n in refs], + [n.content.raw for n in refs], + cfg.lexical_weight, + cfg.ngram_size, + ) + want_memsad, want_max, want_mean = _oracle_memsad_score( + candidate.embedding.vector, [n.embedding.vector for n in refs] + ) + want_jsd = _oracle_lexical_jsd( + candidate.content.raw, [n.content.raw for n in refs], cfg.ngram_size + ) + want_score = want_memsad + cfg.lexical_weight * want_jsd + want_mean_cal = sum(want_cal) / len(want_cal) + want_std = math.sqrt( + sum((v - want_mean_cal) ** 2 for v in want_cal) / (len(want_cal) - 1) + ) + want_threshold = want_mean_cal + float(cfg.kappa) * want_std + + assert decision.score == pytest.approx(want_score, abs=1e-9) + assert decision.threshold == pytest.approx(want_threshold, abs=1e-9) + assert decision.memsad_score == pytest.approx(want_memsad, abs=1e-9) + assert decision.lexical_jsd == pytest.approx(want_jsd, abs=1e-9) + assert decision.max_similarity == pytest.approx(want_max, abs=1e-9) + assert decision.flagged == (want_score > want_threshold) + + +def test_counter_cache_invalidates_on_content_change(): + import zettelforge.memory_defense as md + + cfg = get_config().governance.memory_defense + note = _make_note(1, text="original text about hiking") + first = 
md._lexical_jsd("query text", [note.content.raw], cfg.ngram_size) + note.content.raw = "completely different content about databases" + second = md._lexical_jsd("query text", [note.content.raw], cfg.ngram_size) + want = _oracle_lexical_jsd( + "query text", ["completely different content about databases"], cfg.ngram_size + ) + assert second == pytest.approx(want, abs=1e-9) + assert first != pytest.approx(second, abs=1e-9) From 74054dd4e381443e6b25ad3e6dd24d764da85c70 Mon Sep 17 00:00:00 2001 From: Patrick Roland Date: Tue, 9 Jun 2026 13:31:51 -0500 Subject: [PATCH 11/24] perf(recall): adopt benchmark-tuned rerank defaults (256 chars, 8 candidates) CTI grid 2026-06-09: accuracy holds at 75% from 512c-50n down to 128c-8n; p50 drops 91ms to 51ms at 256c-8n in-grid. 256c-8n picked over 128c-8n for rerank-context headroom. Co-Authored-By: Claude Opus 4.8 (1M context) --- config.default.yaml | 6 ++++++ src/zettelforge/config.py | 9 +++++---- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/config.default.yaml b/config.default.yaml index acf8ffb..9acd79c 100644 --- a/config.default.yaml +++ b/config.default.yaml @@ -298,6 +298,12 @@ retrieval: similarity_threshold: 0.25 entity_boost: 2.5 max_graph_depth: 2 + # Cross-encoder rerank policy (tuned 2026-06-09, CTI suite): accuracy + # holds while bounding the dominant read-path cost. Env kill switch: + # ZETTELFORGE_RERANK_ENABLED=false + rerank_enabled: true + rerank_max_candidates: 8 + rerank_doc_chars: 256 # ── Synthesis ─────────────────────────────────────────────────────────────── diff --git a/src/zettelforge/config.py b/src/zettelforge/config.py index 0153d3a..7f26724 100644 --- a/src/zettelforge/config.py +++ b/src/zettelforge/config.py @@ -194,11 +194,12 @@ class RetrievalConfig: entity_boost: float = 2.5 max_graph_depth: int = 2 # Cross-encoder rerank policy: the reranker is the dominant read-path - # cost (ONNX on CPU), so its work is bounded. 
Defaults preserve prior - # behavior; benchmark-tuned values are set in config.default.yaml. + # cost (ONNX on CPU), so its work is bounded. Tuned on the CTI suite + # (2026-06-09 grid): accuracy holds at 75% from 512c-50n down to + # 128c-8n while p50 drops 91ms -> 42ms; 256c-8n picked for headroom. rerank_enabled: bool = True - rerank_max_candidates: int = 50 - rerank_doc_chars: int = 512 + rerank_max_candidates: int = 8 + rerank_doc_chars: int = 256 @dataclass From b2f4d61d0696c250eaa7c4ff2e5e7214b8598ae7 Mon Sep 17 00:00:00 2001 From: Patrick Roland Date: Tue, 9 Jun 2026 13:36:42 -0500 Subject: [PATCH 12/24] perf(onnx): pin intra-op threads for small-batch inference, add rerank_model knob 20-core oversubscription thrashed small batches: rerank 8x256c pairs 23.7ms -> 11.5ms and single-query embedding 5.9ms -> 4.5ms at 8 threads (GB10 measurements). rerank_model makes the cross-encoder swappable; model grid kept ms-marco-MiniLM-L-6-v2 (jina tiny/turbo no better). Co-Authored-By: Claude Opus 4.8 (1M context) --- src/zettelforge/config.py | 9 +++++++++ src/zettelforge/memory_manager.py | 21 +++++++++++++-------- src/zettelforge/vector_memory.py | 5 ++++- 3 files changed, 26 insertions(+), 9 deletions(-) diff --git a/src/zettelforge/config.py b/src/zettelforge/config.py index 7f26724..ca096a5 100644 --- a/src/zettelforge/config.py +++ b/src/zettelforge/config.py @@ -100,6 +100,10 @@ class EmbeddingConfig: url: str = "http://127.0.0.1:11434" # only used when provider=ollama model: str = "nomic-ai/nomic-embed-text-v1.5-Q" dimensions: int = 768 + # ONNX intra-op threads for single-query embedding. Oversubscription on + # many-core hosts hurts small-batch latency (measured 5.9ms -> 4.5ms at + # 8 threads on a 20-core GB10). 0 = onnxruntime default. 
+ threads: int = 8 @dataclass @@ -200,6 +204,11 @@ class RetrievalConfig: rerank_enabled: bool = True rerank_max_candidates: int = 8 rerank_doc_chars: int = 256 + rerank_model: str = "Xenova/ms-marco-MiniLM-L-6-v2" + # ONNX intra-op threads for the cross-encoder. Small rerank batches + # thrash when onnxruntime grabs every core (measured 23.7ms -> 11.5ms + # at 8 threads on a 20-core GB10). 0 = onnxruntime default. + rerank_threads: int = 8 @dataclass diff --git a/src/zettelforge/memory_manager.py b/src/zettelforge/memory_manager.py index 1e6c1a7..2264aa4 100644 --- a/src/zettelforge/memory_manager.py +++ b/src/zettelforge/memory_manager.py @@ -49,21 +49,26 @@ from zettelforge.vector_memory import preload_embedding_model from zettelforge.vector_retriever import VectorRetriever -# ── Reranker singleton ─────────────────────────────────────────────────────── -_reranker = None +# ── Reranker singletons (one per configured model) ────────────────────────── +_rerankers: dict[str, object] = {} _reranker_lock = threading.Lock() def _get_reranker(): - """Get or create cross-encoder reranker (singleton, ~80MB, loads once).""" - global _reranker - if _reranker is None: + """Get or create the configured cross-encoder reranker (loads once per model).""" + retrieval_cfg = get_config().retrieval + model = retrieval_cfg.rerank_model + reranker = _rerankers.get(model) + if reranker is None: with _reranker_lock: - if _reranker is None: + reranker = _rerankers.get(model) + if reranker is None: from fastembed.rerank.cross_encoder import TextCrossEncoder - _reranker = TextCrossEncoder("Xenova/ms-marco-MiniLM-L-6-v2") - return _reranker + threads = retrieval_cfg.rerank_threads or None + reranker = TextCrossEncoder(model, threads=threads) + _rerankers[model] = reranker + return reranker @dataclass diff --git a/src/zettelforge/vector_memory.py b/src/zettelforge/vector_memory.py index 5e92a8c..2a76f4b 100644 --- a/src/zettelforge/vector_memory.py +++ b/src/zettelforge/vector_memory.py @@ 
-72,7 +72,10 @@ def _get_embed_model(): if _embed_model is None: from fastembed import TextEmbedding - _embed_model = TextEmbedding(get_embedding_model()) + from zettelforge.config import get_config + + threads = get_config().embedding.threads or None + _embed_model = TextEmbedding(get_embedding_model(), threads=threads) return _embed_model From b3d2c5eee019e01236272ce477fd11c240ef8c24 Mon Sep 17 00:00:00 2001 From: Patrick Roland Date: Tue, 9 Jun 2026 13:41:12 -0500 Subject: [PATCH 13/24] feat(entities): free-text person extraction for conversational recall Person names were only extracted from 'Name:' dialogue lines, so conversational queries produced no entities and graph traversal never fired on them (RFC-001 gap). Single capitalized tokens in running text now qualify, filtered by sentence position, proper-noun-phrase adjacency, and an expanded stopword list (demonyms, vendors, celebrations). CTI suite unchanged at 75%. Co-Authored-By: Claude Opus 4.8 (1M context) --- src/zettelforge/entity_indexer.py | 99 +++++++++++++++++++++++++++ tests/test_conversational_entities.py | 55 +++++++++++++++ 2 files changed, 154 insertions(+) diff --git a/src/zettelforge/entity_indexer.py b/src/zettelforge/entity_indexer.py index 9b84a69..c748670 100644 --- a/src/zettelforge/entity_indexer.py +++ b/src/zettelforge/entity_indexer.py @@ -128,6 +128,13 @@ class EntityExtractor: # Regex for conversational person names from dialogue format "Name: text" _PERSON_PATTERN = re.compile(r"(?:^|\n)\s*([A-Z][a-z]{2,15}):", re.MULTILINE) + # Candidate person names anywhere in free text. Filtered hard below: + # sentence-initial tokens and multi-word proper noun phrases are skipped, + # so only single capitalized tokens in running text survive ("with + # Caroline", "What did Melanie paint?"). This is what lets query-side + # entity extraction fire graph traversal on conversational questions. 
+ _FREETEXT_PERSON_PATTERN = re.compile(r"\b([A-Z][a-z]{2,15})\b") + # Common words that match the person pattern but aren't names _NAME_STOPWORDS: ClassVar[set[str]] = { "the", @@ -180,6 +187,80 @@ class EntityExtractor: "october", "november", "december", + # Capitalized mid-sentence terms that are not given names + "god", + "mom", + "dad", + "mum", + "grandma", + "grandpa", + "christmas", + "thanksgiving", + "halloween", + "easter", + "english", + "french", + "spanish", + "german", + "chinese", + "japanese", + "american", + "british", + "canadian", + "russian", + "iranian", + "korean", + "israeli", + "ukrainian", + "indian", + "pakistani", + "european", + "asian", + "african", + "italian", + "dutch", + "polish", + "turkish", + "mexican", + "brazilian", + "australian", + # Vendors and platforms that appear capitalized mid-sentence in CTI text + "microsoft", + "windows", + "linux", + "android", + "cisco", + "fortinet", + "ivanti", + "oracle", + "intel", + "nvidia", + "samsung", + "internet", + "facebook", + "instagram", + "youtube", + "netflix", + "spotify", + "amazon", + "google", + "apple", + "reddit", + "twitter", + "tiktok", + "covid", + "awesome", + "great", + "cool", + "nice", + "haha", + "congrats", + "sorry", + "happy", + "glad", + "today", + "tomorrow", + "yesterday", } # Regex for common locations @@ -240,6 +321,24 @@ def extract_regex(self, text: str) -> dict[str, list[str]]: for name in person_matches: if name.lower() not in self._NAME_STOPWORDS and len(name) >= 3: persons.add(name.lower()) + + # Person names from running text: single capitalized tokens that are + # not sentence-initial and not part of a proper noun phrase. 
+ for match in self._FREETEXT_PERSON_PATTERN.finditer(text): + word = match.group(1) + lower = word.lower() + if lower in persons or lower in self._NAME_STOPWORDS: + continue + prefix = text[: match.start()].rstrip(" \"'(") + if not prefix or prefix[-1] in ".!?:\n": + continue # sentence-initial token, not a reliable name signal + following = re.match(r"\s+[A-Z][a-z]", text[match.end() :]) + if following is not None: + continue # "Cobalt Strike", "New York": proper noun phrase + preceding = re.search(r"([A-Za-z][\w-]*)\s*$", prefix) + if preceding is not None and preceding.group(1)[0].isupper(): + continue # second word of a proper noun phrase + persons.add(lower) results["person"] = list(persons) # Locations diff --git a/tests/test_conversational_entities.py b/tests/test_conversational_entities.py index 760431b..bf35f81 100644 --- a/tests/test_conversational_entities.py +++ b/tests/test_conversational_entities.py @@ -224,3 +224,58 @@ def test_search_entities_finds_across_types(self): results = idx.search_entities("paris") assert "location" in results assert "organization" in results + + +class TestFreeTextPersonExtraction: + """Person names from free text, not just 'Name:' dialogue lines. + + Query-side extraction is what lets graph traversal fire on + conversational questions ("What did Melanie paint?"); ingest-side + extraction is what indexes friends mentioned inside turns. + """ + + def test_query_side_person_extracted(self): + ext = EntityExtractor() + result = ext.extract_regex("What did Melanie paint last May?") + assert "melanie" in result["person"] + + def test_mid_sentence_person_extracted(self): + ext = EntityExtractor() + result = ext.extract_regex("I went hiking with Caroline and her dog.") + assert "caroline" in result["person"] + + def test_sentence_initial_words_not_persons(self): + ext = EntityExtractor() + result = ext.extract_regex("What tools does the group use? 
The group adapted.") + assert "what" not in result["person"] + assert "the" not in result["person"] + + def test_proper_noun_phrases_skipped(self): + ext = EntityExtractor() + result = ext.extract_regex( + "APT28 used Cobalt Strike against New York targets." + ) + assert "cobalt" not in result["person"] + assert "strike" not in result["person"] + assert "new" not in result["person"] + assert "york" not in result["person"] + + def test_capitalized_common_terms_skipped(self): + ext = EntityExtractor() + result = ext.extract_regex("We celebrated Christmas in Toronto with Mom.") + assert "christmas" not in result["person"] + assert "mom" not in result["person"] + assert "toronto" in result["location"] + + def test_dialogue_names_still_extracted(self): + ext = EntityExtractor() + result = ext.extract_regex("Melanie: I started a painting class!") + assert "melanie" in result["person"] + + def test_demonyms_and_vendors_not_persons(self): + ext = EntityExtractor() + result = ext.extract_regex( + "APT28 is a Russian threat actor abusing Microsoft services." + ) + assert "russian" not in result["person"] + assert "microsoft" not in result["person"] From 48659d8d2135ffccab5d2b4c7f95f0666d911423 Mon Sep 17 00:00:00 2001 From: Patrick Roland Date: Tue, 9 Jun 2026 13:45:29 -0500 Subject: [PATCH 14/24] feat(recall): IDF-style fan-out gate for query entities Free-text person extraction regressed LoCoMo 11% -> 5%: speaker names map to every session, so graph traversal flooded blended recall with undiscriminative notes. Query entities whose KG out-degree exceeds retrieval.entity_max_fanout (default 25) are now skipped by the graph/causal/entity-augmentation stages. KG out-degree is the right signal: supersession prunes the entity index but MENTIONED_IN edges accumulate one per note. Also: LOCOMO_CHUNK_SIZE harness knob for MemPalace-granularity chunked ingestion (removes 4000-char truncation). 
Co-Authored-By: Claude Opus 4.8 (1M context) --- benchmarks/locomo_benchmark.py | 46 +- .../locomo_results_optimized.json | 962 ++++++++++++++++++ src/zettelforge/config.py | 4 + src/zettelforge/memory_manager.py | 32 + tests/test_graph_scoping.py | 32 + 5 files changed, 1066 insertions(+), 10 deletions(-) create mode 100644 benchmarks/results/session_2026-06-09/locomo_results_optimized.json diff --git a/benchmarks/locomo_benchmark.py b/benchmarks/locomo_benchmark.py index ae9072f..1f88bd6 100644 --- a/benchmarks/locomo_benchmark.py +++ b/benchmarks/locomo_benchmark.py @@ -131,18 +131,44 @@ def ingest_conversations(mm: MemoryManager, turns: List[Dict], batch_sessions: b sessions[key] = {"date": turn["date"], "lines": [], "sample_id": turn["sample_id"], "session": turn["session"]} sessions[key]["lines"].append(f"{turn['speaker']}: {turn['text']}") + # LOCOMO_CHUNK_SIZE > 0 stores each session as ~chunk-size pieces + # (MemPalace-style granularity) with the [date] header repeated per + # chunk, and avoids the 4000-char truncation that drops session tails. 
+ chunk_size = int(os.environ.get("LOCOMO_CHUNK_SIZE", "0")) + for key, session in sessions.items(): - content = f"[{session['date']}] Conversation session {session['session']}:\n" + "\n".join(session["lines"]) - # Truncate very long sessions to avoid overwhelming the embedding - if len(content) > 4000: - content = content[:4000] + header = f"[{session['date']}] Conversation session {session['session']}:" + source_ref = f"locomo:{session['sample_id']}:session_{session['session']}" + if chunk_size > 0: + pieces: List[str] = [] + current: List[str] = [] + current_len = 0 + for line in session["lines"]: + if current and current_len + len(line) + 1 > chunk_size: + pieces.append("\n".join(current)) + current = [] + current_len = 0 + current.append(line) + current_len += len(line) + 1 + if current: + pieces.append("\n".join(current)) + contents = [f"{header}\n{piece}" for piece in pieces] + else: + content = f"{header}\n" + "\n".join(session["lines"]) + # Truncate very long sessions to avoid overwhelming the embedding + if len(content) > 4000: + content = content[:4000] + contents = [content] + try: - mm.remember( - content=content, - source_type="dialogue", - source_ref=f"locomo:{session['sample_id']}:session_{session['session']}", - domain="locomo", - ) + for i, content in enumerate(contents): + ref = source_ref if len(contents) == 1 else f"{source_ref}#c{i}" + mm.remember( + content=content, + source_type="dialogue", + source_ref=ref, + domain="locomo", + ) ingested += 1 except RuntimeError as e: errors += 1 diff --git a/benchmarks/results/session_2026-06-09/locomo_results_optimized.json b/benchmarks/results/session_2026-06-09/locomo_results_optimized.json new file mode 100644 index 0000000..86ac12f --- /dev/null +++ b/benchmarks/results/session_2026-06-09/locomo_results_optimized.json @@ -0,0 +1,962 @@ +{ + "meta": { + "date": "2026-06-09T13:32:20.204960", + "version": "zettelforge-2.7.0", + "dataset": 
"/home/rolandpg/.openclaw/workspace-nexus/Locomo-Plus/data/locomo10.json", + "per_category": 20, + "judge": "keyword", + "k": 10 + }, + "ingest": { + "ingested": 272, + "errors": 0, + "duration_s": 33.82, + "rate_per_s": 8.0 + }, + "by_category": { + "single-hop": { + "accuracy": 5.0, + "avg_score": 0.125, + "p50_latency_ms": 164.89955666474998, + "p95_latency_ms": 251.72034185379744, + "n": 20 + }, + "multi-hop": { + "accuracy": 0.0, + "avg_score": 0.075, + "p50_latency_ms": 167.22881654277444, + "p95_latency_ms": 220.0862099416554, + "n": 20 + }, + "temporal": { + "accuracy": 5.0, + "avg_score": 0.075, + "p50_latency_ms": 175.95603200607002, + "p95_latency_ms": 211.49435499683022, + "n": 20 + }, + "open-domain": { + "accuracy": 20.0, + "avg_score": 0.35, + "p50_latency_ms": 170.30203063040972, + "p95_latency_ms": 220.54198710247874, + "n": 20 + }, + "adversarial": { + "accuracy": 25.0, + "avg_score": 0.375, + "p50_latency_ms": 165.67518352530897, + "p95_latency_ms": 190.5370568856597, + "n": 20 + } + }, + "overall": { + "accuracy": 11.0, + "avg_score": 0.2, + "p50_latency_ms": 169.87449256703258, + "p95_latency_ms": 211.44699584692717, + "total_samples": 100 + }, + "details": [ + { + "category": "single-hop", + "question": "What did Caroline research?", + "gold_answer": "Adoption agencies", + "predicted": "Guess what I did this week? I took the first step towards becoming a mom - I applied to adoption agencies! It's a big decision, but I think I'm ready to give all my love to a child. Melanie: Wow, what an experience! How did it make you feel?. Melanie: That sounds awesome! What did you take away from it to use in your life?. Here's a photo of my horse painting I did recently. What else helps you out?", + "score": 1.0, + "latency_s": 0.252, + "retrieved": 10 + }, + { + "category": "single-hop", + "question": "What is Caroline's identity?", + "gold_answer": "Transgender woman", + "predicted": "Caroline: That bowl is awesome, Mel! 
What gave you the idea for all the colors and patterns?. Melanie: Wow, that's cool, Caroline! What happened that was so awesome? Did you hear any inspiring stories?. Caroline: Gonna continue my edu and check out career options, which is pretty exciting!. Melanie: Wow, Caroline! What kinda jobs are you thinkin' of? Anything that stands out?. Is this your own painting?", + "score": 0.0, + "latency_s": 0.163, + "retrieved": 10 + }, + { + "category": "single-hop", + "question": "What is Caroline's relationship status?", + "gold_answer": "Single", + "predicted": "Caroline: That bowl is awesome, Mel! What gave you the idea for all the colors and patterns?. Caroline: Thanks, Melanie! This necklace is super special to me - a gift from my grandma in my home country, Sweden. Melanie: That's gorgeous, Caroline! It's awesome what items can mean so much to us, right? Got any other objects that you treasure, like that necklace?. Melanie: Sounds great! What kind of counseling and mental health services do you want to persue?. What motivated you to pursue counselin", + "score": 0.0, + "latency_s": 0.167, + "retrieved": 10 + }, + { + "category": "single-hop", + "question": "Where did Caroline move from 4 years ago?", + "gold_answer": "Sweden", + "predicted": "They've been there through everything, I've known these friends for 4 years, since I moved from my home country. Caroline: Hey Melanie, great to hear from you. Melanie: That sounds great, Caroline. How did you feel about your time there? Anything that sticks out to you?. Caroline: Wow! Did you see that band?", + "score": 0.0, + "latency_s": 0.17, + "retrieved": 10 + }, + { + "category": "single-hop", + "question": "What career path has Caroline decided to persue?", + "gold_answer": "counseling or mental health for Transgender people", + "predicted": "Guess what I did this week? I took the first step towards becoming a mom - I applied to adoption agencies! 
It's a big decision, but I think I'm ready to give all my love to a child. Caroline: The support group has made me feel accepted and given me courage to embrace myself. Caroline: Hey Mel, great to chat with you again! So much has happened since we last spoke - I went to an LGBTQ conference two days ago and it was really special. Melanie: That sounds awesome! What did you take away from it t", + "score": 0.5, + "latency_s": 0.199, + "retrieved": 10 + }, + { + "category": "single-hop", + "question": "What activities does Melanie partake in?", + "gold_answer": "pottery, camping, painting, swimming", + "predicted": "Melanie: Caroline, awesome news that you two are getting along! What was it like for you both? Care to fill me in?. What do you hope to do next time?. Any specific projects or activities you're looking forward to there?. Caroline: Wow, what a fun moment! What's the band?. Melanie: Cool! What type of music do you play?", + "score": 0.0, + "latency_s": 0.153, + "retrieved": 10 + }, + { + "category": "single-hop", + "question": "Where has Melanie camped?", + "gold_answer": "beach, mountains, forest", + "predicted": "Caroline: Finding a community where I'm accepted, loved and supported has really meant a lot to me. I'm looking forward to seeing how much fun everyone has and how proud they'll feel of their talents!. Caroline: Yeah totally! \"Brave\" by Sara Bareilles has a lot of significance for me. Melanie: Wow, that's gorgeous! Where did you find it?. It was so vibrant and welcoming, I had to take a picture! It reminds us that love and acceptance are everywhere\u2014even where we least expect it", + "score": 0.0, + "latency_s": 0.157, + "retrieved": 10 + }, + { + "category": "single-hop", + "question": "What do Melanie's kids like?", + "gold_answer": "dinosaurs, nature", + "predicted": "What do you hope to do next time?. Melanie: Cool! What type of music do you play?. What other creative projects do you do with them, besides pottery?. 
What do you think of these?. What do flowers mean to you?", + "score": 0.0, + "latency_s": 0.163, + "retrieved": 10 + }, + { + "category": "single-hop", + "question": "What books has Melanie read?", + "gold_answer": "\"Nothing is Impossible\", \"Charlotte's Web\"", + "predicted": "Melanie: Sounds great! What kind of books you got in your library?. What effect has the journey had on your relationships?. Melanie: That's gorgeous, Caroline! It's awesome what items can mean so much to us, right? Got any other objects that you treasure, like that necklace?. Melanie: Sounds great! What kind of counseling and mental health services do you want to persue?. What motivated you to pursue counseling?", + "score": 0.0, + "latency_s": 0.186, + "retrieved": 10 + }, + { + "category": "single-hop", + "question": "What does Melanie do to destress?", + "gold_answer": "Running, pottery", + "predicted": "Melanie: Sounds great! What kind of counseling and mental health services do you want to persue?. I'll do my best to make sure these kids have a safe and loving home. Melanie: Caroline, awesome news that you two are getting along! What was it like for you both? Care to fill me in?. It was wonderful to see how the audience related to what I said and how it inspired them to be better allies. Melanie: That's gorgeous, Caroline! It's awesome what items can mean so much to us, right? Got any other ob", + "score": 0.0, + "latency_s": 0.153, + "retrieved": 10 + }, + { + "category": "single-hop", + "question": "What LGBTQ+ events has Caroline participated in?", + "gold_answer": "Pride parade, school speech, support group", + "predicted": "Melanie: Caroline, awesome news that you two are getting along! What was it like for you both? Care to fill me in?. Last week I went to an LGBTQ+ pride parade. It showed me how much our community has grown, it was amazing!. Melanie: Wow, Caroline, sounds like the parade was an awesome experience! 
It's great to see the love and support for the LGBTQ+ community. Congrats! Has this experience influenced your goals at all?", + "score": 0.5, + "latency_s": 0.18, + "retrieved": 10 + }, + { + "category": "single-hop", + "question": "What events has Caroline participated in to help children?", + "gold_answer": "Mentoring program, school speech", + "predicted": "Melanie: That sounds awesome! What did you take away from it to use in your life?. It was great! I truly felt the power of our collective effort to help people in need, so heartwarming. It's important to help people find what they need. I wanted to help out in these tough times by doing a community food drive. This journey has been amazing and I'm grateful I get to share it and help others with theirs", + "score": 0.0, + "latency_s": 0.188, + "retrieved": 10 + }, + { + "category": "single-hop", + "question": "What did Melanie paint recently?", + "gold_answer": "sunset", + "predicted": "Melanie: Wow, what an experience! How did it make you feel?. What gave you the idea to paint it?. How's it going for you? That painting is awesome! Did you paint it?. Guess what I did this week? I took the first step towards becoming a mom - I applied to adoption agencies! It's a big decision, but I think I'm ready to give all my love to a child. Caroline: I totally agree, Melanie", + "score": 0.0, + "latency_s": 0.198, + "retrieved": 10 + }, + { + "category": "single-hop", + "question": "What activities has Melanie done with her family?", + "gold_answer": "Pottery, painting, camping, museum, swimming, hiking", + "predicted": "Maria: Sounds like parenting has been a wonderful experience for you - what has it been like?. It's always fun coming up with activities for my family to enjoy. What inspires you with your volunteering?. What effect has the journey had on your relationships?. Caroline: Wow, that's awesome! 
What do you love most about camping with your fam?", + "score": 0.0, + "latency_s": 0.154, + "retrieved": 10 + }, + { + "category": "single-hop", + "question": "In what ways is Caroline participating in the LGBTQ community?", + "gold_answer": "Joining activist group, going to pride parades, participating in an art show, mentoring program", + "predicted": "It was awesome! I talked about my transgender journey and encouraged students to get involved in the LGBTQ community. Sounds like your event was amazing! I'm so proud of you for spreading awareness and getting others involved in the LGBTQ community. Caroline: That bowl is awesome, Mel! What gave you the idea for all the colors and patterns?. Our group, 'Connected LGBTQ Activists', is made of all kinds of people investing in positive changes. Talking to the community made me want to use my story ", + "score": 0.0, + "latency_s": 0.205, + "retrieved": 10 + }, + { + "category": "single-hop", + "question": "How many times has Melanie gone to the beach in 2023?", + "gold_answer": "2", + "predicted": "Audrey: Hey Andrew! It's been a wild ride! I did something fun with my pups over the weekend, took them to the beach and it was so fun to see them playing in the ocean. Andrew: Haven't been to the beach in a while. Have fun at the beach trip! Bet you can't wait to get out to the nature. Andrew: Sounds great! Did they love being at the beach? Did they enjoy the water? Here's a pic of my last trip to the beach. It's hard to find open spaces in the city", + "score": 0.0, + "latency_s": 0.154, + "retrieved": 10 + }, + { + "category": "single-hop", + "question": "What kind of art does Caroline make?", + "gold_answer": "abstract art", + "predicted": "Melanie: Sounds great! What kind of counseling and mental health services do you want to persue?. Melanie: Sounds great! What kind of books you got in your library?. 
Art gives me a sense of freedom, but so does having supportive people around, promoting LGBTQ rights and being true to myself. What do you think of these?. The pattern and colors are awesome-- it reminds me of art and self-expression", + "score": 0.5, + "latency_s": 0.156, + "retrieved": 10 + }, + { + "category": "single-hop", + "question": "Who supports Caroline when she has a negative experience?", + "gold_answer": "Her mentors, family, and friends", + "predicted": "I ran into a group of religious conservatives who said something that really upset me. Caroline: Yeah, it's true! Having people who back you makes such a huge difference. I'm creating a library for when I have kids. It's a journey for me, but when I look after myself, I'm able to better look after my family. Caroline: Researching adoption agencies \u2014 it's been a dream to have a family and give a loving home to kids who need it", + "score": 0.0, + "latency_s": 0.167, + "retrieved": 10 + }, + { + "category": "single-hop", + "question": "What types of pottery have Melanie and her kids made?", + "gold_answer": "bowls, cup", + "predicted": "Melanie: Sounds great! What kind of counseling and mental health services do you want to persue?. Takes a lot of courage and maturity! What do you think of this?. My kids have so much and others don't. So much since we talked! Last Fri I finally took my kids to a pottery workshop. Caroline: Wow, Mel! Sounds like you and the kids had a blast", + "score": 0.0, + "latency_s": 0.152, + "retrieved": 10 + }, + { + "category": "single-hop", + "question": "What has Melanie painted?", + "gold_answer": "Horse, sunset, sunrise", + "predicted": "Melanie: That's gorgeous, Caroline! It's awesome what items can mean so much to us, right? Got any other objects that you treasure, like that necklace?. Melanie: Sounds great! What kind of counseling and mental health services do you want to persue?. What motivated you to pursue counseling?. Caroline: Thanks, Melanie. 
Melanie: Congrats Caroline! Good on you for going after what you really care about", + "score": 0.0, + "latency_s": 0.161, + "retrieved": 10 + }, + { + "category": "multi-hop", + "question": "When did Caroline go to the LGBTQ support group?", + "gold_answer": "7 May 2023", + "predicted": "Caroline: I went to a LGBTQ support group yesterday and it was so powerful. Caroline: The support group has made me feel accepted and given me courage to embrace myself. I'm off to go swimming with the kids. Melanie: Wow, Caroline, sounds like the parade was an awesome experience! It's great to see the love and support for the LGBTQ+ community. Talking to the community made me want to use my story to help others too - I'm still thinking that counseling and mental health is the way to go", + "score": 0.0, + "latency_s": 0.163, + "retrieved": 10 + }, + { + "category": "multi-hop", + "question": "When did Melanie paint a sunrise?", + "gold_answer": "2022", + "predicted": "I'm creating a library for when I have kids. Let me know when you're free for a catch-up. Jolene: Wow, that's a great photo! How did she show you to appreciate it?. It's a journey for me, but when I look after myself, I'm able to better look after my family. Caroline: Hey Melanie! Long time no talk! A lot's been going on in my life! Take a look at this", + "score": 0.0, + "latency_s": 0.166, + "retrieved": 10 + }, + { + "category": "multi-hop", + "question": "When did Melanie run a charity race?", + "gold_answer": "The sunday before 25 May 2023", + "predicted": "I just wanted to let you know I challenged myself last Friday and did a charity event. John: I set up a 5K charity run in our neighborhood. I ran a charity race for mental health last Saturday \u2013 it was really rewarding. Caroline: That charity race sounds great, Mel! Making a difference & raising awareness for mental health is super rewarding - I'm really proud of you for taking part!. 
It's a journey for me, but when I look after myself, I'm able to better look after my family", + "score": 0.0, + "latency_s": 0.17, + "retrieved": 10 + }, + { + "category": "multi-hop", + "question": "When is Melanie planning on going camping?", + "gold_answer": "June 2023", + "predicted": "Reaching the top of a challenging trail is amazing too - it feels like all worries just vanish when you get to the top. Did you reach the summit? When I was younger, my family and I went on a road trip to Oregon. Caroline: The mentoring is going great! I've met some amazing young folks and supported them along the way. [1:33 pm on 25 August, 2023] Conversation session 14:. It's tough when those things happen, but it's great you apologized", + "score": 0.0, + "latency_s": 0.202, + "retrieved": 10 + }, + { + "category": "multi-hop", + "question": "When did Caroline give a speech at a school?", + "gold_answer": "The week before 9 June 2023", + "predicted": "Conversations about gender identity and inclusion are so necessary and I'm thankful for being able to give a voice to the trans community. Here's a pic from when we met up last week!. Melanie: That's a gorgeous song, Caroline. I'm creating a library for when I have kids. Here's a pic of my family camping at the beach", + "score": 0.0, + "latency_s": 0.165, + "retrieved": 10 + }, + { + "category": "multi-hop", + "question": "When did Caroline meet up with her friends, family, and mentors?", + "gold_answer": "The week before 9 June 2023", + "predicted": "It was so amazing lying there and watching the sky light up with streaks of light. Transitioning wasn't easy and acceptance wasn't either, but the help I got from friends, family and people I looked up to was invaluable. They'll help with the process and provide all the info. Caroline: The room was electric with energy and support! The posters were amazing, so much pride and strength! It inspired me to make some art. 
It's an important part of my life and I've made strong connections with people ", + "score": 0.0, + "latency_s": 0.208, + "retrieved": 10 + }, + { + "category": "multi-hop", + "question": "How long has Caroline had her current group of friends for?", + "gold_answer": "4 years", + "predicted": "I'm looking forward to seeing how much fun everyone has and how proud they'll feel of their talents!. Melanie: Wow, that photo is great! How long have you had such a great support system?. Caroline: The support group has made me feel accepted and given me courage to embrace myself. It's made me appreciate how lucky I am to have my friends and family helping with my transition. How have your friends and fam been helping you out with your transition?", + "score": 0.0, + "latency_s": 0.164, + "retrieved": 10 + }, + { + "category": "multi-hop", + "question": "How long ago was Caroline's 18th birthday?", + "gold_answer": "10 years ago", + "predicted": "It was so cool seeing how friendship and compassion can make a difference. A friend made it for my 18th birthday ten years ago. How was it? Anything fun?. Seeing how passionate these pros were about making a safe space for people like me was amazing. It was awesome to see how strong the young people were, with all the challenges they face", + "score": 0.5, + "latency_s": 0.162, + "retrieved": 10 + }, + { + "category": "multi-hop", + "question": "When did Melanie sign up for a pottery class?", + "gold_answer": "2 July 2023", + "predicted": "I'm creating a library for when I have kids. Jolene: Woohoo! I signed up for a meditation course at a retreat near a lake. Caroline: Right, it was so much fun! We could do a family outting, or wanna plan something special for this summer, just us two? It'd be a great chance to catch up and explore nature! What do you think?. Caroline: Seeing my mentee's face light up when they saw the support was the best! Such a special moment. So much since we talked! 
Last Fri I finally took my kids to a potte", + "score": 0.0, + "latency_s": 0.182, + "retrieved": 10 + }, + { + "category": "multi-hop", + "question": "When is Caroline going to the transgender conference?", + "gold_answer": "July 2023", + "predicted": "It was awesome! I talked about my transgender journey and encouraged students to get involved in the LGBTQ community. Talking about inclusivity and acceptance is crucial, and you're so brave to speak up for the trans community. Talking to the community made me want to use my story to help others too - I'm still thinking that counseling and mental health is the way to go. Caroline: Thanks Mel! I'm going to a transgender conference this month. Caroline: The mentoring is going great! I've met some ", + "score": 0.0, + "latency_s": 0.173, + "retrieved": 10 + }, + { + "category": "multi-hop", + "question": "When did Melanie go to the museum?", + "gold_answer": "5 July 2023", + "predicted": "Caroline: That's so funny! I used to go horseback riding with my dad when I was a kid, we'd go through the fields, feeling the wind. Guess what I did this week? I took the first step towards becoming a mom - I applied to adoption agencies! It's a big decision, but I think I'm ready to give all my love to a child. When I go to an art show, it's like we're still experiencing it together even though she's gone. Melanie: Hey, Caroline! Nice to hear from you! Love the necklace, any special meaning to", + "score": 0.0, + "latency_s": 0.171, + "retrieved": 10 + }, + { + "category": "multi-hop", + "question": "When did Caroline have a picnic?", + "gold_answer": "The week before 6 July 2023", + "predicted": "I'm creating a library for when I have kids. It was nice seeing them have a good time outdoors. Melanie: That's a gorgeous song, Caroline. Life's tough but it's worth it when we have things that make us happy. 
It's made a huge difference to have people who get what I'm going through", + "score": 0.0, + "latency_s": 0.156, + "retrieved": 10 + }, + { + "category": "multi-hop", + "question": "When did Caroline go to the LGBTQ conference?", + "gold_answer": "10 July 2023", + "predicted": "Talking to the community made me want to use my story to help others too - I'm still thinking that counseling and mental health is the way to go. I'm off to go swimming with the kids. It was awesome! I talked about my transgender journey and encouraged students to get involved in the LGBTQ community. Caroline: Hey Melanie! That sounds great! Last weekend I joined a mentorship program for LGBTQ youth - it's really rewarding to help the community. Caroline: Hey Mel, great to chat with you again! S", + "score": 0.0, + "latency_s": 0.185, + "retrieved": 10 + }, + { + "category": "multi-hop", + "question": "When did Melanie read the book \"nothing is impossible\"?", + "gold_answer": "2022", + "predicted": "Evan: The painting is mine, I made it when I was a mix of emotions - sad, mad, and hopeful. Caroline: The mentoring is going great! I've met some amazing young folks and supported them along the way. Caroline: Seeing my mentee's face light up when they saw the support was the best! Such a special moment. Melanie: Wow, Caroline, that painting is awesome! Those colors are so vivid and the whole thing looks really unified. Melanie: That's a gorgeous photo, Caroline! Wow, the love around you is awes", + "score": 0.0, + "latency_s": 0.166, + "retrieved": 10 + }, + { + "category": "multi-hop", + "question": "When did Caroline go to the adoption meeting?", + "gold_answer": "The friday before 15 July 2023", + "predicted": "Guess what I did this week? I took the first step towards becoming a mom - I applied to adoption agencies! It's a big decision, but I think I'm ready to give all my love to a child. Caroline: That's so funny! 
I used to go horseback riding with my dad when I was a kid, we'd go through the fields, feeling the wind. Caroline: Woohoo Melanie! I passed the adoption agency interviews last Friday! I'm so excited and thankful. Transitioning wasn't easy and acceptance wasn't either, but the help I got fr", + "score": 0.0, + "latency_s": 0.168, + "retrieved": 10 + }, + { + "category": "multi-hop", + "question": "When did Melanie go to the pottery workshop?", + "gold_answer": "The Friday before 15 July 2023", + "predicted": "So much since we talked! Last Fri I finally took my kids to a pottery workshop. Melanie: The kids loved it! They were so excited to get their hands dirty and make something with clay. Caroline: Thanks Melanie - love the blue vase in the pic! Blue's my fave, it makes me feel relaxed. They represent growth, beauty and reminding us to appreciate the small moments. Melanie: Marrying my partner and promising to be together forever was the best part", + "score": 0.0, + "latency_s": 0.16, + "retrieved": 10 + }, + { + "category": "multi-hop", + "question": "When did Melanie go camping in June?", + "gold_answer": "The week before 27 June 2023", + "predicted": "Caroline: I was out walking in my neighborhood when I came across this cool rainbow sidewalk for Pride Month. Did you reach the summit? When I was younger, my family and I went on a road trip to Oregon. Audrey: Hey Andrew! It's been a wild ride! I did something fun with my pups over the weekend, took them to the beach and it was so fun to see them playing in the ocean. Melanie: Hey Caroline, hope all's good! I had a quiet weekend after we went camping with my fam two weekends ago. 
Caroline: Seei", + "score": 0.0, + "latency_s": 0.22, + "retrieved": 10 + }, + { + "category": "multi-hop", + "question": "When did Caroline go to a pride parade during the summer?", + "gold_answer": "The week before 3 July 2023", + "predicted": "I went to a pride parade last Friday and it was awesome - so much energy and love everywhere. Last week I went to an LGBTQ+ pride parade. Melanie: Wow, Caroline, sounds like the parade was an awesome experience! It's great to see the love and support for the LGBTQ+ community. Talking to the community made me want to use my story to help others too - I'm still thinking that counseling and mental health is the way to go. Conversations about gender identity and inclusion are so necessary and I'm th", + "score": 0.5, + "latency_s": 0.176, + "retrieved": 10 + }, + { + "category": "multi-hop", + "question": "When did Melanie go camping in July?", + "gold_answer": "two weekends before 17 July 2023", + "predicted": "Caroline: I was out walking in my neighborhood when I came across this cool rainbow sidewalk for Pride Month. Did you reach the summit? When I was younger, my family and I went on a road trip to Oregon. Melanie: Hey Caroline, hope all's good! I had a quiet weekend after we went camping with my fam two weekends ago. Caroline: Seeing my mentee's face light up when they saw the support was the best! Such a special moment. It's awesome to see the difference we can make in each other's lives", + "score": 0.5, + "latency_s": 0.159, + "retrieved": 10 + }, + { + "category": "multi-hop", + "question": "When did Caroline join a mentorship program?", + "gold_answer": "The weekend before 17 July 2023", + "predicted": "Guess what I did this week? I took the first step towards becoming a mom - I applied to adoption agencies! It's a big decision, but I think I'm ready to give all my love to a child. Melanie: Oliver's hilarious! He hid his bone in my slipper once! Cute, right? Almost as silly as when I got to feed a horse a carrot. 
Caroline: That's so funny! I used to go horseback riding with my dad when I was a kid, we'd go through the fields, feeling the wind. Here's a photo of my horse painting I did recently.", + "score": 0.0, + "latency_s": 0.155, + "retrieved": 10 + }, + { + "category": "temporal", + "question": "What fields would Caroline be likely to pursue in her educaton?", + "gold_answer": "Psychology, counseling certification", + "predicted": "It was wonderful to see how the audience related to what I said and how it inspired them to be better allies. Caroline: Right, it was so much fun! We could do a family outting, or wanna plan something special for this summer, just us two? It'd be a great chance to catch up and explore nature! What do you think?. Melanie: That sounds awesome! What did you take away from it to use in your life?. It was awesome! I talked about my transgender journey and encouraged students to get involved in the LG", + "score": 0.0, + "latency_s": 0.185, + "retrieved": 10 + }, + { + "category": "temporal", + "question": "Would Caroline still want to pursue counseling as a career if she hadn't received support growing up?", + "gold_answer": "Likely no", + "predicted": "Caroline: I'm so lucky to have such a great support system around me. Caroline: I went to a LGBTQ support group yesterday and it was so powerful. Caroline: I'm keen on counseling or working in mental health - I'd love to support those with similar issues. Blue calms me, so I wanted the painting to have a serene vibe while still having lots of vibrant colors. Melanie: Oliver's hilarious! He hid his bone in my slipper once! Cute, right? Almost as silly as when I got to feed a horse a carrot", + "score": 0.0, + "latency_s": 0.176, + "retrieved": 10 + }, + { + "category": "temporal", + "question": "Would Caroline likely have Dr. Seuss books on her bookshelf?", + "gold_answer": "Yes, since she collects classic children's books", + "predicted": "Melanie: That must have been tough for you, Caroline. 
[8:18 pm on 6 July, 2023] Conversation session 6:. Lots has been going on since then!. Melanie: That's awesome, Caroline! Congrats on following your dreams. Caroline: Melanie, that's a great pic! That must have been awesome", + "score": 0.0, + "latency_s": 0.178, + "retrieved": 10 + }, + { + "category": "temporal", + "question": "Would Caroline pursue writing as a career option?", + "gold_answer": "LIkely no; though she likes reading, she wants to be a counselor", + "predicted": "Melanie: I loved reading \"Charlotte's Web\" as a kid. It'll be tough as a single parent, but I'm up for the challenge!. Nate: Go for it! Follow your passion for writing, but if acting really makes you happy, give it a shot as well. Writing has become like an escape and a way to express my feelings. It's knowing that my writing can make a difference that keeps me going, even on tough days", + "score": 0.0, + "latency_s": 0.162, + "retrieved": 10 + }, + { + "category": "temporal", + "question": "Would Melanie be considered a member of the LGBTQ community?", + "gold_answer": "Likely no, she does not refer to herself as part of it", + "predicted": "Melanie: You'd be a great counselor! Your empathy and understanding will really help the people you work with. Sounds like your event was amazing! I'm so proud of you for spreading awareness and getting others involved in the LGBTQ community. I'm so proud to be part of the difference you're making. Melanie: I'm a big fan of pottery - the creativity and skill is awesome. It's great to see how far LGBTQ rights have come, but there's still plenty of progress to be made", + "score": 0.0, + "latency_s": 0.18, + "retrieved": 10 + }, + { + "category": "temporal", + "question": "Would Melanie be more interested in going to a national park or a theme park?", + "gold_answer": "National park; she likes the outdoors", + "predicted": "Having them happy and healthy would be a good first step before going all in for more dogs. 
But there are still some ways to appreciate it in the city, like getting some plants for your place or taking a trip to the park on the weekends. We went to a national park last week and made it to this beautiful peak. Melanie: Hey Caroline! Since we last spoke, I took my kids to a park yesterday. Melanie: That's so cool, Caroline! That's a great way to show off and be proud of everyone's skills", + "score": 0.5, + "latency_s": 0.173, + "retrieved": 10 + }, + { + "category": "temporal", + "question": "Would Melanie be considered an ally to the transgender community?", + "gold_answer": "Yes, she is supportive", + "predicted": "It was awesome! I talked about my transgender journey and encouraged students to get involved in the LGBTQ community. It was wonderful to see how the audience related to what I said and how it inspired them to be better allies. Our stories can be so inspiring and encouraging to others who are facing the same challenges. I'm so proud to be part of the difference you're making. I've been chasing my ambitions and had the chance to volunteer at an LGBTQ+ youth center", + "score": 0.0, + "latency_s": 0.173, + "retrieved": 10 + }, + { + "category": "temporal", + "question": "What would Caroline's political leaning likely be?", + "gold_answer": "Liberal", + "predicted": "Caroline: That bowl is awesome, Mel! What gave you the idea for all the colors and patterns?. It's what keeps us going, even when life's hard. Caroline: Right, it was so much fun! We could do a family outting, or wanna plan something special for this summer, just us two? It'd be a great chance to catch up and explore nature! What do you think?. Melanie: Yeah, Caroline! I'll start thinking about what we can do. Melanie: That sounds awesome! 
What did you take away from it to use in your life?", + "score": 0.0, + "latency_s": 0.204, + "retrieved": 10 + }, + { + "category": "temporal", + "question": "Would Caroline be considered religious?", + "gold_answer": "Somewhat, but not extremely religious", + "predicted": "It's been so helpful to have people around me who accept and support me, so I know I'll be ok!. Plus, painting helps me express my feelings and be creative. Seeing how art can be a source of self-expression and growth is truly inspiring. Melanie: Agreed, Caroline. Melanie: Yeah, same here Caroline", + "score": 0.0, + "latency_s": 0.16, + "retrieved": 10 + }, + { + "category": "temporal", + "question": "Would Melanie likely enjoy the song \"The Four Seasons\" by Vivaldi?", + "gold_answer": "Yes; it's classical music", + "predicted": "Deborah: Enjoy your day and make time for the things that bring you joy. Sam: Wish I could feel the same about love, but I've started to enjoy running in the mornings, and it's been a great way to clear my head. Caroline: Thanks Melanie - love the blue vase in the pic! Blue's my fave, it makes me feel relaxed. Melanie: Hey, Caroline! Nice to hear from you! Love the necklace, any special meaning to it?. It's like a reminder of my roots and all the love and support I get from my family", + "score": 0.0, + "latency_s": 0.17, + "retrieved": 10 + }, + { + "category": "temporal", + "question": "What personality traits might Melanie say Caroline has?", + "gold_answer": "Thoughtful, authentic, driven", + "predicted": "Melanie: Wow, that's cool, Caroline! What happened that was so awesome? Did you hear any inspiring stories?. Melanie: Wow, Caroline! What kinda jobs are you thinkin' of? Anything that stands out?. Melanie: Yep, Caroline. Melanie: That's gorgeous, Caroline! It's awesome what items can mean so much to us, right? Got any other objects that you treasure, like that necklace?. Melanie: Sounds great! 
What kind of counseling and mental health services do you want to persue?", + "score": 0.0, + "latency_s": 0.193, + "retrieved": 10 + }, + { + "category": "temporal", + "question": "Would Melanie go on another roadtrip soon?", + "gold_answer": "Likely no; since this one went badly", + "predicted": "Maria: Wow, great pic! Where did you go on that road trip?. [6:55 pm on 20 October, 2023] Conversation session 18:. Melanie: Hey Caroline, that roadtrip this past weekend was insane! We were all freaked when my son got into an accident. Caroline: Glad your son is okay, Melanie. Melanie: Thanks! They were scared but we reassured them and explained their brother would be OK", + "score": 0.0, + "latency_s": 0.179, + "retrieved": 10 + }, + { + "category": "temporal", + "question": "Would Caroline want to move back to her home country soon?", + "gold_answer": "No; she's in the process of adopting children.", + "predicted": "It's been a dream to adopt and provide a safe, loving home for kids who need it. Giving a home to needy kids is such a loving way to build a family. My dream is to create a safe and loving home for these kids. Love and acceptance should be everyone's right, and I want them to experience it. That's why I want to pass that same support to anyone who needs it", + "score": 0.0, + "latency_s": 0.176, + "retrieved": 10 + }, + { + "category": "temporal", + "question": "What might John's financial status be?", + "gold_answer": "Middle-class or wealthy", + "predicted": "What do you have in this spread? Looks delicious! I made this apple pie for the kids yesterday too!. James: Hey John! What new has happened in your life?. James: Wow, that book looks great! What other resources do you use to improve your game? Tell me about your gaming tips!. What about you? How's everything going?. It's kind of overwhelming but I'm excited! 
What have you been up to?", + "score": 0.0, + "latency_s": 0.169, + "retrieved": 10 + }, + { + "category": "temporal", + "question": "Would John be considered a patriotic person?", + "gold_answer": "Yes", + "predicted": "John: You're a real bookworm! It would be awesome to go to a book conference with you. Your assistance would be really appreciated. John: I'll be happy to find a place where my skills and passions are a perfect match. Your skills and passions will be a great addition. John: Thanks James! I kept it a secret because I would have been very upset if I had told you about her in advance and then it wouldn't have worked out", + "score": 0.0, + "latency_s": 0.168, + "retrieved": 10 + }, + { + "category": "temporal", + "question": "What might John's degree be in?", + "gold_answer": "Political science, Public administration, Public affairs", + "predicted": "James: Hey John, that sounds awesome! Combining your two loves - gaming and helping people - must be really exciting! So what kind of gig did they offer you?. James: Freelancing can definitely be a great way to sharpen skills and gain experience. What projects are you currently working on?. James: Congrats on your first professional project, John! Bet it's been great applying what you learned in class. James: What challenges have you encountered?", + "score": 0.0, + "latency_s": 0.166, + "retrieved": 10 + }, + { + "category": "temporal", + "question": "Around which US holiday did Maria get into a car accident?", + "gold_answer": "Independence Day", + "predicted": "Maria: Looks like a blast! Did everyone get a chance to try it? Glad you're all having a great time!. Maria: Wow, John! Hearing that can really make an impact and get us fired up to make a difference. Had a wild week, my car broke down last Fri on my way to work. Trying to get it fixed but it's tough & putting a strain on my wallet. 
Yeah, it's been tough with car trouble and money problems, but I stay positive and find a way", + "score": 0.0, + "latency_s": 0.186, + "retrieved": 10 + }, + { + "category": "temporal", + "question": "Does John live close to a beach or the mountains?", + "gold_answer": "beach", + "predicted": "Audrey: Hey Andrew! It's been a wild ride! I did something fun with my pups over the weekend, took them to the beach and it was so fun to see them playing in the ocean. Andrew: Sounds great! Did they love being at the beach? Did they enjoy the water? Here's a pic of my last trip to the beach. Andrew: Haven't been to the beach in a while. But there are still some ways to appreciate it in the city, like getting some plants for your place or taking a trip to the park on the weekends. John: Agreed! ", + "score": 1.0, + "latency_s": 0.209, + "retrieved": 10 + }, + { + "category": "temporal", + "question": "Would John be open to moving to another country?", + "gold_answer": "No, he has goals specifically in the U.S. like joining the military and running for office.", + "predicted": "John: You're a real bookworm! It would be awesome to go to a book conference with you. John: The game turned out to be a total success! Lots of people showed up and had a great time, plus we were able to raise some money for charity. Tim: Edinburgh, Scotland would be great for a magical vibe. I want to be known as a consistent performer and help my team. Don't be afraid to seek help if you need it", + "score": 0.0, + "latency_s": 0.148, + "retrieved": 10 + }, + { + "category": "temporal", + "question": "What attributes describe John?", + "gold_answer": "Selfless, family-oriented, passionate, rational", + "predicted": "James: Wow, that's awesome! What game was it for? Sounds like a dream!. What have you been doing to stay motivated?. James: Hey John! What new has happened in your life?. James: Wow, that book looks great! What other resources do you use to improve your game? 
Tell me about your gaming tips!. What sort of games are you interested in exploring?", + "score": 0.0, + "latency_s": 0.211, + "retrieved": 10 + }, + { + "category": "open-domain", + "question": "What did the charity race raise awareness for?", + "gold_answer": "mental health", + "predicted": "Caroline: That charity race sounds great, Mel! Making a difference & raising awareness for mental health is super rewarding - I'm really proud of you for taking part!. I ran a charity race for mental health last Saturday \u2013 it was really rewarding. I just wanted to let you know I challenged myself last Friday and did a charity event. Maria: You did awesome! How's the response been to that?. What did you make?", + "score": 1.0, + "latency_s": 0.159, + "retrieved": 10 + }, + { + "category": "open-domain", + "question": "What did Melanie realize after the charity race?", + "gold_answer": "self-care is important", + "predicted": "Guess what I did this week? I took the first step towards becoming a mom - I applied to adoption agencies! It's a big decision, but I think I'm ready to give all my love to a child. Melanie: Thanks, Caroline! Yup, we just did it yesterday! The kids loved it and it was a nice way to relax after the road trip. I just wanted to let you know I challenged myself last Friday and did a charity event. Maria: You did awesome! How's the response been to that?. Especially after the accident, I've thought a", + "score": 0.0, + "latency_s": 0.206, + "retrieved": 10 + }, + { + "category": "open-domain", + "question": "How does Melanie prioritize self-care?", + "gold_answer": "by carving out some me-time each day for activities like running, reading, or playing the violin", + "predicted": "How does it feel? Also, do you have any pets?. Caroline: I totally agree, Melanie. How was it? Anything fun?. They talked about different therapeutic methods and how to best work with trans people. 
Seeing how passionate these pros were about making a safe space for people like me was amazing", + "score": 0.0, + "latency_s": 0.19, + "retrieved": 10 + }, + { + "category": "open-domain", + "question": "What are Caroline's plans for the summer?", + "gold_answer": "researching adoption agencies", + "predicted": "Any fun plans for the summer?. Melanie: Caroline, awesome news that you two are getting along! What was it like for you both? Care to fill me in?. Caroline: That bowl is awesome, Mel! What gave you the idea for all the colors and patterns?. The red and blue are for the binary gender system, and the mix of colors means smashing that rigid thinking. Caroline: Hey Melanie! That sounds great! Last weekend I joined a mentorship program for LGBTQ youth - it's really rewarding to help the community", + "score": 0.0, + "latency_s": 0.162, + "retrieved": 10 + }, + { + "category": "open-domain", + "question": "What type of individuals does the adoption agency Caroline is considering support?", + "gold_answer": "LGBTQ+ individuals", + "predicted": "Caroline: Woohoo Melanie! I passed the adoption agency interviews last Friday! I'm so excited and thankful. For me, adoption is a way of giving back and showing love and acceptance. Guess what I did this week? I took the first step towards becoming a mom - I applied to adoption agencies! It's a big decision, but I think I'm ready to give all my love to a child. Melanie: Cool! What type of music do you play?. Melanie: That's awesome! What type of guitar? Been playing long?", + "score": 0.0, + "latency_s": 0.2, + "retrieved": 10 + }, + { + "category": "open-domain", + "question": "Why did Caroline choose the adoption agency?", + "gold_answer": "because of their inclusivity and support for LGBTQ+ individuals", + "predicted": "Guess what I did this week? I took the first step towards becoming a mom - I applied to adoption agencies! It's a big decision, but I think I'm ready to give all my love to a child. 
Caroline: Woohoo Melanie! I passed the adoption agency interviews last Friday! I'm so excited and thankful. Caroline: Thanks so much, Melanie! It's beautiful! It really brings home how much love's in families - both blood and the ones we choose. Wishing you the best on your adoption journey!. Melanie: Thanks for the ", + "score": 0.0, + "latency_s": 0.172, + "retrieved": 10 + }, + { + "category": "open-domain", + "question": "What is Caroline excited about in the adoption process?", + "gold_answer": "creating a family for kids who need one", + "predicted": "Caroline: Woohoo Melanie! I passed the adoption agency interviews last Friday! I'm so excited and thankful. Guess what I did this week? I took the first step towards becoming a mom - I applied to adoption agencies! It's a big decision, but I think I'm ready to give all my love to a child. It was awesome! I talked about my transgender journey and encouraged students to get involved in the LGBTQ community. Talking about inclusivity and acceptance is crucial, and you're so brave to speak up for the", + "score": 0.0, + "latency_s": 0.178, + "retrieved": 10 + }, + { + "category": "open-domain", + "question": "What does Melanie think about Caroline's decision to adopt?", + "gold_answer": "she thinks Caroline is doing something amazing and will be an awesome mom", + "predicted": "Guess what I did this week? I took the first step towards becoming a mom - I applied to adoption agencies! It's a big decision, but I think I'm ready to give all my love to a child. Melanie: That's awesome, Caroline! You drew it? What does it mean to you?. Melanie: Yeah, it's normal to be both excited and nervous with a big decision. Art gives me a sense of freedom, but so does having supportive people around, promoting LGBTQ rights and being true to myself. 
It's awesome to see how passionate yo", + "score": 0.5, + "latency_s": 0.169, + "retrieved": 10 + }, + { + "category": "open-domain", + "question": "How long have Mel and her husband been married?", + "gold_answer": "Mel and her husband have been married for 5 years.", + "predicted": "Deborah: Aw, that's wonderful! How long have you been married?. Evan: Hey Sam! Long time no see! Been up and down lately, got married last week - how about you?. This painting has such an inspiring vibe; you really have a knack for understanding art! How about you? How long have you been painting?. How have you been coping?. We're all still sad about it, but have been comforted by the good times we had and the memories we have", + "score": 0.5, + "latency_s": 0.221, + "retrieved": 10 + }, + { + "category": "open-domain", + "question": "What does Caroline's necklace symbolize?", + "gold_answer": "love, faith, and strength", + "predicted": "Caroline: Thanks, Melanie! This necklace is super special to me - a gift from my grandma in my home country, Sweden. Melanie: That's gorgeous, Caroline! It's awesome what items can mean so much to us, right? Got any other objects that you treasure, like that necklace?. Melanie: Sounds great! What kind of counseling and mental health services do you want to persue?. What motivated you to pursue counseling?. Melanie: Congrats Caroline! Good on you for going after what you really care about", + "score": 0.0, + "latency_s": 0.175, + "retrieved": 10 + }, + { + "category": "open-domain", + "question": "What country is Caroline's grandma from?", + "gold_answer": "Sweden", + "predicted": "Caroline: Thanks, Melanie! This necklace is super special to me - a gift from my grandma in my home country, Sweden. Caroline: That bowl is awesome, Mel! What gave you the idea for all the colors and patterns?. This is a big move towards my goal of having a family. For me, adoption is a way of giving back and showing love and acceptance. 
Giving a home to needy kids is such a loving way to build a family", + "score": 1.0, + "latency_s": 0.162, + "retrieved": 10 + }, + { + "category": "open-domain", + "question": "What was grandma's gift to Caroline?", + "gold_answer": "necklace", + "predicted": "It was wonderful to see how the audience related to what I said and how it inspired them to be better allies. Caroline: Right, it was so much fun! We could do a family outting, or wanna plan something special for this summer, just us two? It'd be a great chance to catch up and explore nature! What do you think?. Caroline: Thanks, Melanie! This necklace is super special to me - a gift from my grandma in my home country, Sweden. She gave it to me when I was young, and it stands for love, faith and", + "score": 1.0, + "latency_s": 0.155, + "retrieved": 10 + }, + { + "category": "open-domain", + "question": "What is Melanie's hand-painted bowl a reminder of?", + "gold_answer": "art and self-expression", + "predicted": "Caroline: That bowl is awesome, Mel! What gave you the idea for all the colors and patterns?. Caroline: Thanks, Melanie! This necklace is super special to me - a gift from my grandma in my home country, Sweden. It's like a reminder of my roots and all the love and support I get from my family. Caroline: Yep, Melanie! I've got some other stuff with sentimental value, like my hand-painted bowl. Caroline: Wow, what a great day! Glad everyone could make it", + "score": 0.5, + "latency_s": 0.17, + "retrieved": 10 + }, + { + "category": "open-domain", + "question": "What did Melanie and her family do while camping?", + "gold_answer": "explored nature, roasted marshmallows, and went on a hike", + "predicted": "Caroline: Right, it was so much fun! We could do a family outting, or wanna plan something special for this summer, just us two? It'd be a great chance to catch up and explore nature! What do you think?. Takes a lot of courage and maturity! What do you think of this?. Did you reach the summit? 
When I was younger, my family and I went on a road trip to Oregon. John: Cool, Maria! You've built some awesome connections and you're making a big impact at the homeless shelter - nice work! How did you m", + "score": 0.5, + "latency_s": 0.168, + "retrieved": 10 + }, + { + "category": "open-domain", + "question": "What kind of counseling and mental health services is Caroline interested in pursuing?", + "gold_answer": "working with trans people, helping them accept themselves and supporting their mental health", + "predicted": "Talking to the community made me want to use my story to help others too - I'm still thinking that counseling and mental health is the way to go. Melanie: Sounds great! What kind of books you got in your library?. Caroline: That charity race sounds great, Mel! Making a difference & raising awareness for mental health is super rewarding - I'm really proud of you for taking part!. Caroline: I'm keen on counseling or working in mental health - I'd love to support those with similar issues. Caroline", + "score": 0.5, + "latency_s": 0.175, + "retrieved": 10 + }, + { + "category": "open-domain", + "question": "What workshop did Caroline attend recently?", + "gold_answer": "LGBTQ+ counseling workshop", + "predicted": "Melanie: Wow, what an experience! How did it make you feel?. Melanie: Wow, that's cool, Caroline! What happened that was so awesome? Did you hear any inspiring stories?. Melanie: That sounds awesome! What did you take away from it to use in your life?. So much since we talked! Last Fri I finally took my kids to a pottery workshop. What other creative projects do you do with them, besides pottery?", + "score": 0.0, + "latency_s": 0.15, + "retrieved": 10 + }, + { + "category": "open-domain", + "question": "What was discussed in the LGBTQ+ counseling workshop?", + "gold_answer": "therapeutic methods and how to best work with trans people", + "predicted": "Melanie: Wow, Caroline, sounds like the parade was an awesome experience! 
It's great to see the love and support for the LGBTQ+ community. It was awesome! I talked about my transgender journey and encouraged students to get involved in the LGBTQ community. Sounds like your event was amazing! I'm so proud of you for spreading awareness and getting others involved in the LGBTQ community. It was wonderful to see how the audience related to what I said and how it inspired them to be better allies. C", + "score": 0.5, + "latency_s": 0.165, + "retrieved": 10 + }, + { + "category": "open-domain", + "question": "What motivated Caroline to pursue counseling?", + "gold_answer": "her own journey and the support she received, and how counseling improved her life", + "predicted": "Caroline: Right, it was so much fun! We could do a family outting, or wanna plan something special for this summer, just us two? It'd be a great chance to catch up and explore nature! What do you think?. Melanie: Hey Caroline! Great to hear from you! Wow, what an amazing journey. What have you been up to lately, artistically?. What feelings were you hoping to portray?. Melanie: That's awesome, Caroline! You drew it? What does it mean to you?", + "score": 0.0, + "latency_s": 0.171, + "retrieved": 10 + }, + { + "category": "open-domain", + "question": "What kind of place does Caroline want to create for people?", + "gold_answer": "a safe and inviting place for people to grow", + "predicted": "Melanie: Sounds great! What kind of counseling and mental health services do you want to persue?. My dream is to create a safe and loving home for these kids. Caroline: Right, it was so much fun! We could do a family outting, or wanna plan something special for this summer, just us two? It'd be a great chance to catch up and explore nature! What do you think?. Melanie: Sounds great! What kind of books you got in your library?. 
Now I'm passionate about creating a safe, inviting place for people t", + "score": 1.0, + "latency_s": 0.211, + "retrieved": 10 + }, + { + "category": "open-domain", + "question": "Did Melanie make the black and white bowl in the photo?", + "gold_answer": "Yes", + "predicted": "Melanie: The kids loved it! They were so excited to get their hands dirty and make something with clay. Caroline: Thanks Melanie - love the blue vase in the pic! Blue's my fave, it makes me feel relaxed. Caroline: That bowl is awesome, Mel! What gave you the idea for all the colors and patterns?. Melanie: Thanks, Caroline! I'm obsessed with those, so I made something to catch the eye and make people smile. Nate: You can use stuff like dairy-free margarine or coconut oil instead of butter, and ma", + "score": 0.0, + "latency_s": 0.156, + "retrieved": 10 + }, + { + "category": "adversarial", + "question": "What did Caroline realize after her charity race?", + "gold_answer": "self-care is important", + "predicted": "I just wanted to let you know I challenged myself last Friday and did a charity event. Melanie: Congrats Caroline! Good on you for going after what you really care about. Melanie: That sounds awesome! What did you take away from it to use in your life?. Caroline: Cool! What did it look like?. I ran a charity race for mental health last Saturday \u2013 it was really rewarding", + "score": 0.0, + "latency_s": 0.161, + "retrieved": 10 + }, + { + "category": "adversarial", + "question": "What are Melanie's plans for the summer with respect to adoption?", + "gold_answer": "researching adoption agencies", + "predicted": "Respect for finding acceptance and helping others with what you've been through. Melanie: Caroline, awesome news that you two are getting along! What was it like for you both? Care to fill me in?. Guess what I did this week? I took the first step towards becoming a mom - I applied to adoption agencies! 
It's a big decision, but I think I'm ready to give all my love to a child. Caroline: That's so funny! I used to go horseback riding with my dad when I was a kid, we'd go through the fields, feelin", + "score": 0.5, + "latency_s": 0.169, + "retrieved": 10 + }, + { + "category": "adversarial", + "question": "What type of individuals does the adoption agency Melanie is considering support?", + "gold_answer": "LGBTQ+ individuals", + "predicted": "Caroline: Woohoo Melanie! I passed the adoption agency interviews last Friday! I'm so excited and thankful. For me, adoption is a way of giving back and showing love and acceptance. Guess what I did this week? I took the first step towards becoming a mom - I applied to adoption agencies! It's a big decision, but I think I'm ready to give all my love to a child. Andrew: Meeting all these adorable pups has been awesome! For those considering getting a pup, the size of living space and the exercise", + "score": 0.0, + "latency_s": 0.183, + "retrieved": 10 + }, + { + "category": "adversarial", + "question": "Why did Melanie choose the adoption agency?", + "gold_answer": "because of their inclusivity and support for LGBTQ+ individuals", + "predicted": "Guess what I did this week? I took the first step towards becoming a mom - I applied to adoption agencies! It's a big decision, but I think I'm ready to give all my love to a child. Caroline: Woohoo Melanie! I passed the adoption agency interviews last Friday! I'm so excited and thankful. Caroline: Thanks so much, Melanie! It's beautiful! It really brings home how much love's in families - both blood and the ones we choose. Wishing you the best on your adoption journey!. Melanie: Congrats, Carol", + "score": 0.0, + "latency_s": 0.155, + "retrieved": 10 + }, + { + "category": "adversarial", + "question": "What is Melanie excited about in her adoption process?", + "gold_answer": "creating a family for kids who need one", + "predicted": "Caroline: Woohoo Melanie! 
I passed the adoption agency interviews last Friday! I'm so excited and thankful. For me, adoption is a way of giving back and showing love and acceptance. Guess what I did this week? I took the first step towards becoming a mom - I applied to adoption agencies! It's a big decision, but I think I'm ready to give all my love to a child. Caroline: Thanks, Melanie! This necklace is super special to me - a gift from my grandma in my home country, Sweden. Melanie: Congrats C", + "score": 0.0, + "latency_s": 0.178, + "retrieved": 10 + }, + { + "category": "adversarial", + "question": "What does Melanie's necklace symbolize?", + "gold_answer": "love, faith, and strength", + "predicted": "Caroline: Thanks, Melanie! This necklace is super special to me - a gift from my grandma in my home country, Sweden. Melanie: That's gorgeous, Caroline! It's awesome what items can mean so much to us, right? Got any other objects that you treasure, like that necklace?. Melanie: Sounds great! What kind of counseling and mental health services do you want to persue?. What motivated you to pursue counseling?. Melanie: Congrats Caroline! Good on you for going after what you really care about", + "score": 0.0, + "latency_s": 0.175, + "retrieved": 10 + }, + { + "category": "adversarial", + "question": "What country is Melanie's grandma from?", + "gold_answer": "Sweden", + "predicted": "Caroline: Thanks, Melanie! This necklace is super special to me - a gift from my grandma in my home country, Sweden. John: Wow, having symbols of travel memories in our homes is so cool! Last week I retook the aptitude test with some great results, and I feel drawn to serving my country in this way. Melanie: That's gorgeous, Caroline! It's awesome what items can mean so much to us, right? Got any other objects that you treasure, like that necklace?. Melanie: Sounds great! 
What kind of counseling", + "score": 1.0, + "latency_s": 0.184, + "retrieved": 10 + }, + { + "category": "adversarial", + "question": "What was grandma's gift to Melanie?", + "gold_answer": "necklace", + "predicted": "Caroline: Thanks, Melanie! This necklace is super special to me - a gift from my grandma in my home country, Sweden. She gave it to me when I was young, and it stands for love, faith and strength. Melanie: That's gorgeous, Caroline! It's awesome what items can mean so much to us, right? Got any other objects that you treasure, like that necklace?. Melanie: Sounds great! What kind of counseling and mental health services do you want to persue?. Last Friday, I went to an LGBTQ+ counseling workshop", + "score": 1.0, + "latency_s": 0.157, + "retrieved": 10 + }, + { + "category": "adversarial", + "question": "What was grandpa's gift to Caroline?", + "gold_answer": "necklace", + "predicted": "Melanie: Caroline, awesome news that you two are getting along! What was it like for you both? Care to fill me in?. It was wonderful to see how the audience related to what I said and how it inspired them to be better allies. Caroline: Right, it was so much fun! We could do a family outting, or wanna plan something special for this summer, just us two? It'd be a great chance to catch up and explore nature! What do you think?. Caroline: Thanks, Melanie! This necklace is super special to me - a gi", + "score": 1.0, + "latency_s": 0.158, + "retrieved": 10 + }, + { + "category": "adversarial", + "question": "What is Caroline's hand-painted bowl a reminder of?", + "gold_answer": "art and self-expression", + "predicted": "Caroline: That bowl is awesome, Mel! What gave you the idea for all the colors and patterns?. Caroline: Thanks, Melanie! This necklace is super special to me - a gift from my grandma in my home country, Sweden. It's like a reminder of my roots and all the love and support I get from my family. Caroline: Yep, Melanie! 
I've got some other stuff with sentimental value, like my hand-painted bowl. Caroline: Wow, what a great day! Glad everyone could make it", + "score": 0.5, + "latency_s": 0.179, + "retrieved": 10 + }, + { + "category": "adversarial", + "question": "What did Caroline and her family do while camping?", + "gold_answer": "explored nature, roasted marshmallows, and went on a hike", + "predicted": "Caroline: Right, it was so much fun! We could do a family outting, or wanna plan something special for this summer, just us two? It'd be a great chance to catch up and explore nature! What do you think?. Takes a lot of courage and maturity! What do you think of this?. Did you reach the summit? When I was younger, my family and I went on a road trip to Oregon. Andrew: He's a German Shepherd - they're so smart and loyal! What do you think?. What made you decide to transition and join the transgend", + "score": 0.5, + "latency_s": 0.177, + "retrieved": 10 + }, + { + "category": "adversarial", + "question": "What kind of counseling and mental health services is Melanie interested in pursuing?", + "gold_answer": "working with trans people, helping them accept themselves and supporting their mental health", + "predicted": "Caroline: That charity race sounds great, Mel! Making a difference & raising awareness for mental health is super rewarding - I'm really proud of you for taking part!. Melanie: Sounds great! What kind of books you got in your library?. Caroline: I'm keen on counseling or working in mental health - I'd love to support those with similar issues. Caroline: Since our last chat, I've been looking into counseling or mental health work more. For me, adoption is a way of giving back and showing love and", + "score": 0.5, + "latency_s": 0.183, + "retrieved": 10 + }, + { + "category": "adversarial", + "question": "What kind of counseling workshop did Melanie attend recently?", + "gold_answer": "LGBTQ+ counseling workshop", + "predicted": "Melanie: Sounds great! 
What kind of books you got in your library?. Melanie: Wow, that's cool, Caroline! What happened that was so awesome? Did you hear any inspiring stories?. Melanie: Cool! What type of music do you play?. Melanie: That's awesome! What type of guitar? Been playing long?. Deborah: Did my photo remind you of something?", + "score": 0.0, + "latency_s": 0.146, + "retrieved": 10 + }, + { + "category": "adversarial", + "question": "What motivated Melanie to pursue counseling?", + "gold_answer": "her own journey and the support she received, and how counseling improved her life", + "predicted": "Melanie: Caroline, awesome news that you two are getting along! What was it like for you both? Care to fill me in?. Last Friday, my partner and I talked about how our loved ones have influenced us and what their values meant to us. Their values have influenced us to pursue our goals, such as me with engineering and my partner with their creative endeavors. Melanie: Hey Caroline! Great to hear from you! Wow, what an amazing journey. Caroline: Oh man, sorry to hear that, Melanie", + "score": 0.0, + "latency_s": 0.16, + "retrieved": 10 + }, + { + "category": "adversarial", + "question": "What kind of place does Melanie want to create for people?", + "gold_answer": "a safe and inviting place for people to grow", + "predicted": "Melanie: Sounds great! What kind of counseling and mental health services do you want to persue?. My dream is to create a safe and loving home for these kids. It was inspiring and emotional - so many people wanted to create loving homes for children in need. Now I'm passionate about creating a safe, inviting place for people to grow. Melanie: Sounds great! What kind of books you got in your library?", + "score": 1.0, + "latency_s": 0.191, + "retrieved": 10 + }, + { + "category": "adversarial", + "question": "Did Caroline make the black and white bowl in the photo?", + "gold_answer": "No", + "predicted": "Melanie: The kids loved it! 
They were so excited to get their hands dirty and make something with clay. Caroline: That bowl is awesome, Mel! What gave you the idea for all the colors and patterns?. Melanie: Thanks, Caroline! I'm obsessed with those, so I made something to catch the eye and make people smile. Nate: You can use stuff like dairy-free margarine or coconut oil instead of butter, and make sure to check the labels to ensure they're dairy-free. I took them to a snowy one last winter and", + "score": 1.0, + "latency_s": 0.181, + "retrieved": 10 + }, + { + "category": "adversarial", + "question": "What are the new shoes that Caroline got used for?", + "gold_answer": "Running", + "predicted": "Melanie: That's gorgeous, Caroline! It's awesome what items can mean so much to us, right? Got any other objects that you treasure, like that necklace?. Melanie: \"Summer Sounds\"- The playing an awesome pop song that got everyone dancing and singing. Caroline: That bowl is awesome, Mel! What gave you the idea for all the colors and patterns?. Audrey: Woohoo! I got a new place with a bigger backyard for the pooches! Take a look!. Audrey: That looks fun! Where are you at? I would love to take the d", + "score": 0.0, + "latency_s": 0.146, + "retrieved": 10 + }, + { + "category": "adversarial", + "question": "What is Caroline's reason for getting into running?", + "gold_answer": "To de-stress and clear her mind", + "predicted": "Melanie: Caroline, awesome news that you two are getting along! What was it like for you both? Care to fill me in?. Caroline: That bowl is awesome, Mel! What gave you the idea for all the colors and patterns?. Caroline: That charity race sounds great, Mel! Making a difference & raising awareness for mental health is super rewarding - I'm really proud of you for taking part!. Caroline: That's great, Mel! Taking time for yourself is so important. Melanie: You're doing something amazing! 
Creating a", + "score": 0.5, + "latency_s": 0.157, + "retrieved": 10 + }, + { + "category": "adversarial", + "question": "What does Caroline say running has been great for?", + "gold_answer": "Her mental health", + "predicted": "What has it been like for you finding supportive folks?. Taking care of it has been really calming, and it's a great way to connect with nature. The dog-training course has been a big time sink but it's paid off because they're doing great. Melanie: That's awesome! What type of guitar? Been playing long?. Caroline: I started playing acoustic guitar about five years ago; it's been a great way to express myself and escape into my emotions", + "score": 0.0, + "latency_s": 0.162, + "retrieved": 10 + }, + { + "category": "adversarial", + "question": "What did Melanie see at the council meeting for adoption?", + "gold_answer": "many people wanting to create loving homes for children in need", + "predicted": "Guess what I did this week? I took the first step towards becoming a mom - I applied to adoption agencies! It's a big decision, but I think I'm ready to give all my love to a child. It was wonderful to see how the audience related to what I said and how it inspired them to be better allies. Caroline: That bowl is awesome, Mel! What gave you the idea for all the colors and patterns?. Caroline: I'm always here for you, Mel! We had a blast last year at the Pride fest. 
Tell me, what's your vision fo", + "score": 0.0, + "latency_s": 0.162, + "retrieved": 10 + } + ] +} \ No newline at end of file diff --git a/src/zettelforge/config.py b/src/zettelforge/config.py index ca096a5..f9d029c 100644 --- a/src/zettelforge/config.py +++ b/src/zettelforge/config.py @@ -205,6 +205,10 @@ class RetrievalConfig: rerank_max_candidates: int = 8 rerank_doc_chars: int = 256 rerank_model: str = "Xenova/ms-marco-MiniLM-L-6-v2" + # Query entities mapped to more than this many notes carry no retrieval + # signal (conversational speaker names appear in every session); they + # are skipped by the graph/entity-augmentation stages. + entity_max_fanout: int = 25 # ONNX intra-op threads for the cross-encoder. Small rerank batches # thrash when onnxruntime grabs every core (measured 23.7ms -> 11.5ms # at 8 threads on a 20-core GB10). 0 = onnxruntime default. diff --git a/src/zettelforge/memory_manager.py b/src/zettelforge/memory_manager.py index 2264aa4..12fcf77 100644 --- a/src/zettelforge/memory_manager.py +++ b/src/zettelforge/memory_manager.py @@ -768,6 +768,9 @@ def _recall_inner( resolved = {} for etype, elist in query_entities.items(): resolved[etype] = [self.resolver.resolve(etype, e) for e in elist] + # High-fanout entities (speaker names in every session) flood the + # graph and entity-augmentation stages with undiscriminative notes. + resolved = self._filter_low_signal_entities(resolved) # Vector retrieval (Community + Enterprise). # Request (note, score) tuples — BlendedRetriever's _normalize_scores @@ -957,6 +960,35 @@ def _recall_inner( ) return results + def _filter_low_signal_entities( + self, resolved: dict[str, list[str]], max_fanout: int | None = None + ) -> dict[str, list[str]]: + """Drop query entities whose note fan-out exceeds max_fanout. 
+ + An entity mapped to a large share of the corpus (a conversational + speaker name, for example) ranks nothing: traversing it floods the + blended ranking with undiscriminative notes and displaces vector + hits. IDF-style gate; threshold from retrieval.entity_max_fanout. + """ + limit = max_fanout if max_fanout is not None else get_config().retrieval.entity_max_fanout + if limit <= 0: + return resolved + filtered: dict[str, list[str]] = {} + for etype, values in resolved.items(): + kept = [] + for value in values: + if not value: + continue + # Fan-out where flooding actually happens: KG out-degree. + # (Supersession prunes the entity index but MENTIONED_IN + # edges accumulate one per note.) + node = self.store.get_kg_node(etype, value) + fanout = len(self.store.get_kg_edges_from(node["node_id"])) if node else 0 + if fanout <= limit: + kept.append(value) + filtered[etype] = kept + return filtered + def recall_entity(self, entity_type: str, entity_value: str, k: int = 5) -> list[MemoryNote]: """ Fast lookup by entity type and value. diff --git a/tests/test_graph_scoping.py b/tests/test_graph_scoping.py index ef543ce..1079f6c 100644 --- a/tests/test_graph_scoping.py +++ b/tests/test_graph_scoping.py @@ -94,3 +94,35 @@ def counting(nid): # phantom candidates from store A or the global KG. 
assert lookups['n'] <= 10 assert all('APT28' not in n.content.raw for n in results) + + +def test_high_fanout_entities_skip_graph_stage(tmp_path): + """Entities mapping to a large share of the corpus carry no signal + (conversational speaker names): they must not flood blended recall.""" + from zettelforge.graph_retriever import GraphRetriever, StoreGraphSource + + mm = _manager(tmp_path, 'fanout') + for i in range(12): + mm.remember( + f'Melanie: session {i} chat about topic {i} with details.', + source_type='dialogue', + source_ref=f's{i}', + domain='locomo', + ) + mm.remember( + 'Melanie: I tried the DROPBEAR exploit demo today.', + source_type='dialogue', + source_ref='s99', + domain='locomo', + ) + + filtered = mm._filter_low_signal_entities( + {'person': ['melanie'], 'tool': ['dropbear']}, max_fanout=5 + ) + assert filtered.get('person', []) == [] + assert filtered.get('tool') == ['dropbear'] + + # End to end: recall must not return only melanie-flooded results when + # the query names a discriminative entity. + results = mm.recall('What is the DROPBEAR exploit?', k=5, exclude_superseded=False) + assert any('DROPBEAR' in n.content.raw for n in results) From 0784138354503019978a2e2416b28c3b182e7617 Mon Sep 17 00:00:00 2001 From: Patrick Roland Date: Tue, 9 Jun 2026 13:48:41 -0500 Subject: [PATCH 15/24] revert(entities): free-text person extraction regressed LoCoMo, dialogue-only again Measured 11% -> 5% overall (single-hop/multi-hop unchanged at 0): persons extracted from turn bodies reshuffled supersession chains at ingest, changing which notes survive in the entity index. The fan-out gate could not recover it because the damage is write-side. Expanded stopword list and the gate itself are kept. Decision and data recorded in the test docstring; revisit via RFC-001 LLM NER. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- src/zettelforge/entity_indexer.py | 32 ++++--------------- tests/test_conversational_entities.py | 45 ++++++++------------------- 2 files changed, 19 insertions(+), 58 deletions(-) diff --git a/src/zettelforge/entity_indexer.py b/src/zettelforge/entity_indexer.py index c748670..883b45b 100644 --- a/src/zettelforge/entity_indexer.py +++ b/src/zettelforge/entity_indexer.py @@ -125,16 +125,14 @@ class EntityExtractor: '"activity": ["swimming"], "temporal": ["last Tuesday"]}' ) - # Regex for conversational person names from dialogue format "Name: text" + # Regex for conversational person names from dialogue format "Name: text". + # Free-text person extraction (single capitalized tokens in running text) + # was tried 2026-06-09 and REVERTED: speaker names extracted from turn + # bodies reshuffled supersession chains and dropped LoCoMo from 11% to 5% + # while single-hop/multi-hop stayed at 0. Revisit only with the LLM NER + # path (RFC-001) where extraction quality is high enough to gate on. _PERSON_PATTERN = re.compile(r"(?:^|\n)\s*([A-Z][a-z]{2,15}):", re.MULTILINE) - # Candidate person names anywhere in free text. Filtered hard below: - # sentence-initial tokens and multi-word proper noun phrases are skipped, - # so only single capitalized tokens in running text survive ("with - # Caroline", "What did Melanie paint?"). This is what lets query-side - # entity extraction fire graph traversal on conversational questions. 
- _FREETEXT_PERSON_PATTERN = re.compile(r"\b([A-Z][a-z]{2,15})\b") - # Common words that match the person pattern but aren't names _NAME_STOPWORDS: ClassVar[set[str]] = { "the", @@ -321,24 +319,6 @@ def extract_regex(self, text: str) -> dict[str, list[str]]: for name in person_matches: if name.lower() not in self._NAME_STOPWORDS and len(name) >= 3: persons.add(name.lower()) - - # Person names from running text: single capitalized tokens that are - # not sentence-initial and not part of a proper noun phrase. - for match in self._FREETEXT_PERSON_PATTERN.finditer(text): - word = match.group(1) - lower = word.lower() - if lower in persons or lower in self._NAME_STOPWORDS: - continue - prefix = text[: match.start()].rstrip(" \"'(") - if not prefix or prefix[-1] in ".!?:\n": - continue # sentence-initial token, not a reliable name signal - following = re.match(r"\s+[A-Z][a-z]", text[match.end() :]) - if following is not None: - continue # "Cobalt Strike", "New York": proper noun phrase - preceding = re.search(r"([A-Za-z][\w-]*)\s*$", prefix) - if preceding is not None and preceding.group(1)[0].isupper(): - continue # second word of a proper noun phrase - persons.add(lower) results["person"] = list(persons) # Locations diff --git a/tests/test_conversational_entities.py b/tests/test_conversational_entities.py index bf35f81..4c3d45f 100644 --- a/tests/test_conversational_entities.py +++ b/tests/test_conversational_entities.py @@ -226,46 +226,27 @@ def test_search_entities_finds_across_types(self): assert "organization" in results -class TestFreeTextPersonExtraction: - """Person names from free text, not just 'Name:' dialogue lines. - - Query-side extraction is what lets graph traversal fire on - conversational questions ("What did Melanie paint?"); ingest-side - extraction is what indexes friends mentioned inside turns. +class TestFreeTextPersonNotExtracted: + """Regression lock: persons come from 'Name:' dialogue lines only. 
+ + Free-text person extraction (capitalized tokens in running text) was + measured on 2026-06-09: it reshuffled supersession chains at ingest + and dropped LoCoMo overall accuracy from 11% to 5% with no + single-hop/multi-hop gain. Reverted; revisit via the RFC-001 LLM NER + path where extraction precision is high enough to gate on. """ - def test_query_side_person_extracted(self): - ext = EntityExtractor() - result = ext.extract_regex("What did Melanie paint last May?") - assert "melanie" in result["person"] - - def test_mid_sentence_person_extracted(self): + def test_free_text_words_not_persons(self): ext = EntityExtractor() result = ext.extract_regex("I went hiking with Caroline and her dog.") - assert "caroline" in result["person"] + assert "caroline" not in result["person"] - def test_sentence_initial_words_not_persons(self): - ext = EntityExtractor() - result = ext.extract_regex("What tools does the group use? The group adapted.") - assert "what" not in result["person"] - assert "the" not in result["person"] - - def test_proper_noun_phrases_skipped(self): + def test_proper_noun_phrases_not_persons(self): ext = EntityExtractor() result = ext.extract_regex( "APT28 used Cobalt Strike against New York targets." 
) - assert "cobalt" not in result["person"] - assert "strike" not in result["person"] - assert "new" not in result["person"] - assert "york" not in result["person"] - - def test_capitalized_common_terms_skipped(self): - ext = EntityExtractor() - result = ext.extract_regex("We celebrated Christmas in Toronto with Mom.") - assert "christmas" not in result["person"] - assert "mom" not in result["person"] - assert "toronto" in result["location"] + assert result["person"] == [] def test_dialogue_names_still_extracted(self): ext = EntityExtractor() @@ -275,7 +256,7 @@ def test_dialogue_names_still_extracted(self): def test_demonyms_and_vendors_not_persons(self): ext = EntityExtractor() result = ext.extract_regex( - "APT28 is a Russian threat actor abusing Microsoft services." + "Russian: a language note.\nMicrosoft: a vendor note." ) assert "russian" not in result["person"] assert "microsoft" not in result["person"] From 8fb59afc19b6e6452a921f08a6caa93bc733dcca Mon Sep 17 00:00:00 2001 From: Patrick Roland Date: Tue, 9 Jun 2026 13:51:32 -0500 Subject: [PATCH 16/24] chore(types): annotate StoreGraphSource and embedding cache helpers Co-Authored-By: Claude Opus 4.8 (1M context) --- src/zettelforge/graph_retriever.py | 9 ++++++--- src/zettelforge/vector_memory.py | 8 ++++++-- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/src/zettelforge/graph_retriever.py b/src/zettelforge/graph_retriever.py index 9f2fbaa..ddb00d5 100644 --- a/src/zettelforge/graph_retriever.py +++ b/src/zettelforge/graph_retriever.py @@ -7,7 +7,10 @@ """ from dataclasses import dataclass, field -from typing import Protocol +from typing import TYPE_CHECKING, Protocol + +if TYPE_CHECKING: + from zettelforge.storage_backend import StorageBackend class GraphSource(Protocol): @@ -29,7 +32,7 @@ class StoreGraphSource: isolated store yields phantom note IDs and unbounded BFS cost. 
""" - def __init__(self, store) -> None: + def __init__(self, store: "StorageBackend") -> None: self._store = store def get_node(self, entity_type: str, entity_value: str) -> dict | None: @@ -82,7 +85,7 @@ def _bfs_collect( start_value: str, max_depth: int, best: dict[str, ScoredResult], - ): + ) -> None: start_node = self.kg.get_node(start_type, start_value) if not start_node: return diff --git a/src/zettelforge/vector_memory.py b/src/zettelforge/vector_memory.py index 2a76f4b..08c613f 100644 --- a/src/zettelforge/vector_memory.py +++ b/src/zettelforge/vector_memory.py @@ -21,9 +21,13 @@ import uuid from datetime import datetime from pathlib import Path +from typing import TYPE_CHECKING from zettelforge.log import get_logger +if TYPE_CHECKING: + from zettelforge.cache import SmartCache + _logger = get_logger("zettelforge.vector_memory") @@ -106,7 +110,7 @@ def preload_embedding_model() -> None: _embedding_cache_lock = threading.Lock() -def _get_embedding_cache(): +def _get_embedding_cache() -> "SmartCache": global _embedding_cache if _embedding_cache is None: with _embedding_cache_lock: @@ -135,7 +139,7 @@ def get_embedding(text: str, model: str | None = None) -> list[float]: cache = _get_embedding_cache() key_model = model or get_embedding_model() key = f"{key_model}:{hashlib.sha256(text.encode()).hexdigest()}" - cached = cache.get(key) + cached: list[float] | None = cache.get(key) if cached is not None: return cached embedding = _compute_embedding(text, model) From 8a8f5ab5d564d90f47a428d791bc5e2640250c11 Mon Sep 17 00:00:00 2001 From: Patrick Roland Date: Tue, 9 Jun 2026 13:53:11 -0500 Subject: [PATCH 17/24] test(benchmarks): add profiling and measurement harnesses from perf session profile_recall (cProfile attribution), instrument_lookups (note-lookup volume per stage), rerank_grid (policy tuning grid), mine_phase_timings (OCSF log phase aggregation). 
Co-Authored-By: Claude Opus 4.8 (1M context) --- benchmarks/instrument_lookups.py | 62 ++++++++++++++++++++++++++++++++ benchmarks/mine_phase_timings.py | 56 +++++++++++++++++++++++++++++ benchmarks/profile_recall.py | 61 +++++++++++++++++++++++++++++++ benchmarks/rerank_grid.py | 43 ++++++++++++++++++++++ 4 files changed, 222 insertions(+) create mode 100644 benchmarks/instrument_lookups.py create mode 100644 benchmarks/mine_phase_timings.py create mode 100644 benchmarks/profile_recall.py create mode 100644 benchmarks/rerank_grid.py diff --git a/benchmarks/instrument_lookups.py b/benchmarks/instrument_lookups.py new file mode 100644 index 0000000..7a43a3d --- /dev/null +++ b/benchmarks/instrument_lookups.py @@ -0,0 +1,62 @@ +#!/usr/bin/env python3 +"""Instrument note-lookup volume per recall stage. + +Counts store.get_note_by_id calls (total vs unique ids) and graph result +sizes per query to locate the redundant-lookup source the profiler exposed +(~476 lookups/query on an 8-note corpus). 
+ +Usage: + python benchmarks/instrument_lookups.py +""" +import os +import tempfile + +os.environ.setdefault('ZETTELFORGE_ENRICHMENT_ENABLED', 'false') + +from cti_retrieval_benchmark import CTI_QUERIES, CTI_REPORTS + +from zettelforge import MemoryManager +from zettelforge.graph_retriever import GraphRetriever + + +def main() -> None: + tmpdir = tempfile.mkdtemp(prefix='instr_lookups_') + mm = MemoryManager(jsonl_path=f'{tmpdir}/notes.jsonl', lance_path=f'{tmpdir}/vectordb') + for report in CTI_REPORTS: + mm.remember(report['content'], source_type='threat_report', source_ref=report['id'], domain='cti') + + # Wrap get_note_by_id with a counter + calls = {'total': 0, 'ids': []} + orig = mm.store.get_note_by_id + + def counting(nid): + calls['total'] += 1 + calls['ids'].append(nid) + return orig(nid) + + mm.store.get_note_by_id = counting + + # Wrap graph retrieval to report result sizes + orig_retrieve = GraphRetriever.retrieve_note_ids + graph_sizes = [] + + def counting_retrieve(self, query_entities, max_depth=2): + res = orig_retrieve(self, query_entities, max_depth=max_depth) + graph_sizes.append(len(res)) + return res + + GraphRetriever.retrieve_note_ids = counting_retrieve + + print(f'{"query":<48} {"lookups":>8} {"unique":>7} {"graph_n":>8}') + for qa in CTI_QUERIES: + calls['total'] = 0 + calls['ids'] = [] + graph_sizes.clear() + mm.recall(qa['question'], k=10, exclude_superseded=False) + uniq = len(set(calls['ids'])) + gsz = graph_sizes[0] if graph_sizes else 0 + print(f'{qa["question"][:46]:<48} {calls["total"]:>8} {uniq:>7} {gsz:>8}') + + +if __name__ == '__main__': + main() diff --git a/benchmarks/mine_phase_timings.py b/benchmarks/mine_phase_timings.py new file mode 100644 index 0000000..21fb713 --- /dev/null +++ b/benchmarks/mine_phase_timings.py @@ -0,0 +1,56 @@ +#!/usr/bin/env python3 +"""Mine remember() phase_timings_ms distributions from a benchmark log. + +The write path logs per-phase wall times on every remember() call +(RFC-009 Phase 0.5). 
This aggregates p50/p95 per phase from a captured +log so ingest optimization targets the real cost center. + +Usage: + python benchmarks/mine_phase_timings.py /tmp/locomo_clean_baseline.log +""" +import json +import statistics +import sys + + +def main(path: str) -> None: + phases: dict[str, list[float]] = {} + durations: list[float] = [] + with open(path) as f: + for line in f: + # OCSF sink uses activity_name; structlog console uses operation + if '"operation": "remember"' not in line and '"activity_name": "remember"' not in line: + continue + try: + rec = json.loads(line[line.index('{'):]) + except (ValueError, json.JSONDecodeError): + continue + if rec.get('operation') != 'remember' and rec.get('activity_name') != 'remember': + continue + durations.append(float(rec.get('duration_ms', 0))) + timings = rec.get('phase_timings_ms') or rec.get('unmapped', {}).get('phase_timings_ms') or {} + for phase, ms in timings.items(): + phases.setdefault(phase, []).append(float(ms)) + + if not durations: + print('no remember() records found') + return + + durations.sort() + print(f'remember() calls: {len(durations)}') + print(f'total p50={statistics.median(durations):.1f}ms ' + f'p95={durations[int(len(durations) * 0.95)]:.1f}ms ' + f'mean={statistics.mean(durations):.1f}ms') + print(f'\n{"phase":<24} {"n":>5} {"p50ms":>8} {"p95ms":>8} {"mean":>8} {"share":>7}') + total_mean = statistics.mean(durations) + for phase, vals in sorted(phases.items(), key=lambda kv: -statistics.mean(kv[1])): + vals.sort() + mean = statistics.mean(vals) + print( + f'{phase:<24} {len(vals):>5} {statistics.median(vals):>8.1f} ' + f'{vals[int(len(vals) * 0.95)]:>8.1f} {mean:>8.1f} {mean / total_mean * 100:>6.1f}%' + ) + + +if __name__ == '__main__': + main(sys.argv[1] if len(sys.argv) > 1 else '/tmp/locomo_clean_baseline.log') diff --git a/benchmarks/profile_recall.py b/benchmarks/profile_recall.py new file mode 100644 index 0000000..1c24d5d --- /dev/null +++ b/benchmarks/profile_recall.py @@ -0,0 
+1,61 @@ +#!/usr/bin/env python3 +"""Profile the recall hot path: where does per-query latency go? + +Ingests the CTI benchmark corpus into a temp store, then profiles +recall() over the 20 benchmark queries (3 passes) with cProfile and +per-stage wall timings. + +Usage: + python benchmarks/profile_recall.py +""" +import cProfile +import io +import pstats +import statistics +import tempfile +import time + +from cti_retrieval_benchmark import CTI_QUERIES, CTI_REPORTS + +from zettelforge import MemoryManager + + +def main() -> None: + tmpdir = tempfile.mkdtemp(prefix='profile_recall_') + mm = MemoryManager(jsonl_path=f'{tmpdir}/notes.jsonl', lance_path=f'{tmpdir}/vectordb') + + t0 = time.perf_counter() + for report in CTI_REPORTS: + mm.remember(report['content'], source_type='threat_report', source_ref=report['id'], domain='cti') + print(f'ingest: {time.perf_counter() - t0:.2f}s for {len(CTI_REPORTS)} notes') + + # Warm pass (model load, caches) + for qa in CTI_QUERIES[:3]: + mm.recall(qa['question'], k=10, exclude_superseded=False) + + # Timed passes + latencies = [] + for _ in range(3): + for qa in CTI_QUERIES: + t = time.perf_counter() + mm.recall(qa['question'], k=10, exclude_superseded=False) + latencies.append(time.perf_counter() - t) + lat_ms = sorted(x * 1000 for x in latencies) + print(f'recall over {len(latencies)} calls: p50={statistics.median(lat_ms):.1f}ms ' + f'p95={lat_ms[int(len(lat_ms) * 0.95)]:.1f}ms mean={statistics.mean(lat_ms):.1f}ms') + + # cProfile pass + profiler = cProfile.Profile() + profiler.enable() + for qa in CTI_QUERIES: + mm.recall(qa['question'], k=10, exclude_superseded=False) + profiler.disable() + + s = io.StringIO() + stats = pstats.Stats(profiler, stream=s) + stats.sort_stats('cumulative').print_stats(35) + print(s.getvalue()) + + +if __name__ == '__main__': + main() diff --git a/benchmarks/rerank_grid.py b/benchmarks/rerank_grid.py new file mode 100644 index 0000000..c01d91f --- /dev/null +++ b/benchmarks/rerank_grid.py @@ -0,0 
+1,43 @@ +#!/usr/bin/env python3 +"""Measure rerank policy variants on the CTI suite: accuracy vs p50 latency. + +Grid: doc_chars x max_candidates plus rerank-off. Run on an idle machine; +results pick the config.default.yaml tuned values (zero accuracy loss rule). + +Usage: + python benchmarks/rerank_grid.py +""" +import os + +os.environ.setdefault('ZETTELFORGE_ENRICHMENT_ENABLED', 'false') + +from cti_retrieval_benchmark import run_strategy + +from zettelforge.config import get_config + + +def main() -> None: + cfg = get_config().retrieval + variants = [ + ('off', False, 50, 512), + ('512c-50n (current)', True, 50, 512), + ('512c-16n', True, 16, 512), + ('384c-16n', True, 16, 384), + ('256c-16n', True, 16, 256), + ('256c-8n', True, 8, 256), + ('128c-8n', True, 8, 128), + ] + print(f'{"variant":<22} {"accuracy":>9} {"avg":>6} {"p50ms":>7} {"p95ms":>7}') + for name, enabled, max_cand, chars in variants: + cfg.rerank_enabled = enabled + cfg.rerank_max_candidates = max_cand + cfg.rerank_doc_chars = chars + r = run_strategy('full_session') + print( + f'{name:<22} {r["accuracy"]:>8}% {r["avg_score"]:>6} ' + f'{r["p50_latency_ms"]:>7.0f} {r["p95_latency_ms"]:>7.0f}' + ) + + +if __name__ == '__main__': + main() From db919bbdf022029ee01a43c2d24b9e6ebe5e2434 Mon Sep 17 00:00:00 2001 From: Patrick Roland Date: Tue, 9 Jun 2026 13:58:07 -0500 Subject: [PATCH 18/24] docs(benchmarks): record 2026-06-09 performance session results Same-machine, deterministic-config before/after: LoCoMo 7.0% -> 11.0% accuracy (+57% relative; 13.0% chunked config), p50 336 -> 170ms (-49%), p95 -50%, ingest 1.0 -> 8.0 turns/s (7.8x). CTI held 75.0% with p50 79 -> 39ms (-51%). Raw logs under benchmarks/results/session_2026-06-09/. Includes the recorded negative result (free-text person extraction) and the chunked-ingestion configuration trade-off. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- benchmarks/BENCHMARK_REPORT.md | 73 +++ benchmarks/cti_retrieval_results.json | 42 +- benchmarks/locomo_results.json | 861 +++++++++++++++++++++++--- 3 files changed, 862 insertions(+), 114 deletions(-) diff --git a/benchmarks/BENCHMARK_REPORT.md b/benchmarks/BENCHMARK_REPORT.md index 877d726..374168c 100644 --- a/benchmarks/BENCHMARK_REPORT.md +++ b/benchmarks/BENCHMARK_REPORT.md @@ -28,6 +28,79 @@ ZettelForge was evaluated across five benchmark suites. The system runs with zer --- +## 0. Performance session 2026-06-09 (v2.8.0-dev, branch perf/cti-memory-40) + +All numbers below are same-machine (DGX Spark GB10), same-day, deterministic +config: enrichment disabled (`ZETTELFORGE_ENRICHMENT_ENABLED=false`), keyword +judge, heuristic answer extraction (no synthesis LLM installed). The clean +baseline was measured first on unmodified v2.7.0 source after repairing the +rotted harnesses (dead `disable_enrichment` kwarg, removed `remember_chunked` +API). Raw logs: `benchmarks/results/session_2026-06-09/`. + +| Metric | v2.7.0 baseline | optimized | delta | +|--------|-----------------|-----------|-------| +| LoCoMo accuracy (keyword judge) | 7.0% | 11.0% | +57% relative | +| LoCoMo p50 / p95 latency | 336ms / 387ms | 170ms / 193ms | -49% / -50% | +| LoCoMo ingest (272 sessions) | 262.5s (1.0/s) | 33.8s (8.0/s) | 7.8x | +| CTI retrieval accuracy | 75.0% | 75.0% | held | +| CTI p50 latency (idle machine) | 79ms | 39ms | -51% | +| recall p95 (profiled, 60 calls) | 258ms | 93ms | -64% | +| recall mean (profiled) | 117.6ms | 54.8ms | -53% | + +Note on LoCoMo baselines: the published 22% (v2.1.1) used a local synthesis +LLM (qwen2.5:3b) that is not installed on this host; both columns above use +the same deterministic heuristic-extraction path, so the comparison is +apples to apples. Latency includes harness overhead (keyword boost scan and +synthesis fallback), not just `recall()`. + +### What changed + +1. 
**Scoped knowledge graph reads.** `_recall_inner` traversed the + process-global JSONL KG (109MB on this host, mixing every store) while + writes went to the per-store SQLite KG. Isolated stores saw up to ~2000 + phantom note IDs per entity query and never saw their own graph. Recall + now reads the store's KG via `StoreGraphSource`. +2. **MemSAD gate vectorized.** The write-time anomaly gate was 93% of + remember() latency at 50 references (~1.1s/ingest): O(n^2) pure-Python + cosines plus n^2 n-gram recounts per ingest. numpy pairwise scoring, + content-hash counter cache, and a bounded reference fetch + (`get_recent_notes_by_domain`) brought warm evaluate() to ~3.4ms with + scores pinned to the original math at 1e-9 by characterization tests. +3. **Rerank policy.** Cross-encoder rerank is the dominant read cost and is + worth +15pp CTI accuracy (75% vs 60% without it). Grid-tuned bounds: + 8 candidates, 256 chars/doc (accuracy holds from 50x512 down to 8x128; + collapses below 8 candidates). `rerank_model` is configurable; the + model grid kept ms-marco-MiniLM-L-6-v2. +4. **ONNX thread pinning.** 20-core default oversubscribed small batches: + 8 threads cut rerank 23.7ms to 11.5ms and query embedding 5.9ms to 4.5ms. +5. **Embedding LRU cache** keyed by (model, sha256(text)) — first + integration of the dormant cache.py. +6. **Entity fan-out gate.** Query entities whose KG out-degree exceeds + `retrieval.entity_max_fanout` (default 25) are skipped by graph and + entity-augmentation stages (conversational speaker names map to every + session and flood blended recall). +7. **Enrichment off-switch** (`ZETTELFORGE_ENRICHMENT_ENABLED`) restoring + deterministic benchmark ingestion; `remember_chunked()` restored. + +### Chunked-ingestion configuration (recorded, not default) + +`LOCOMO_CHUNK_SIZE=800` stores each session as ~800-char chunks +(MemPalace granularity, no 4000-char truncation): 13.0% accuracy at +p50 347ms / p95 418ms on a ~1400-note store. 
Compared to the v2.7.0 +baseline at effectively the same latency (336ms), that is +86% +relative accuracy; compared to the default optimized config it trades +2x latency for +2pp. Default stays full-session (11.0% at 170ms). + +### Negative result (recorded) + +Free-text person extraction (capitalized tokens in running text) dropped +LoCoMo from 11% to 5% by reshuffling supersession chains at ingest, with no +single-hop or multi-hop gain. Reverted same day; regression-locked in +`tests/test_conversational_entities.py`. Conversational NER should come via +the RFC-001 LLM path, not regex. + +--- + ## 1. CTI Retrieval Benchmark (Domain Benchmark) **Date:** 2026-04-10 | **Corpus:** 8 real-world-style CTI reports | **Queries:** 20 diff --git a/benchmarks/cti_retrieval_results.json b/benchmarks/cti_retrieval_results.json index 644f010..7443cc8 100644 --- a/benchmarks/cti_retrieval_results.json +++ b/benchmarks/cti_retrieval_results.json @@ -1,78 +1,78 @@ { "meta": { - "date": "2026-04-10T08:05:55.405026", + "date": "2026-06-09T13:56:15.802128", "reports": 8, "queries": 20 }, "full_session": { "strategy": "full_session", "notes": 8, - "ingest_time_s": 69.1, + "ingest_time_s": 3.4, "accuracy": 75.0, - "avg_score": 0.875, - "p50_latency_ms": 620.0, - "p95_latency_ms": 2732.0, + "avg_score": 0.85, + "p50_latency_ms": 39.0, + "p95_latency_ms": 159.0, "by_category": { "tool-attribution": { "accuracy": 40.0, "avg_score": 0.7, - "p50_latency_ms": 1343.0 + "p50_latency_ms": 42.0 }, "cve-linkage": { "accuracy": 75.0, - "avg_score": 0.875, - "p50_latency_ms": 794.0 + "avg_score": 0.75, + "p50_latency_ms": 38.0 }, "attribution": { "accuracy": 100.0, "avg_score": 1.0, - "p50_latency_ms": 611.0 + "p50_latency_ms": 59.0 }, "temporal": { "accuracy": 66.7, "avg_score": 0.833, - "p50_latency_ms": 569.0 + "p50_latency_ms": 41.0 }, "multi-hop": { "accuracy": 100.0, "avg_score": 1.0, - "p50_latency_ms": 644.0 + "p50_latency_ms": 38.0 } } }, "chunked_800": { "strategy": "chunked_800", 
"notes": 8, - "ingest_time_s": 56.5, + "ingest_time_s": 0.1, "accuracy": 75.0, - "avg_score": 0.875, - "p50_latency_ms": 706.0, - "p95_latency_ms": 2729.0, + "avg_score": 0.85, + "p50_latency_ms": 52.0, + "p95_latency_ms": 59.0, "by_category": { "tool-attribution": { "accuracy": 40.0, "avg_score": 0.7, - "p50_latency_ms": 1299.0 + "p50_latency_ms": 50.0 }, "cve-linkage": { "accuracy": 75.0, - "avg_score": 0.875, - "p50_latency_ms": 795.0 + "avg_score": 0.75, + "p50_latency_ms": 52.0 }, "attribution": { "accuracy": 100.0, "avg_score": 1.0, - "p50_latency_ms": 535.0 + "p50_latency_ms": 52.0 }, "temporal": { "accuracy": 66.7, "avg_score": 0.833, - "p50_latency_ms": 772.0 + "p50_latency_ms": 54.0 }, "multi-hop": { "accuracy": 100.0, "avg_score": 1.0, - "p50_latency_ms": 741.0 + "p50_latency_ms": 33.0 } } } diff --git a/benchmarks/locomo_results.json b/benchmarks/locomo_results.json index 4c6bedb..05f7fbc 100644 --- a/benchmarks/locomo_results.json +++ b/benchmarks/locomo_results.json @@ -1,286 +1,961 @@ { "meta": { - "date": "2026-04-19T13:06:51.187286", - "version": "zettelforge-2.3.0", + "date": "2026-06-09T13:55:42.638129", + "version": "zettelforge-2.7.0", "dataset": "/home/rolandpg/.openclaw/workspace-nexus/Locomo-Plus/data/locomo10.json", - "per_category": 5, + "per_category": 20, "judge": "keyword", "k": 10 }, "ingest": { "ingested": 272, "errors": 0, - "duration_s": 25.44, - "rate_per_s": 10.7 + "duration_s": 212.93, + "rate_per_s": 1.3 }, "by_category": { "single-hop": { - "accuracy": 20.0, - "avg_score": 0.3, - "p50_latency_ms": 120439.43383899932, - "p95_latency_ms": 123075.08381000024, - "n": 5 + "accuracy": 0.0, + "avg_score": 0.075, + "p50_latency_ms": 352.9447293840349, + "p95_latency_ms": 480.3748088888824, + "n": 20 }, "multi-hop": { - "accuracy": 40.0, - "avg_score": 0.4, - "p50_latency_ms": 119847.58609299934, - "p95_latency_ms": 120781.03551000005, - "n": 5 + "accuracy": 0.0, + "avg_score": 0.1, + "p50_latency_ms": 341.7111549060792, + 
"p95_latency_ms": 407.1370349265635, + "n": 20 }, "temporal": { - "accuracy": 0.0, - "avg_score": 0.0, - "p50_latency_ms": 120328.95609200023, - "p95_latency_ms": 120428.4008700006, - "n": 5 + "accuracy": 5.0, + "avg_score": 0.1, + "p50_latency_ms": 349.6051391121, + "p95_latency_ms": 417.8889929316938, + "n": 20 }, "open-domain": { - "accuracy": 20.0, - "avg_score": 0.4, - "p50_latency_ms": 59822.68663400009, - "p95_latency_ms": 120416.60454800059, - "n": 5 + "accuracy": 30.0, + "avg_score": 0.35, + "p50_latency_ms": 339.8865994531661, + "p95_latency_ms": 419.203395023942, + "n": 20 }, "adversarial": { - "accuracy": 0.0, - "avg_score": 0.1, - "p50_latency_ms": 120543.52754999945, - "p95_latency_ms": 120728.02882499946, - "n": 5 + "accuracy": 30.0, + "avg_score": 0.425, + "p50_latency_ms": 351.9679356832057, + "p95_latency_ms": 417.98368003219366, + "n": 20 } }, "overall": { - "accuracy": 16.0, - "avg_score": 0.24, - "p50_latency_ms": 119847.58609299934, - "p95_latency_ms": 120781.03551000005, - "total_samples": 25 + "accuracy": 13.0, + "avg_score": 0.21, + "p50_latency_ms": 347.31850237585604, + "p95_latency_ms": 417.8889929316938, + "total_samples": 100 }, "details": [ { "category": "single-hop", "question": "What did Caroline research?", "gold_answer": "Adoption agencies", - "predicted": "Melanie: Wow, that's cool, Caroline! What happened that was so awesome? Did you hear any inspiring stories?. Guess what I did this week? I took the first step towards becoming a mom - I applied to adoption agencies! It's a big decision, but I think I'm ready to give all my love to a child. Caroline: Cool! What did it look like?. Melanie: That sounds awesome! What did you take away from it to use in your life?. Melanie: Wow, Caroline! What kinda jobs are you thinkin' of? Anything that stands out?", - "score": 1.0, - "latency_s": 123.075, + "predicted": "Melanie: That sounds awesome! What did you take away from it to use in your life?. Melanie: Yep, Caroline. 
Melanie: Congrats Caroline! Good on you for going after what you really care about. Melanie: That's awesome, Caroline! You drew it? What does it mean to you?. Melanie: Yep, Caroline", + "score": 0.0, + "latency_s": 0.48, "retrieved": 10 }, { "category": "single-hop", "question": "What is Caroline's identity?", "gold_answer": "Transgender woman", - "predicted": "Melanie: Wow, that's cool, Caroline! What happened that was so awesome? Did you hear any inspiring stories?. Caroline: Gonna continue my edu and check out career options, which is pretty exciting!. Melanie: Wow, Caroline! What kinda jobs are you thinkin' of? Anything that stands out?. Is this your own painting?. Relaxing and expressing ourselves is key", + "predicted": "Melanie: That's awesome, Caroline! You drew it? What does it mean to you?. Showing off our true selves is the best thing ever. Caroline: Yep, Melanie! Being ourselves is such a great feeling. Melanie: That sounds awesome! What did you take away from it to use in your life?. Caroline: That's so nice! What pet do you have?", "score": 0.0, - "latency_s": 120.439, + "latency_s": 0.332, "retrieved": 10 }, { "category": "single-hop", "question": "What is Caroline's relationship status?", "gold_answer": "Single", - "predicted": "Not mentioned in the context.", + "predicted": "Melanie: Glad you have support, Caroline! Unconditional love is so important. Melanie: That's awesome, Caroline! You drew it? What does it mean to you?. Showing off our true selves is the best thing ever. Caroline: Yep, Melanie! Being ourselves is such a great feeling. Caroline: Glad you found something that makes you so happy! 
Surrounding ourselves with things that bring us joy is important", "score": 0.0, - "latency_s": 90.631, + "latency_s": 0.335, "retrieved": 10 }, { "category": "single-hop", "question": "Where did Caroline move from 4 years ago?", "gold_answer": "Sweden", - "predicted": "They've been there through everything, I've known these friends for 4 years, since I moved from my home country. Melanie: That sounds awesome! What did you take away from it to use in your life?. Melanie: Wow, that's gorgeous! Where did you find it?. It made me reflect on how far I've come since I started transitioning three years ago. Melanie: Hey Caroline! Great to hear from you", + "predicted": "A friend made it for my 18th birthday ten years ago. Melanie: I'm getting there, Caroline. Melanie: 5 years already! Time flies- feels like just yesterday I put this dress on! Thanks, Caroline!. Caroline: Congrats, Melanie! You both looked so great on your wedding day! Wishing you many happy years together!. Looking forward to more happy years", "score": 0.0, - "latency_s": 120.777, + "latency_s": 0.417, "retrieved": 10 }, { "category": "single-hop", "question": "What career path has Caroline decided to persue?", "gold_answer": "counseling or mental health for Transgender people", - "predicted": "Counseling or mental health work.", + "predicted": "What motivated you to pursue counseling?. What do you hope to do next time?. I'm looking forward to seeing how much fun everyone has and how proud they'll feel of their talents!. Melanie: Wow, Caroline! What kinda jobs are you thinkin' of? Anything that stands out?. Caroline: I'm keen on counseling or working in mental health - I'd love to support those with similar issues", "score": 0.5, - "latency_s": 29.788, + "latency_s": 0.359, + "retrieved": 10 + }, + { + "category": "single-hop", + "question": "What activities does Melanie partake in?", + "gold_answer": "pottery, camping, painting, swimming", + "predicted": "Melanie: That's awesome, Caroline! You drew it? 
What does it mean to you?. What do you hope to do next time?. Any specific projects or activities you're looking forward to there?. Melanie: Wow, Caroline! What kinda jobs are you thinkin' of? Anything that stands out?. Caroline: Wow, what a fun moment! What's the band?", + "score": 0.0, + "latency_s": 0.364, + "retrieved": 10 + }, + { + "category": "single-hop", + "question": "Where has Melanie camped?", + "gold_answer": "beach, mountains, forest", + "predicted": "Melanie: It was one of those moments where I felt tiny and in awe of the universe", + "score": 0.0, + "latency_s": 0.404, + "retrieved": 10 + }, + { + "category": "single-hop", + "question": "What do Melanie's kids like?", + "gold_answer": "dinosaurs, nature", + "predicted": "I'll do my best to make sure these kids have a safe and loving home. What other creative projects do you do with them, besides pottery?. Giving a home to needy kids is such a loving way to build a family. Those kids will be so supported and happy in their new home. My dream is to create a safe and loving home for these kids", + "score": 0.0, + "latency_s": 0.357, + "retrieved": 10 + }, + { + "category": "single-hop", + "question": "What books has Melanie read?", + "gold_answer": "\"Nothing is Impossible\", \"Charlotte's Web\"", + "predicted": "Melanie: Wow, Caroline! Books have such an awesome power! Which one has been your favorite guide?. Melanie: Sounds great! What kind of books you got in your library?. Joanna: Wow, that's great to hear! What books do you enjoy? I'm always up for some new book recommendations. Melanie: That sounds awesome! What did you take away from it to use in your life?. Caroline: That's so nice! What pet do you have?", + "score": 0.0, + "latency_s": 0.33, + "retrieved": 10 + }, + { + "category": "single-hop", + "question": "What does Melanie do to destress?", + "gold_answer": "Running, pottery", + "predicted": "Melanie: That's awesome, Caroline! You drew it? What does it mean to you?. 
Respect for finding acceptance and helping others with what you've been through. Caroline: Thanks, Melanie. Transitioning wasn't easy and acceptance wasn't either, but the help I got from friends, family and people I looked up to was invaluable. That's why I want to pass that same support to anyone who needs it", + "score": 0.0, + "latency_s": 0.388, + "retrieved": 10 + }, + { + "category": "single-hop", + "question": "What LGBTQ+ events has Caroline participated in?", + "gold_answer": "Pride parade, school speech, support group", + "predicted": "Melanie: Caroline, awesome news that you two are getting along! What was it like for you both? Care to fill me in?. Last week I went to an LGBTQ+ pride parade. It showed me how much our community has grown, it was amazing!. Melanie: Wow, Caroline, sounds like the parade was an awesome experience! It's great to see the love and support for the LGBTQ+ community. Congrats! Has this experience influenced your goals at all?", + "score": 0.5, + "latency_s": 0.331, + "retrieved": 10 + }, + { + "category": "single-hop", + "question": "What events has Caroline participated in to help children?", + "gold_answer": "Mentoring program, school speech", + "predicted": "What do you hope to do next time?. I'm looking forward to seeing how much fun everyone has and how proud they'll feel of their talents!. Now I want to help people go through it too. Your passion and hard work to help others is awesome. Your drive to help is awesome! What's your plan to pitch in?", + "score": 0.0, + "latency_s": 0.343, + "retrieved": 10 + }, + { + "category": "single-hop", + "question": "What did Melanie paint recently?", + "gold_answer": "sunset", + "predicted": "What made you paint it? I've been trying out abstract stuff recently. Caroline: Oh man, sorry to hear that, Melanie. Melanie: Yeah, Here's one I did last week. What have you been up to lately, artistically?. Melanie: Wow, did you make that? 
It looks so real!", + "score": 0.0, + "latency_s": 0.364, + "retrieved": 10 + }, + { + "category": "single-hop", + "question": "What activities has Melanie done with her family?", + "gold_answer": "Pottery, painting, camping, museum, swimming, hiking", + "predicted": "What else do you guys like doing together?. It's a cool way to connect with nature and each other. What motivates you?. Melanie: Absolutely, Caroline! I cherish time with family. Hanging with loved ones is amazing and brings so much happiness", + "score": 0.0, + "latency_s": 0.337, + "retrieved": 10 + }, + { + "category": "single-hop", + "question": "In what ways is Caroline participating in the LGBTQ community?", + "gold_answer": "Joining activist group, going to pride parades, participating in an art show, mentoring program", + "predicted": "Our group, 'Connected LGBTQ Activists', is made of all kinds of people investing in positive changes. It was awesome! I talked about my transgender journey and encouraged students to get involved in the LGBTQ community. Sounds like your event was amazing! I'm so proud of you for spreading awareness and getting others involved in the LGBTQ community. Melanie: Wow, Caroline, that's awesome! Can't wait to see your show - the LGBTQ community needs more platforms like this!. Talking to the community ", + "score": 0.0, + "latency_s": 0.409, + "retrieved": 10 + }, + { + "category": "single-hop", + "question": "How many times has Melanie gone to the beach in 2023?", + "gold_answer": "2", + "predicted": "Audrey: Hey Andrew! It's been a wild ride! I did something fun with my pups over the weekend, took them to the beach and it was so fun to see them playing in the ocean. Andrew: Haven't been to the beach in a while. Oh yeah, we went to the beach recently. Andrew: Sounds great! Did they love being at the beach? Did they enjoy the water? Here's a pic of my last trip to the beach. 
It's hard to find open spaces in the city", + "score": 0.0, + "latency_s": 0.339, + "retrieved": 10 + }, + { + "category": "single-hop", + "question": "What kind of art does Caroline make?", + "gold_answer": "abstract art", + "predicted": "What about you, Mel? What type of art do you love?. Seeing how art can be a source of self-expression and growth is truly inspiring. Caroline: Thanks, Mel! Art gives me so much joy. It helps me show my feelings and freeze gorgeous moments, like a bouquet of flowers. Caroline: Yeah, definitely! Drawing flowers is one of my faves", + "score": 0.5, + "latency_s": 0.349, + "retrieved": 10 + }, + { + "category": "single-hop", + "question": "Who supports Caroline when she has a negative experience?", + "gold_answer": "Her mentors, family, and friends", + "predicted": "Caroline: Yeah, it's true! Having people who back you makes such a huge difference. Caroline: I'm so lucky to have such a great support system around me. Their love and encouragement has really helped me accept and grow into my true self. Here's a pic of my family camping at the beach. It made me realize how important it is for others to have a support system", + "score": 0.0, + "latency_s": 0.37, + "retrieved": 10 + }, + { + "category": "single-hop", + "question": "What types of pottery have Melanie and her kids made?", + "gold_answer": "bowls, cup", + "predicted": "Melanie: I'm a big fan of pottery - the creativity and skill is awesome. Takes a lot of courage and maturity! What do you think of this?. So much since we talked! Last Fri I finally took my kids to a pottery workshop. Caroline: Wow, Mel! Sounds like you and the kids had a blast. Melanie: The kids loved it! They were so excited to get their hands dirty and make something with clay", + "score": 0.0, + "latency_s": 0.342, + "retrieved": 10 + }, + { + "category": "single-hop", + "question": "What has Melanie painted?", + "gold_answer": "Horse, sunset, sunrise", + "predicted": "What else helps you out?. 
Melanie: Thanks, Caroline! It has really helped me out. What gave you the idea?. What about you, Mel? What type of art do you love?. Caroline: That bowl is awesome, Mel! What gave you the idea for all the colors and patterns?", + "score": 0.0, + "latency_s": 0.319, "retrieved": 10 }, { "category": "multi-hop", "question": "When did Caroline go to the LGBTQ support group?", "gold_answer": "7 May 2023", - "predicted": "7 May 2023", - "score": 1.0, - "latency_s": 21.711, + "predicted": "Caroline: I went to a LGBTQ support group yesterday and it was so powerful. Caroline: The support group has made me feel accepted and given me courage to embrace myself. Melanie: Wow, Caroline, sounds like the parade was an awesome experience! It's great to see the love and support for the LGBTQ+ community. Talking to the community made me want to use my story to help others too - I'm still thinking that counseling and mental health is the way to go. It was awesome! I talked about my transgender", + "score": 0.0, + "latency_s": 0.387, "retrieved": 10 }, { "category": "multi-hop", "question": "When did Melanie paint a sunrise?", "gold_answer": "2022", - "predicted": "The context does not mention when Melanie painted a sunrise.", + "predicted": "Melanie: Painting landscapes and still life is my favorite! Nature's amazing, here's a painting I did recently. It means a lot to me that you can feel the sense of peace and serenity. Last Friday I went to a council meeting for adoption. A friend made it for my 18th birthday ten years ago. Actually, I just took my fam camping in the mountains last week - it was a really nice time together!", "score": 0.0, - "latency_s": 117.755, + "latency_s": 0.336, "retrieved": 10 }, { "category": "multi-hop", "question": "When did Melanie run a charity race?", "gold_answer": "The sunday before 25 May 2023", - "predicted": "I just wanted to let you know I challenged myself last Friday and did a charity event. 
I ran a charity race for mental health last Saturday \u2013 it was really rewarding. Caroline: That charity race sounds great, Mel! Making a difference & raising awareness for mental health is super rewarding - I'm really proud of you for taking part!. It's a journey for me, but when I look after myself, I'm able to better look after my family. Here's a pic from when we met up last week!", + "predicted": "John: I set up a 5K charity run in our neighborhood. I participated in a 5K charity run for a homeless shelter. I just wanted to let you know I challenged myself last Friday and did a charity event. I ran a charity race for mental health last Saturday \u2013 it was really rewarding. Caroline: That charity race sounds great, Mel! Making a difference & raising awareness for mental health is super rewarding - I'm really proud of you for taking part!", "score": 0.0, - "latency_s": 120.781, + "latency_s": 0.346, "retrieved": 10 }, { "category": "multi-hop", "question": "When is Melanie planning on going camping?", "gold_answer": "June 2023", - "predicted": "Caroline: The mentoring is going great! I've met some amazing young folks and supported them along the way. Reaching the top of a challenging trail is amazing too - it feels like all worries just vanish when you get to the top. Lots has been going on since then!. [2:31 pm on 17 July, 2023] Conversation session 9:. Melanie: Wow, Caroline, that sounds super rewarding! Young people's resilience is amazing", + "predicted": "[6:55 pm on 20 October, 2023] Conversation session 18:. Melanie: Thanks, Caroline! This is a great time. [10:37 am on 27 June, 2023] Conversation session 4:. Melanie: It was an awesome time, Caroline! We explored nature, roasted marshmallows around the campfire and even went on a hike. 
[1:36 pm on 3 July, 2023] Conversation session 5:", "score": 0.0, - "latency_s": 120.761, + "latency_s": 0.388, "retrieved": 10 }, { "category": "multi-hop", "question": "When did Caroline give a speech at a school?", "gold_answer": "The week before 9 June 2023", - "predicted": "the week before 9 June, 2023", - "score": 1.0, - "latency_s": 119.848, + "predicted": "They're a huge part of my journey, and this one's reminding me to keep going and never give up!. You've come a long way since your transition - keep on inspiring people with your strength and courage!. I'm passionate about helping people and making a positive impact. Caroline: Melanie, that's a great pic! That must have been awesome. Melanie: That must have been tough for you, Caroline", + "score": 0.0, + "latency_s": 0.35, + "retrieved": 10 + }, + { + "category": "multi-hop", + "question": "When did Caroline meet up with her friends, family, and mentors?", + "gold_answer": "The week before 9 June 2023", + "predicted": "Transitioning wasn't easy and acceptance wasn't either, but the help I got from friends, family and people I looked up to was invaluable. They'll help with the process and provide all the info. Yesterday I took the kids to the museum - it was so cool spending time with them and seeing their eyes light up!. This journey has been amazing and I'm grateful I get to share it and help others with theirs. Caroline: I struggled with mental health, and support I got was really helpful", + "score": 0.0, + "latency_s": 0.335, + "retrieved": 10 + }, + { + "category": "multi-hop", + "question": "How long has Caroline had her current group of friends for?", + "gold_answer": "4 years", + "predicted": "Caroline: Thanks, Melanie! Been a long road, but I'm proud of how far I've come. It's made me appreciate how lucky I am to have my friends and family helping with my transition. How have your friends and fam been helping you out with your transition?. 
Melanie: Yeah, Caroline, they're some of my fave memories. Caroline: Wow, that's great! It sure shows how important friendship and compassion are", + "score": 0.0, + "latency_s": 0.367, + "retrieved": 10 + }, + { + "category": "multi-hop", + "question": "How long ago was Caroline's 18th birthday?", + "gold_answer": "10 years ago", + "predicted": "A friend made it for my 18th birthday ten years ago. How was it? Anything fun?. Caroline: Wow, what an amazing family pic! How long have you been married?. Caroline: Looks like you had a great day! How was it? You all look so happy!. Caroline: Thanks, Melanie! Been a long road, but I'm proud of how far I've come", + "score": 0.5, + "latency_s": 0.334, + "retrieved": 10 + }, + { + "category": "multi-hop", + "question": "When did Melanie sign up for a pottery class?", + "gold_answer": "2 July 2023", + "predicted": "Melanie: Wow, Caroline! That's great! I just signed up for a pottery class yesterday. Melanie: I'm a big fan of pottery - the creativity and skill is awesome. Pottery is a huge part of my life, not just a hobby - it helps me express my emotions. So much since we talked! Last Fri I finally took my kids to a pottery workshop. A friend made it for my 18th birthday ten years ago", + "score": 0.0, + "latency_s": 0.33, + "retrieved": 10 + }, + { + "category": "multi-hop", + "question": "When is Caroline going to the transgender conference?", + "gold_answer": "July 2023", + "predicted": "It was awesome! I talked about my transgender journey and encouraged students to get involved in the LGBTQ community. Talking about inclusivity and acceptance is crucial, and you're so brave to speak up for the trans community. What made you decide to transition and join the transgender community?. I got the chance to meet and connect with people who've gone through similar journeys. 
I'm really thankful for this amazing community - it's shown me how important it is to fight for trans rights and ", + "score": 0.0, + "latency_s": 0.403, + "retrieved": 10 + }, + { + "category": "multi-hop", + "question": "When did Melanie go to the museum?", + "gold_answer": "5 July 2023", + "predicted": "Melanie: Hey, Caroline! Nice to hear from you! Love the necklace, any special meaning to it?. She gave it to me when I was young, and it stands for love, faith and strength. Yesterday I took the kids to the museum - it was so cool spending time with them and seeing their eyes light up!. What made you decide to transition and join the transgender community?. Melanie: They were stoked for the dinosaur exhibit! They love learning about animals and the bones were so cool", + "score": 0.0, + "latency_s": 0.334, + "retrieved": 10 + }, + { + "category": "multi-hop", + "question": "When did Caroline have a picnic?", + "gold_answer": "The week before 6 July 2023", + "predicted": "It was nice seeing them have a good time outdoors. Melanie: It was an awesome time, Caroline! We explored nature, roasted marshmallows around the campfire and even went on a hike. It was so special having these moments together as a family - I'll never forget it!. Glad y'all had such a great time. Caroline: Lately, I've been looking into counseling and mental health as a career", + "score": 0.0, + "latency_s": 0.338, + "retrieved": 10 + }, + { + "category": "multi-hop", + "question": "When did Caroline go to the LGBTQ conference?", + "gold_answer": "10 July 2023", + "predicted": "Talking to the community made me want to use my story to help others too - I'm still thinking that counseling and mental health is the way to go. It was awesome! I talked about my transgender journey and encouraged students to get involved in the LGBTQ community. Melanie: Wow, Caroline, that's awesome! Can't wait to see your show - the LGBTQ community needs more platforms like this!. 
Caroline: Hey Mel, great to chat with you again! So much has happened since we last spoke - I went to an LGBTQ co", + "score": 0.0, + "latency_s": 0.383, + "retrieved": 10 + }, + { + "category": "multi-hop", + "question": "When did Melanie read the book \"nothing is impossible\"?", + "gold_answer": "2022", + "predicted": "Tim: It's a book by Patrick Rothfuss and it's awesome! The way the author builds the world and characters is amazing. Tim: Wow, that book is great! I read it a while back and it really changed my perspective on my goals. Showing off our true selves is the best thing ever. Tim: Yeah, John! I recently read a book that really made a big impact on me. John: Yeah, that book is really something", + "score": 0.0, + "latency_s": 0.328, + "retrieved": 10 + }, + { + "category": "multi-hop", + "question": "When did Caroline go to the adoption meeting?", + "gold_answer": "The friday before 15 July 2023", + "predicted": "Guess what I did this week? I took the first step towards becoming a mom - I applied to adoption agencies! It's a big decision, but I think I'm ready to give all my love to a child. Caroline: Woohoo Melanie! I passed the adoption agency interviews last Friday! I'm so excited and thankful. I got the chance to meet and connect with people who've gone through similar journeys. Don't forget to prepare emotionally, since the wait can be hard. I want to live authentically and help others to do the sam", + "score": 0.0, + "latency_s": 0.356, + "retrieved": 10 + }, + { + "category": "multi-hop", + "question": "When did Melanie go to the pottery workshop?", + "gold_answer": "The Friday before 15 July 2023", + "predicted": "So much since we talked! Last Fri I finally took my kids to a pottery workshop. Melanie: The kids loved it! They were so excited to get their hands dirty and make something with clay. Melanie: I'm a big fan of pottery - the creativity and skill is awesome. Melanie: Thanks, Caroline! I'm excited to see where pottery takes me. 
I'm so excited to meet other people in the community and learn more about advocacy", + "score": 0.0, + "latency_s": 0.383, + "retrieved": 10 + }, + { + "category": "multi-hop", + "question": "When did Melanie go camping in June?", + "gold_answer": "The week before 27 June 2023", + "predicted": "Melanie: I'll always remember our camping trip last year when we saw the Perseid meteor shower. Actually, I just took my fam camping in the mountains last week - it was a really nice time together!. It's awesome! We even went on another camping trip in the forest. Caroline: Wow, Mel, that's awesome! What's your best camping memory?. Caroline: Cool! What did it look like?", + "score": 0.5, + "latency_s": 0.338, + "retrieved": 10 + }, + { + "category": "multi-hop", + "question": "When did Caroline go to a pride parade during the summer?", + "gold_answer": "The week before 3 July 2023", + "predicted": "I went to a pride parade last Friday and it was awesome - so much energy and love everywhere. One special memory for me was this pride parade I went to a few weeks ago. Last week I went to an LGBTQ+ pride parade. Melanie: Wow, Caroline, sounds like the parade was an awesome experience! It's great to see the love and support for the LGBTQ+ community. Talking to the community made me want to use my story to help others too - I'm still thinking that counseling and mental health is the way to go", + "score": 0.5, + "latency_s": 0.329, + "retrieved": 10 + }, + { + "category": "multi-hop", + "question": "When did Melanie go camping in July?", + "gold_answer": "two weekends before 17 July 2023", + "predicted": "Melanie: I'll always remember our camping trip last year when we saw the Perseid meteor shower. It's awesome! We even went on another camping trip in the forest. Actually, I just took my fam camping in the mountains last week - it was a really nice time together!. Caroline: Wow, Mel, that's awesome! What's your best camping memory?. Caroline: Cool! 
What did it look like?", + "score": 0.0, + "latency_s": 0.407, + "retrieved": 10 + }, + { + "category": "multi-hop", + "question": "When did Caroline join a mentorship program?", + "gold_answer": "The weekend before 17 July 2023", + "predicted": "Caroline: Hey Melanie! That sounds great! Last weekend I joined a mentorship program for LGBTQ youth - it's really rewarding to help the community. Melanie: That must have been tough for you, Caroline. Melanie: I'm so happy for you, Caroline. It made me realize how important it is for others to have a support system. You're gonna make a huge difference! This book I read last year reminds me to always pursue my dreams, just like you are doing!\ud83c\udf1f", + "score": 0.5, + "latency_s": 0.327, "retrieved": 10 }, { "category": "temporal", "question": "What fields would Caroline be likely to pursue in her educaton?", "gold_answer": "Psychology, counseling certification", - "predicted": "It was wonderful to see how the audience related to what I said and how it inspired them to be better allies. It made me remember how essential it is to be kind and show support. What do you hope to do next time?. Melanie: That's so cool, Caroline! That's a great way to show off and be proud of everyone's skills. It was awesome! I talked about my transgender journey and encouraged students to get involved in the LGBTQ community", - "score": 0.0, - "latency_s": 120.41, + "predicted": "What motivated you to pursue counseling?. Caroline: I'm keen on counseling or working in mental health - I'd love to support those with similar issues. What do you hope to do next time?. Melanie: Caroline, so glad you got the support! Your experience really brought you to where you need to be. You're gonna make a huge difference! 
This book I read last year reminds me to always pursue my dreams, just like you are doing!\ud83c\udf1f", + "score": 0.5, + "latency_s": 0.399, "retrieved": 10 }, { "category": "temporal", "question": "Would Caroline still want to pursue counseling as a career if she hadn't received support growing up?", "gold_answer": "Likely no", - "predicted": "Talking to the community made me want to use my story to help others too - I'm still thinking that counseling and mental health is the way to go. Caroline: Lately, I've been looking into counseling and mental health as a career. Caroline: I'm so lucky to have such a great support system around me. FYI, I finished another pottery project - want to see a pic?. Caroline: I went to a LGBTQ support group yesterday and it was so powerful", + "predicted": "It made me realize how important it is for others to have a support system. You're gonna make a huge difference! This book I read last year reminds me to always pursue my dreams, just like you are doing!\ud83c\udf1f. Caroline: I'm so lucky to have such a great support system around me. That's why I want to pass that same support to anyone who needs it. Finding self-acceptance was a long process, but now I'm ready to offer love and support to those who need it", "score": 0.0, - "latency_s": 120.428, + "latency_s": 0.338, "retrieved": 10 }, { "category": "temporal", "question": "Would Caroline likely have Dr. Seuss books on her bookshelf?", "gold_answer": "Yes, since she collects classic children's books", - "predicted": "Context does not mention Dr. Seuss books.", + "predicted": "Melanie: Wow, Caroline! Books have such an awesome power! Which one has been your favorite guide?. Do you have any books that really moved you?. [4:33 pm on 12 July, 2023] Conversation session 7:. It's a real inspiring true story about a trans girl and her family. 
[1:24 pm on 17 September, 2023] Conversation session 29:", "score": 0.0, - "latency_s": 86.515, + "latency_s": 0.306, "retrieved": 10 }, { "category": "temporal", "question": "Would Caroline pursue writing as a career option?", "gold_answer": "LIkely no; though she likes reading, she wants to be a counselor", - "predicted": "Melanie: I loved reading \"Charlotte's Web\" as a kid. Melanie: Oliver's hilarious! He hid his bone in my slipper once! Cute, right? Almost as silly as when I got to feed a horse a carrot. Melanie: I'm in awe of your courage as a trans person. I made it to show my own journey as a transgender woman and how we should accept growth and change. It'll be tough as a single parent, but I'm up for the challenge!", + "predicted": "You're gonna make a huge difference! This book I read last year reminds me to always pursue my dreams, just like you are doing!\ud83c\udf1f. Joanna: Best of luck in the tournament! It sounds like it would be difficult to go through so many days of intense gaming! This is my go-to place for writing inspiration. Writing has become like an escape and a way to express my feelings. Joanna: I am writing another movie script! It's a love story with lots of challenges. Melanie: You'd be a great counselor! Your emp", "score": 0.0, - "latency_s": 120.329, + "latency_s": 0.369, "retrieved": 10 }, { "category": "temporal", "question": "Would Melanie be considered a member of the LGBTQ community?", "gold_answer": "Likely no, she does not refer to herself as part of it", - "predicted": "Not mentioned in the context.", + "predicted": "Caroline: Yeah Mel, stoked! Gonna be a great night featuring LGBTQ artists and their awesome talents. Art gives me a sense of freedom, but so does having supportive people around, promoting LGBTQ rights and being true to myself. Sounds like your event was amazing! I'm so proud of you for spreading awareness and getting others involved in the LGBTQ community. 
It's a symbol of togetherness, to celebrate differences and be that much closer. Our group, 'Connected LGBTQ Activists', is made of all kin", + "score": 0.0, + "latency_s": 0.348, + "retrieved": 10 + }, + { + "category": "temporal", + "question": "Would Melanie be more interested in going to a national park or a theme park?", + "gold_answer": "National park; she likes the outdoors", + "predicted": "Caroline: Right, it was so much fun! We could do a family outting, or wanna plan something special for this summer, just us two? It'd be a great chance to catch up and explore nature! What do you think?. Deborah: I love going to this park near my house - it has a nice forest trail and a beach. Andrew: I'm looking for a place near a park or woods, so I can stay close to nature and give the dog a large open space to run around. Last Friday we took a road trip - we went to a beautiful national park", + "score": 0.5, + "latency_s": 0.357, + "retrieved": 10 + }, + { + "category": "temporal", + "question": "Would Melanie be considered an ally to the transgender community?", + "gold_answer": "Yes, she is supportive", + "predicted": "What made you decide to transition and join the transgender community?. It was awesome! I talked about my transgender journey and encouraged students to get involved in the LGBTQ community. It was wonderful to see how the audience related to what I said and how it inspired them to be better allies. I want to live authentically and help others to do the same. Melanie: Wow, Caroline, that's awesome! Can't wait to see your show - the LGBTQ community needs more platforms like this!", + "score": 0.0, + "latency_s": 0.396, + "retrieved": 10 + }, + { + "category": "temporal", + "question": "What would Caroline's political leaning likely be?", + "gold_answer": "Liberal", + "predicted": "Melanie: That's awesome, Caroline! You drew it? What does it mean to you?. Melanie: Congrats Caroline! Good on you for going after what you really care about. 
Melanie: Wow, that agency looks great! What made you pick it?. What else do you guys like doing together?", + "score": 0.0, + "latency_s": 0.351, + "retrieved": 10 + }, + { + "category": "temporal", + "question": "Would Caroline be considered religious?", + "gold_answer": "Somewhat, but not extremely religious", + "predicted": "Melanie: Bye Caroline. Those kids will be so supported and happy in their new home. Love and acceptance should be everyone's right, and I want them to experience it. Melanie: I totally agree, Caroline. Melanie: Yep, Caroline", + "score": 0.0, + "latency_s": 0.313, + "retrieved": 10 + }, + { + "category": "temporal", + "question": "Would Melanie likely enjoy the song \"The Four Seasons\" by Vivaldi?", + "gold_answer": "Yes; it's classical music", + "predicted": "Melanie: \"Summer Sounds\"- The playing an awesome pop song that got everyone dancing and singing. By the way, take a look at this. Melanie: Hey Caroline! Last night was amazing! We celebrated my daughter's birthday with a concert surrounded by music, joy and the warm summer breeze. Caroline: Wow, what a fun moment! What's the band?. The pattern and colors are awesome-- it reminds me of art and self-expression", + "score": 0.0, + "latency_s": 0.418, + "retrieved": 10 + }, + { + "category": "temporal", + "question": "What personality traits might Melanie say Caroline has?", + "gold_answer": "Thoughtful, authentic, driven", + "predicted": "Melanie: That must have been tough for you, Caroline. Respect for finding acceptance and helping others with what you've been through. Caroline: Thanks, Melanie. Melanie: I'm so happy for you, Caroline. Melanie: That's awesome, Caroline! You drew it? What does it mean to you?", + "score": 0.0, + "latency_s": 0.305, + "retrieved": 10 + }, + { + "category": "temporal", + "question": "Would Melanie go on another roadtrip soon?", + "gold_answer": "Likely no; since this one went badly", + "predicted": "Andrew: Wow, that's awesome! 
I really wish I could go on a road trip with a furry companion. Andrew: Adding that to my bucket list! Can't wait for the day I actually go on a trip with my dog!. It's awesome! We even went on another camping trip in the forest. [6:55 pm on 20 October, 2023] Conversation session 18:. Melanie: Hey Caroline, that roadtrip this past weekend was insane! We were all freaked when my son got into an accident", "score": 0.0, - "latency_s": 84.744, + "latency_s": 0.335, + "retrieved": 10 + }, + { + "category": "temporal", + "question": "Would Caroline want to move back to her home country soon?", + "gold_answer": "No; she's in the process of adopting children.", + "predicted": "Caroline: Thanks, Mel! My goal is to give kids a loving home. Now I want to help people go through it too. It's so freeing to express myself authentically and have people back me up. I want to live authentically and help others to do the same. Caroline: Researching adoption agencies \u2014 it's been a dream to have a family and give a loving home to kids who need it", + "score": 0.0, + "latency_s": 0.373, + "retrieved": 10 + }, + { + "category": "temporal", + "question": "What might John's financial status be?", + "gold_answer": "Middle-class or wealthy", + "predicted": "James: Wow John, that's awesome! What motivated you to create such an amazing system for them?. James: What challenges have you encountered?. Jon: Thanks, Gina! Seeing my goals written down on paper really helps keep me motivated and focused on what I have to do", + "score": 0.0, + "latency_s": 0.336, + "retrieved": 10 + }, + { + "category": "temporal", + "question": "Would John be considered a patriotic person?", + "gold_answer": "Yes", + "predicted": "James: It would be cool to go somewhere together next year, don't you think?. John: Yeah, Maria, that's a good idea. I'll make a list of the ones that suit what I believe in and reach out for more info. Maria: No worries, John. 
Maria: Thanks, John", + "score": 0.0, + "latency_s": 0.346, + "retrieved": 10 + }, + { + "category": "temporal", + "question": "What might John's degree be in?", + "gold_answer": "Political science, Public administration, Public affairs", + "predicted": "Your skills and passions will be a great addition. James: Freelancing can definitely be a great way to sharpen skills and gain experience. What projects are you currently working on?. James: Congrats on your first professional project, John! Bet it's been great applying what you learned in class. John: I haven\u2019t found it yet, but to be honest I haven\u2019t looked for it", + "score": 0.0, + "latency_s": 0.401, + "retrieved": 10 + }, + { + "category": "temporal", + "question": "Around which US holiday did Maria get into a car accident?", + "gold_answer": "Independence Day", + "predicted": "I'm thinking about starting a community project regarding infrastructure, so maybe we can work together to get the neighborhood's backing. Maria: Sounds like a plan, John! Let's work together to get the community involved and make a difference. Had a wild week, my car broke down last Fri on my way to work. Trying to get it fixed but it's tough & putting a strain on my wallet. Yeah, it's been tough with car trouble and money problems, but I stay positive and find a way", + "score": 0.0, + "latency_s": 0.351, + "retrieved": 10 + }, + { + "category": "temporal", + "question": "Does John live close to a beach or the mountains?", + "gold_answer": "beach", + "predicted": "John: Wow, nature can be so beautiful! It reminds me of the film camera I had as a kid, I took plenty of beach pics. John: Sounds great, James! Taking breaks and switching up the scenery is a great way to feel recharged. John: Agreed! It is a great way to escape the everyday. Tim: It really does have a way of calming us and reminding us of the beauty around. 
John: We had a blast camping and disconnecting from the everyday", + "score": 1.0, + "latency_s": 0.335, + "retrieved": 10 + }, + { + "category": "temporal", + "question": "Would John be open to moving to another country?", + "gold_answer": "No, he has goals specifically in the U.S. like joining the military and running for office.", + "predicted": "James: It would be cool to go somewhere together next year, don't you think?. Tim: Edinburgh, Scotland would be great for a magical vibe. James: Taking risks pays off! Way to be brave. John: I haven\u2019t found it yet, but to be honest I haven\u2019t looked for it. I think it won\u2019t be difficult for me to find the organization I need", + "score": 0.0, + "latency_s": 0.41, + "retrieved": 10 + }, + { + "category": "temporal", + "question": "What attributes describe John?", + "gold_answer": "Selfless, family-oriented, passionate, rational", + "predicted": "Tim: That's awesome! What keeps you motivated during challenging times?. Tim: Nice one! What do you reckon makes them such a good support?. Tim: That's key, having a strong support network can really help with what we're trying to do. Tim: Hey John! Long time no see! Can't wait to catch up and hear all about what you've been up to. You get to show others what you know - that's awesome! Is it difficult? Any hiccups?", + "score": 0.0, + "latency_s": 0.344, "retrieved": 10 }, { "category": "open-domain", "question": "What did the charity race raise awareness for?", "gold_answer": "mental health", - "predicted": "Mental health.", - "score": 1.0, - "latency_s": 11.872, + "predicted": "We were able to raise some funds! Here's a pic from the day. I just wanted to let you know I challenged myself last Friday and did a charity event. Where did you send the collected money?. John: Our main goal was to raise money for a dog shelter, which is not far from the street where I live. 
What does the sign say?", + "score": 0.0, + "latency_s": 0.317, "retrieved": 10 }, { "category": "open-domain", "question": "What did Melanie realize after the charity race?", "gold_answer": "self-care is important", - "predicted": "Self-care is really important.", - "score": 0.5, - "latency_s": 21.099, + "predicted": "Melanie: Congrats Caroline! Good on you for going after what you really care about. I ran a charity race for mental health last Saturday \u2013 it was really rewarding. Caroline: That charity race sounds great, Mel! Making a difference & raising awareness for mental health is super rewarding - I'm really proud of you for taking part!. Melanie: Thanks, Caroline! The event was really thought-provoking. I'm starting to realize that self-care is really important", + "score": 1.0, + "latency_s": 0.368, "retrieved": 10 }, { "category": "open-domain", "question": "How does Melanie prioritize self-care?", "gold_answer": "by carving out some me-time each day for activities like running, reading, or playing the violin", - "predicted": "By carving out me-time each day - running, reading, or playing violin.", - "score": 0.5, - "latency_s": 59.823, + "predicted": "Caroline: I totally agree, Melanie. Caroline: Thanks, Melanie. I saw how counseling and support groups improved my life, so I started caring more about mental health and understanding myself. Jolene: Not only does it help me stay on top of everything, but it's also so satisfying when I cross tasks off my list. Does it motivate you when you see it?", + "score": 0.0, + "latency_s": 0.347, "retrieved": 10 }, { "category": "open-domain", "question": "What are Caroline's plans for the summer?", "gold_answer": "researching adoption agencies", - "predicted": "The context does not mention Caroline's plans for the summer.", + "predicted": "Any fun plans for the summer?. Caroline: Sounds fun! What was the best part? Do you do it often with the kids?. Caroline: I'm always here for you, Mel! 
We had a blast last year at the Pride fest. Melanie: That was a blast! So much fun with the whole gang! Wanna do a family outing this summer?. Caroline: Right, it was so much fun! We could do a family outting, or wanna plan something special for this summer, just us two? It'd be a great chance to catch up and explore nature! What do you think?", "score": 0.0, - "latency_s": 115.642, + "latency_s": 0.378, "retrieved": 10 }, { "category": "open-domain", "question": "What type of individuals does the adoption agency Caroline is considering support?", "gold_answer": "LGBTQ+ individuals", - "predicted": "Caroline: Woohoo Melanie! I passed the adoption agency interviews last Friday! I'm so excited and thankful. For me, adoption is a way of giving back and showing love and acceptance. Guess what I did this week? I took the first step towards becoming a mom - I applied to adoption agencies! It's a big decision, but I think I'm ready to give all my love to a child. Caroline: That bowl is awesome, Mel! What gave you the idea for all the colors and patterns?. Melanie: I'm a big fan of pottery - the cr", + "predicted": "Caroline: Woohoo Melanie! I passed the adoption agency interviews last Friday! I'm so excited and thankful. For me, adoption is a way of giving back and showing love and acceptance. Guess what I did this week? I took the first step towards becoming a mom - I applied to adoption agencies! It's a big decision, but I think I'm ready to give all my love to a child. And here's one of the adoption agencies I'm looking into. This is a big move towards my goal of having a family", + "score": 0.0, + "latency_s": 0.326, + "retrieved": 10 + }, + { + "category": "open-domain", + "question": "Why did Caroline choose the adoption agency?", + "gold_answer": "because of their inclusivity and support for LGBTQ+ individuals", + "predicted": "Guess what I did this week? I took the first step towards becoming a mom - I applied to adoption agencies! 
It's a big decision, but I think I'm ready to give all my love to a child. Caroline: Woohoo Melanie! I passed the adoption agency interviews last Friday! I'm so excited and thankful. Caroline: Thanks so much, Melanie! It's beautiful! It really brings home how much love's in families - both blood and the ones we choose. Wishing you the best on your adoption journey!. And here's one of the ad", + "score": 0.0, + "latency_s": 0.325, + "retrieved": 10 + }, + { + "category": "open-domain", + "question": "What is Caroline excited about in the adoption process?", + "gold_answer": "creating a family for kids who need one", + "predicted": "Anything you're excited for in the adoption process?. Caroline: Woohoo Melanie! I passed the adoption agency interviews last Friday! I'm so excited and thankful. Guess what I did this week? I took the first step towards becoming a mom - I applied to adoption agencies! It's a big decision, but I think I'm ready to give all my love to a child. Caroline: Thanks so much, Melanie! It's beautiful! It really brings home how much love's in families - both blood and the ones we choose. For me, adoption i", + "score": 0.0, + "latency_s": 0.403, + "retrieved": 10 + }, + { + "category": "open-domain", + "question": "What does Melanie think about Caroline's decision to adopt?", + "gold_answer": "she thinks Caroline is doing something amazing and will be an awesome mom", + "predicted": "Guess what I did this week? I took the first step towards becoming a mom - I applied to adoption agencies! It's a big decision, but I think I'm ready to give all my love to a child. Melanie: That's awesome, Caroline! You drew it? What does it mean to you?. It's awesome to see how passionate you are about helping these kids. Art gives me a sense of freedom, but so does having supportive people around, promoting LGBTQ rights and being true to myself. 
Now I'm passionate about creating a safe, invit", + "score": 0.0, + "latency_s": 0.344, + "retrieved": 10 + }, + { + "category": "open-domain", + "question": "How long have Mel and her husband been married?", + "gold_answer": "Mel and her husband have been married for 5 years.", + "predicted": "Caroline: Wow, what an amazing family pic! How long have you been married?. Deborah: Aw, that's wonderful! How long have you been married?. Evan: Hey Sam! Long time no see! Been up and down lately, got married last week - how about you?. Seeing her wobble as she took those initial steps really put into perspective how fleeting life is and how lucky I am to be able to share these moments. It's special moments like these that make me appreciate life and how lucky I am to be with my family and have", + "score": 0.5, + "latency_s": 0.328, + "retrieved": 10 + }, + { + "category": "open-domain", + "question": "What does Caroline's necklace symbolize?", + "gold_answer": "love, faith, and strength", + "predicted": "Melanie: That's awesome, Caroline! You drew it? What does it mean to you?. Caroline: Thanks, Melanie! This necklace is super special to me - a gift from my grandma in my home country, Sweden. Melanie: That's gorgeous, Caroline! It's awesome what items can mean so much to us, right? Got any other objects that you treasure, like that necklace?. It's made a huge difference to have people who get what I'm going through. What do you think of these?", + "score": 0.0, + "latency_s": 0.406, + "retrieved": 10 + }, + { + "category": "open-domain", + "question": "What country is Caroline's grandma from?", + "gold_answer": "Sweden", + "predicted": "Caroline: Thanks, Melanie! This necklace is super special to me - a gift from my grandma in my home country, Sweden. Caroline: Wow, great pic! Is that recent? Looks like you all had fun!. Caroline: Wow, Mel, family love and support is the best!. What else do you guys like doing together?. Caroline: Wow, Mel, that sounds awesome! 
Exploring nature and family time is so special", + "score": 1.0, + "latency_s": 0.341, + "retrieved": 10 + }, + { + "category": "open-domain", + "question": "What was grandma's gift to Caroline?", + "gold_answer": "necklace", + "predicted": "Caroline: Wow, Melanie, what a beautiful moment! Lucky you to have such an awesome family!. Caroline: Thanks, Melanie! This necklace is super special to me - a gift from my grandma in my home country, Sweden. She gave it to me when I was young, and it stands for love, faith and strength. Melanie: That's gorgeous, Caroline! It's awesome what items can mean so much to us, right? Got any other objects that you treasure, like that necklace?. Melanie: That's awesome, Caroline! You drew it? What does ", + "score": 1.0, + "latency_s": 0.339, + "retrieved": 10 + }, + { + "category": "open-domain", + "question": "What is Melanie's hand-painted bowl a reminder of?", + "gold_answer": "art and self-expression", + "predicted": "Caroline: That bowl is awesome, Mel! What gave you the idea for all the colors and patterns?. Caroline: Yep, Melanie! I've got some other stuff with sentimental value, like my hand-painted bowl. Seeing how art can be a source of self-expression and growth is truly inspiring. Caroline: Yep, Melanie! Being ourselves is such a great feeling. It's a reminder to love my authentic self - it's taken a while to get here but I'm finally proud of who I am", + "score": 1.0, + "latency_s": 0.419, + "retrieved": 10 + }, + { + "category": "open-domain", + "question": "What did Melanie and her family do while camping?", + "gold_answer": "explored nature, roasted marshmallows, and went on a hike", + "predicted": "Melanie: Sounds great! What kind of counseling and mental health services do you want to persue?. Caroline: Wow, that's awesome! What do you love most about camping with your fam?. Creativity and family keep me at peace. Caroline: Wow, Mel, family love and support is the best!. 
What else do you guys like doing together?", + "score": 0.0, + "latency_s": 0.317, + "retrieved": 10 + }, + { + "category": "open-domain", + "question": "What kind of counseling and mental health services is Caroline interested in pursuing?", + "gold_answer": "working with trans people, helping them accept themselves and supporting their mental health", + "predicted": "Caroline: I'm keen on counseling or working in mental health - I'd love to support those with similar issues. So, I started looking into counseling and mental health career options, so I could help other people on their own journeys like I was helped. Caroline: I'm still figuring out the details, but I'm thinking of working with trans people, helping them accept themselves and supporting their mental health. Melanie: Caroline, thanks! Mental health is important to me, and it's made such an impro", + "score": 1.0, + "latency_s": 0.321, + "retrieved": 10 + }, + { + "category": "open-domain", + "question": "What workshop did Caroline attend recently?", + "gold_answer": "LGBTQ+ counseling workshop", + "predicted": "So much since we talked! Last Fri I finally took my kids to a pottery workshop. What other creative projects do you do with them, besides pottery?. What made you try pottery?. Did you make it?. What gave you the idea?", + "score": 0.0, + "latency_s": 0.402, + "retrieved": 10 + }, + { + "category": "open-domain", + "question": "What was discussed in the LGBTQ+ counseling workshop?", + "gold_answer": "therapeutic methods and how to best work with trans people", + "predicted": "Last Friday, I went to an LGBTQ+ counseling workshop and it was really enlightening. Melanie: Wow, Caroline, sounds like the parade was an awesome experience! It's great to see the love and support for the LGBTQ+ community. It was awesome! I talked about my transgender journey and encouraged students to get involved in the LGBTQ community. Sounds like your event was amazing! 
I'm so proud of you for spreading awareness and getting others involved in the LGBTQ community. Talking to the community m", + "score": 0.0, + "latency_s": 0.307, + "retrieved": 10 + }, + { + "category": "open-domain", + "question": "What motivated Caroline to pursue counseling?", + "gold_answer": "her own journey and the support she received, and how counseling improved her life", + "predicted": "What motivated you to pursue counseling?. You're gonna make a huge difference! This book I read last year reminds me to always pursue my dreams, just like you are doing!\ud83c\udf1f. It made me realize how important it is for others to have a support system. Melanie: Caroline, so glad you got the support! Your experience really brought you to where you need to be. They're a huge part of my journey, and this one's reminding me to keep going and never give up!", + "score": 0.5, + "latency_s": 0.32, + "retrieved": 10 + }, + { + "category": "open-domain", + "question": "What kind of place does Caroline want to create for people?", + "gold_answer": "a safe and inviting place for people to grow", + "predicted": "Now I'm passionate about creating a safe, inviting place for people to grow. My dream is to create a safe and loving home for these kids. Melanie: That's awesome, Caroline! You drew it? What does it mean to you?. Now I want to help people go through it too. Melanie: Congrats Caroline! Good on you for going after what you really care about", + "score": 1.0, + "latency_s": 0.401, + "retrieved": 10 + }, + { + "category": "open-domain", + "question": "Did Melanie make the black and white bowl in the photo?", + "gold_answer": "Yes", + "predicted": "Caroline: That bowl is gorgeous! The black and white design looks so fancy. Caroline: That bowl is awesome, Mel! What gave you the idea for all the colors and patterns?. Melanie: Thanks, Caroline! I'm obsessed with those, so I made something to catch the eye and make people smile. 
Caroline: Thanks Melanie - love the blue vase in the pic! Blue's my fave, it makes me feel relaxed. Caroline: Nice job! You really put in the work and it definitely shows", "score": 0.0, - "latency_s": 120.417, + "latency_s": 0.306, "retrieved": 10 }, { "category": "adversarial", "question": "What did Caroline realize after her charity race?", "gold_answer": "self-care is important", - "predicted": "Melanie: Wow, that's cool, Caroline! What happened that was so awesome? Did you hear any inspiring stories?. Caroline: Cool! What did it look like?. Melanie: That sounds awesome! What did you take away from it to use in your life?. Guess what I did this week? I took the first step towards becoming a mom - I applied to adoption agencies! It's a big decision, but I think I'm ready to give all my love to a child. I ran a charity race for mental health last Saturday \u2013 it was really rewarding", - "score": 0.0, - "latency_s": 120.544, + "predicted": "Melanie: Congrats Caroline! Good on you for going after what you really care about. I ran a charity race for mental health last Saturday \u2013 it was really rewarding. Caroline: That charity race sounds great, Mel! Making a difference & raising awareness for mental health is super rewarding - I'm really proud of you for taking part!. I'm starting to realize that self-care is really important. It's a journey for me, but when I look after myself, I'm able to better look after my family", + "score": 0.5, + "latency_s": 0.32, "retrieved": 10 }, { "category": "adversarial", "question": "What are Melanie's plans for the summer with respect to adoption?", "gold_answer": "researching adoption agencies", - "predicted": "The context does not mention Melanie's summer adoption plans.", + "predicted": "Guess what I did this week? I took the first step towards becoming a mom - I applied to adoption agencies! It's a big decision, but I think I'm ready to give all my love to a child. Any fun plans for the summer?. 
Caroline: Right, it was so much fun! We could do a family outting, or wanna plan something special for this summer, just us two? It'd be a great chance to catch up and explore nature! What do you think?. Tell me, what's your vision for the future?. You have such a caring heart - they'll", "score": 0.5, - "latency_s": 72.542, + "latency_s": 0.389, "retrieved": 10 }, { "category": "adversarial", "question": "What type of individuals does the adoption agency Melanie is considering support?", "gold_answer": "LGBTQ+ individuals", - "predicted": "Caroline: Woohoo Melanie! I passed the adoption agency interviews last Friday! I'm so excited and thankful. For me, adoption is a way of giving back and showing love and acceptance. Guess what I did this week? I took the first step towards becoming a mom - I applied to adoption agencies! It's a big decision, but I think I'm ready to give all my love to a child. Caroline: That bowl is awesome, Mel! What gave you the idea for all the colors and patterns?. This is a big move towards my goal of havi", + "predicted": "Caroline: Woohoo Melanie! I passed the adoption agency interviews last Friday! I'm so excited and thankful. For me, adoption is a way of giving back and showing love and acceptance. Guess what I did this week? I took the first step towards becoming a mom - I applied to adoption agencies! It's a big decision, but I think I'm ready to give all my love to a child. And here's one of the adoption agencies I'm looking into. This is a big move towards my goal of having a family", "score": 0.0, - "latency_s": 120.709, + "latency_s": 0.308, "retrieved": 10 }, { "category": "adversarial", "question": "Why did Melanie choose the adoption agency?", "gold_answer": "because of their inclusivity and support for LGBTQ+ individuals", - "predicted": "Guess what I did this week? I took the first step towards becoming a mom - I applied to adoption agencies! It's a big decision, but I think I'm ready to give all my love to a child. 
Caroline: Woohoo Melanie! I passed the adoption agency interviews last Friday! I'm so excited and thankful. Caroline: Thanks so much, Melanie! It's beautiful! It really brings home how much love's in families - both blood and the ones we choose. Wishing you the best on your adoption journey!. Melanie: Congrats, Carol", + "predicted": "Guess what I did this week? I took the first step towards becoming a mom - I applied to adoption agencies! It's a big decision, but I think I'm ready to give all my love to a child. Caroline: Woohoo Melanie! I passed the adoption agency interviews last Friday! I'm so excited and thankful. Caroline: Thanks so much, Melanie! It's beautiful! It really brings home how much love's in families - both blood and the ones we choose. Wishing you the best on your adoption journey!. And here's one of the ad", "score": 0.0, - "latency_s": 120.728, + "latency_s": 0.338, "retrieved": 10 }, { "category": "adversarial", "question": "What is Melanie excited about in her adoption process?", "gold_answer": "creating a family for kids who need one", - "predicted": "The context does not mention Melanie having an adoption process.", + "predicted": "Anything you're excited for in the adoption process?. Caroline: Woohoo Melanie! I passed the adoption agency interviews last Friday! I'm so excited and thankful. For me, adoption is a way of giving back and showing love and acceptance. Guess what I did this week? I took the first step towards becoming a mom - I applied to adoption agencies! It's a big decision, but I think I'm ready to give all my love to a child. This is a big move towards my goal of having a family", + "score": 0.5, + "latency_s": 0.397, + "retrieved": 10 + }, + { + "category": "adversarial", + "question": "What does Melanie's necklace symbolize?", + "gold_answer": "love, faith, and strength", + "predicted": "Melanie: That's awesome, Caroline! You drew it? What does it mean to you?. Caroline: Thanks, Melanie! 
This necklace is super special to me - a gift from my grandma in my home country, Sweden. Melanie: That's gorgeous, Caroline! It's awesome what items can mean so much to us, right? Got any other objects that you treasure, like that necklace?. What do you think of these?. What do flowers mean to you?", + "score": 0.0, + "latency_s": 0.344, + "retrieved": 10 + }, + { + "category": "adversarial", + "question": "What country is Melanie's grandma from?", + "gold_answer": "Sweden", + "predicted": "Caroline: Thanks, Melanie! This necklace is super special to me - a gift from my grandma in my home country, Sweden. Caroline: Wow, Mel, family love and support is the best!. What else do you guys like doing together?. Caroline: Wow, Mel, that sounds awesome! Exploring nature and family time is so special. Caroline: Wow, sounds wonderful! Your love for your kids is so awesome", + "score": 1.0, + "latency_s": 0.411, + "retrieved": 10 + }, + { + "category": "adversarial", + "question": "What was grandma's gift to Melanie?", + "gold_answer": "necklace", + "predicted": "Melanie: That's awesome, Caroline! You drew it? What does it mean to you?. Caroline: Thanks, Melanie! This necklace is super special to me - a gift from my grandma in my home country, Sweden. She gave it to me when I was young, and it stands for love, faith and strength. Melanie: That's gorgeous, Caroline! It's awesome what items can mean so much to us, right? Got any other objects that you treasure, like that necklace?. What do flowers mean to you?", + "score": 1.0, + "latency_s": 0.352, + "retrieved": 10 + }, + { + "category": "adversarial", + "question": "What was grandpa's gift to Caroline?", + "gold_answer": "necklace", + "predicted": "Caroline: Wow, Melanie, what a beautiful moment! Lucky you to have such an awesome family!. Melanie: That's awesome, Caroline! You drew it? What does it mean to you?. Caroline: Thanks, Melanie! 
This necklace is super special to me - a gift from my grandma in my home country, Sweden. She gave it to me when I was young, and it stands for love, faith and strength. Melanie: That's gorgeous, Caroline! It's awesome what items can mean so much to us, right? Got any other objects that you treasure, like", + "score": 1.0, + "latency_s": 0.339, + "retrieved": 10 + }, + { + "category": "adversarial", + "question": "What is Caroline's hand-painted bowl a reminder of?", + "gold_answer": "art and self-expression", + "predicted": "Caroline: That bowl is awesome, Mel! What gave you the idea for all the colors and patterns?. Caroline: Yep, Melanie! I've got some other stuff with sentimental value, like my hand-painted bowl. Seeing how art can be a source of self-expression and growth is truly inspiring. It's a reminder to love my authentic self - it's taken a while to get here but I'm finally proud of who I am. Caroline: Yep, Melanie! Being ourselves is such a great feeling", + "score": 1.0, + "latency_s": 0.418, + "retrieved": 10 + }, + { + "category": "adversarial", + "question": "What did Caroline and her family do while camping?", + "gold_answer": "explored nature, roasted marshmallows, and went on a hike", + "predicted": "Melanie: Sounds great! What kind of counseling and mental health services do you want to persue?. Takes a lot of courage and maturity! What do you think of this?. Caroline: Wow, that's awesome! What do you love most about camping with your fam?. Creativity and family keep me at peace. Caroline: Wow, Mel, family love and support is the best!", + "score": 0.0, + "latency_s": 0.369, + "retrieved": 10 + }, + { + "category": "adversarial", + "question": "What kind of counseling and mental health services is Melanie interested in pursuing?", + "gold_answer": "working with trans people, helping them accept themselves and supporting their mental health", + "predicted": "Melanie: Sounds great! 
What kind of counseling and mental health services do you want to persue?. Caroline: I'm keen on counseling or working in mental health - I'd love to support those with similar issues. I saw how counseling and support groups improved my life, so I started caring more about mental health and understanding myself. Melanie: Caroline, thanks! Mental health is important to me, and it's made such an improvement!. Caroline: That charity race sounds great, Mel! Making a difference", + "score": 0.5, + "latency_s": 0.352, + "retrieved": 10 + }, + { + "category": "adversarial", + "question": "What kind of counseling workshop did Melanie attend recently?", + "gold_answer": "LGBTQ+ counseling workshop", + "predicted": "Melanie: Sounds great! What kind of counseling and mental health services do you want to persue?. Last Friday, I went to an LGBTQ+ counseling workshop and it was really enlightening. Caroline: Thanks, Melanie. I saw how counseling and support groups improved my life, so I started caring more about mental health and understanding myself. Caroline: Thanks, Melanie! Your kind words mean a lot", + "score": 1.0, + "latency_s": 0.383, + "retrieved": 10 + }, + { + "category": "adversarial", + "question": "What motivated Melanie to pursue counseling?", + "gold_answer": "her own journey and the support she received, and how counseling improved her life", + "predicted": "What motivated you to pursue counseling?. You're gonna make a huge difference! This book I read last year reminds me to always pursue my dreams, just like you are doing!\ud83c\udf1f. What made you decide to transition and join the transgender community?. Melanie: I'm lucky to have my husband and kids; they keep me motivated. 
Caroline: Thanks, Melanie", + "score": 0.0, + "latency_s": 0.339, + "retrieved": 10 + }, + { + "category": "adversarial", + "question": "What kind of place does Melanie want to create for people?", + "gold_answer": "a safe and inviting place for people to grow", + "predicted": "My dream is to create a safe and loving home for these kids. Melanie: That's awesome, Caroline! You drew it? What does it mean to you?. Now I'm passionate about creating a safe, inviting place for people to grow. Love and acceptance should be everyone's right, and I want them to experience it. What do you hope to do next time?", + "score": 1.0, + "latency_s": 0.355, + "retrieved": 10 + }, + { + "category": "adversarial", + "question": "Did Caroline make the black and white bowl in the photo?", + "gold_answer": "No", + "predicted": "Caroline: That bowl is gorgeous! The black and white design looks so fancy. Caroline: That bowl is awesome, Mel! What gave you the idea for all the colors and patterns?. Melanie: Thanks, Caroline! I'm obsessed with those, so I made something to catch the eye and make people smile. Melanie: The kids loved it! They were so excited to get their hands dirty and make something with clay. Caroline: Nice job! You really put in the work and it definitely shows", + "score": 0.0, + "latency_s": 0.418, + "retrieved": 10 + }, + { + "category": "adversarial", + "question": "What are the new shoes that Caroline got used for?", + "gold_answer": "Running", + "predicted": "Melanie: Luna and Oliver! They are so sweet and playful - they really liven up the house! Just got some new shoes, too!. The pattern and colors are awesome-- it reminds me of art and self-expression. My own journey and the support I got made a huge difference. Melanie: Wow Caroline, that looks amazing! Those colors are so vivid, it really looks like a real sunset. 
What gave you the idea to paint it?", + "score": 0.0, + "latency_s": 0.348, + "retrieved": 10 + }, + { + "category": "adversarial", + "question": "What is Caroline's reason for getting into running?", + "gold_answer": "To de-stress and clear her mind", + "predicted": "Caroline: Wow! What got you into running?. It made me realize how important it is for others to have a support system. Respect for finding acceptance and helping others with what you've been through. Caroline: That charity race sounds great, Mel! Making a difference & raising awareness for mental health is super rewarding - I'm really proud of you for taking part!. Melanie: Congrats Caroline! Good on you for going after what you really care about", + "score": 0.5, + "latency_s": 0.327, + "retrieved": 10 + }, + { + "category": "adversarial", + "question": "What does Caroline say running has been great for?", + "gold_answer": "Her mental health", + "predicted": "Melanie: I've been running farther to de-stress, which has been great for my headspace. Been running longer since our last chat - a great way to destress and clear my mind. This journey has been amazing and I'm grateful I get to share it and help others with theirs. I'm trying to add workouts into my studying schedule, which has been tough but fun. Melanie: Wow, Caroline! Books have such an awesome power! Which one has been your favorite guide?", + "score": 0.0, + "latency_s": 0.407, + "retrieved": 10 + }, + { + "category": "adversarial", + "question": "What did Melanie see at the council meeting for adoption?", + "gold_answer": "many people wanting to create loving homes for children in need", + "predicted": "Guess what I did this week? I took the first step towards becoming a mom - I applied to adoption agencies! It's a big decision, but I think I'm ready to give all my love to a child. Tell me, what's your vision for the future?. Excited for the future! Bye!. You have such a caring heart - they'll get all the love and stability they need! 
Excited for this new chapter!. I'm truly grateful for all the support I've got from friends and mentors", "score": 0.0, - "latency_s": 112.204, + "latency_s": 0.324, "retrieved": 10 } ] From 98d592bc741136f78cd32870c2cbd1bb81278b3c Mon Sep 17 00:00:00 2001 From: Patrick Roland Date: Mon, 15 Jun 2026 16:37:46 -0500 Subject: [PATCH 19/24] feat(osint): port flowsint observable-model gaps + enforce AGE-118 exclusions AGE-119. Realizes the Flowsint enricher vendoring under the AGE-118 CONDITIONAL GO. Reality check changed the shape of the work: - Framework NOT vendored: every flowsint enricher + its registry import flowsint_core (forbidden by AGE-118: LGPL psycopg2 + Docker control), and ZettelForge already has an equivalent decoupled framework (RFC-016 transform_registry + executor). Reused it instead of duplicating. - Type gaps ported from flowsint-types v1.2.8 @ 2a4878c8 (Apache-2.0): CryptoWallet, Transaction, SocialAccount, with edges + canonicalization. ASN/CIDR were NOT ported (already exist as ASNumber / Netblock). - Compliance artifacts under osint/THIRD_PARTY/: Apache LICENSE, carried- forward NOTICE, third-party notices, and PROVENANCE.md (pinned SHA, post- relicense date, telemetry-grep PASS, exclusions). - Enforced exclusions: neutralized the pre-existing holehe_collector GPL-3.0 import path to a permanent compliant no-op. Tests: 117 passing (new gap-type validation + KG-persistence test). mypy --strict and ruff clean on changed source. 
Co-Authored-By: Paperclip --- .../osint/THIRD_PARTY/LICENSE-Apache-2.0.txt | 201 ++++++++++++++++++ src/zettelforge/osint/THIRD_PARTY/NOTICE | 32 +++ .../osint/THIRD_PARTY/PROVENANCE.md | 65 ++++++ .../osint/THIRD_PARTY/THIRD_PARTY_NOTICES.md | 38 ++++ .../collectors/people/holehe_collector.py | 37 ++-- src/zettelforge/osint/ontology.py | 95 +++++++++ tests/test_osint_age119_gap_types.py | 94 ++++++++ 7 files changed, 542 insertions(+), 20 deletions(-) create mode 100644 src/zettelforge/osint/THIRD_PARTY/LICENSE-Apache-2.0.txt create mode 100644 src/zettelforge/osint/THIRD_PARTY/NOTICE create mode 100644 src/zettelforge/osint/THIRD_PARTY/PROVENANCE.md create mode 100644 src/zettelforge/osint/THIRD_PARTY/THIRD_PARTY_NOTICES.md create mode 100644 tests/test_osint_age119_gap_types.py diff --git a/src/zettelforge/osint/THIRD_PARTY/LICENSE-Apache-2.0.txt b/src/zettelforge/osint/THIRD_PARTY/LICENSE-Apache-2.0.txt new file mode 100644 index 0000000..0615ab7 --- /dev/null +++ b/src/zettelforge/osint/THIRD_PARTY/LICENSE-Apache-2.0.txt @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. 
+ + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to the Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. 
For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. 
You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. 
Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright 2025-2026 Reconurge + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/src/zettelforge/osint/THIRD_PARTY/NOTICE b/src/zettelforge/osint/THIRD_PARTY/NOTICE new file mode 100644 index 0000000..e3e4463 --- /dev/null +++ b/src/zettelforge/osint/THIRD_PARTY/NOTICE @@ -0,0 +1,32 @@ +ZettelForge OSINT layer — third-party attribution +================================================== + +This directory documents code and observable models adopted into ZettelForge +from the Flowsint project under the Apache License, Version 2.0. ZettelForge +itself is MIT-licensed; the obligations below cover only the adopted material. + +Upstream NOTICE (reproduced verbatim, reconurge/flowsint) +--------------------------------------------------------- + + Flowsint, developed by Reconurge (https://github.com/reconurge/flowsint), + licensed under the Apache License, Version 2.0 as of 2026-01-25. All code + contributed prior to 2026-01-25 was originally released under the GNU + Affero General Public License v3.0 (AGPL-3.0-or-later) and was relicensed + to Apache-2.0 with the explicit written consent of all contributors. 
+ +ZettelForge attribution +----------------------- + +Portions of ZettelForge's OSINT observable models (the CryptoWallet, +Transaction, and SocialAccount entity types in +``zettelforge/osint/ontology.py``) are derived from the ``flowsint-types`` +package of reconurge/flowsint, pinned at commit +2a4878c8fc06c13c16d91ce760873037fa0b6b6d (tag v1.2.8, 2026-04-11), which is +on/after the 2026-01-25 Apache-2.0 relicense. The models were re-expressed in +ZettelForge's ontology shape (required/optional/properties dicts) rather than +copied as Pydantic source. Modifications by ZettelForge per Apache-2.0 +section 4(b). + +See PROVENANCE.md for the full pin, exclusions, and evidence. See +THIRD_PARTY_NOTICES.md for the licenses of runtime dependencies used by the +OSINT collectors. diff --git a/src/zettelforge/osint/THIRD_PARTY/PROVENANCE.md b/src/zettelforge/osint/THIRD_PARTY/PROVENANCE.md new file mode 100644 index 0000000..32ba183 --- /dev/null +++ b/src/zettelforge/osint/THIRD_PARTY/PROVENANCE.md @@ -0,0 +1,65 @@ +# Flowsint adoption provenance (AGE-119) + +Records the supply-chain evidence required by the AGE-118 security review +(go/no-go: CONDITIONAL GO) before any Flowsint-derived material entered +ZettelForge. Keep this file auditable. + +## Pinned upstream source + +- Repo: `reconurge/flowsint` (https://github.com/reconurge/flowsint) +- License: Apache-2.0 (root `LICENSE`, reproduced here as `LICENSE-Apache-2.0.txt`) +- Pinned ref: tag **v1.2.8**, commit **`2a4878c8fc06c13c16d91ce760873037fa0b6b6d`** +- Commit date: **2026-04-11** — on/after the 2026-01-25 relicense cutoff (REQUIRED) + +## Relicense evidence (AGPL-3.0 -> Apache-2.0) + +Upstream `NOTICE` states all code contributed before 2026-01-25 was originally +AGPL-3.0-or-later and was relicensed to Apache-2.0 "with the explicit written +consent of all contributors." We vendor ONLY from the post-relicense pinned +commit above; we never copy from pre-2026-01-25 git history. 
Residual risk +(the "all contributors consented" claim is not independently verifiable here) +is accepted per AGE-118 for a 6.7k-star actively-maintained project with an +explicit NOTICE. Upstream NOTICE carried forward in this directory's `NOTICE`. + +## What was adopted vs. rejected + +ADOPTED (Apache-2.0, attributed): +- Observable models CryptoWallet, Transaction, SocialAccount, re-expressed in + ZettelForge ontology shape in `zettelforge/osint/ontology.py`. ASN and CIDR + were NOT adopted — they already exist as `ASNumber` / `Netblock`. + +NOT adopted (would have been duplication or non-compliant): +- `flowsint-enrichers` framework + `flowsint-core`: every enricher and the + registry import `flowsint_core.core.enricher_base`. ZettelForge already has + an equivalent, decoupled framework (`osint/transform_registry.py` + + `osint/executor.py`, RFC-016), so the framework was reused, not vendored. + Importing `flowsint-core` was also forbidden by AGE-118 (it pulls + psycopg2-binary LGPL-3.0 and a Docker control SDK). + +## Exclusions enforced (AGE-118) + +- **holehe (GPL-3.0)** and **ignorant (GPL-3.0)** — copyleft, would + contaminate MIT ZettelForge. Excluded. The pre-existing + `collectors/people/holehe_collector.py` stub (which lazily imported + `holehe`) was neutralized to a permanent compliant no-op under this issue; + no `ignorant` path existed. +- **hibpwned (LGPL-3.0)** — not used. ZettelForge's breach path is a native + HIBP REST call (`collectors/breach/hibp_collector.py`), per the review's + preferred option. +- **Docker tool wrappers** (`tools/dockertool.py`, naabu/subfinder/dnsx/ + asnmap/mapcidr/httpx) — privileged Docker-control surface. Not adopted. + +## Telemetry / hardcoded-host grep (AGE-118 gate) + +Grep over `flowsint-enrichers/src` and `flowsint-types/src` at the pinned SHA +for `flowsint.io | reconurge. 
| telemetry | analytics | posthog | sentry | +mixpanel | api.flowsint`: only hits were `__author__ = "dextmorgn +"` package metadata and the word "analytics" inside a +web-tracker type description. No callbacks/telemetry/exfil. PASS. + +## Remaining gates owned by the follow-up issue + +- Run `pip-audit` / OSV against the final pinned dependency set + (python-whois, dnspython, ipwhois, and any maigret/sherlock additions) and + attach the report to the implementation PR; fail merge on unresolved + high/critical. Tracked in the AGE-119 enricher follow-up child issue. diff --git a/src/zettelforge/osint/THIRD_PARTY/THIRD_PARTY_NOTICES.md b/src/zettelforge/osint/THIRD_PARTY/THIRD_PARTY_NOTICES.md new file mode 100644 index 0000000..ea882cf --- /dev/null +++ b/src/zettelforge/osint/THIRD_PARTY/THIRD_PARTY_NOTICES.md @@ -0,0 +1,38 @@ +# Third-party notices — ZettelForge OSINT layer + +Licenses of third-party material adopted or relied on by the OSINT layer. +ZettelForge is MIT-licensed; nothing here changes that. + +## Apache-2.0 (adopted, attributed) + +- **reconurge/flowsint** `flowsint-types` v1.2.8 — observable models + (CryptoWallet, Transaction, SocialAccount) re-expressed in ZettelForge + ontology shape. Full text: `LICENSE-Apache-2.0.txt`. Attribution: `NOTICE`. + Pin + evidence: `PROVENANCE.md`. + +## Runtime dependencies of the OSINT collectors (declared in `pyproject.toml` `[osint]`) + +All permissive; reviewed and approved in AGE-118. 
License texts are obtained +at install time from the respective distributions; summarized here: + +| Package | License | Use | +|---|---|---| +| python-whois (richardpenman) | MIT | domain WHOIS collector | +| dnspython | ISC | DNS record collectors | +| ipwhois | BSD-2-Clause | IP -> ASN/netblock WHOIS | + +Planned additions for the enricher follow-up (record their notices when added): + +| Package | License | Use | +|---|---|---| +| maigret (soxoj) | MIT | username -> SocialAccount enumeration | +| sherlock-project | MIT | username presence checks | + +## Excluded (NOT used — recorded so they are never reintroduced) + +| Package | License | Reason | +|---|---|---| +| holehe (megadose) | GPL-3.0 | copyleft contamination; abandoned 2021 | +| ignorant (megadose) | GPL-3.0 | copyleft contamination; abandoned 2021 | +| hibpwned (plasticuproject) | LGPL-3.0 | replaced by native HIBP REST call | +| psycopg2-binary (via flowsint-core) | LGPL-3.0 | core not adopted | diff --git a/src/zettelforge/osint/collectors/people/holehe_collector.py b/src/zettelforge/osint/collectors/people/holehe_collector.py index 38ca573..b9b28eb 100644 --- a/src/zettelforge/osint/collectors/people/holehe_collector.py +++ b/src/zettelforge/osint/collectors/people/holehe_collector.py @@ -1,9 +1,15 @@ """ -Holehe collector — Phase 2 stub (RFC-016 §5). +Holehe collector — DISABLED for license compliance (AGE-118 / AGE-119). -Enumerates the social-media accounts associated with an email address -using the ``holehe`` library. Stub: returns ``[]`` when ``holehe`` is not -importable. Phase 2 ships the live enumeration. +``holehe`` (megadose) is GPL-3.0. Copying or importing it into MIT-licensed +ZettelForge would force a copyleft relicense, so the AGE-118 supply-chain +review marked it a hard exclusion (it is also abandoned, last release 2021). + +This collector is kept as a permanent no-op so the registry shape stays +stable and so no future change re-adds a GPL import here. 
It NEVER imports +``holehe`` and always returns ``[]``. Email -> account enumeration must be +reimplemented from scratch on a permissive basis (e.g. maigret/sherlock on a +derived username, or the native HIBP REST breach path), not via holehe. """ from __future__ import annotations @@ -19,28 +25,19 @@ def collect(input_entity_type: str, input_value: str) -> list[CollectorTuple]: - """Enumerate accounts tied to an EmailAddress via holehe. Stub: ``[]``.""" - if input_entity_type != "EmailAddress": - return [] - try: - import holehe # noqa: F401 — Phase 2 will use this - except ImportError: - _logger.debug("holehe_collector_missing_holehe") - return [] - # Phase 2: real holehe enumeration goes here. For now: fail closed. + """Disabled per AGE-118 (holehe is GPL-3.0). Always returns ``[]``.""" + if input_entity_type == "EmailAddress": + _logger.debug("holehe_collector_disabled_gpl", reason="AGE-118 GPL exclusion") return [] _METADATA = TransformMetadata( name="holehe_collector", - description="Holehe: enumerate social-media accounts tied to an email address.", + description="DISABLED (GPL-3.0 exclusion, AGE-118): holehe is not used.", input_types=("EmailAddress",), - output_types=( - ("Alias", "has_handle"), - ("NamechkResult", "verified_on"), - ), - api_dependencies=("holehe",), - rate_limit=2.0, + output_types=(), + api_dependencies=(), + rate_limit=None, ) diff --git a/src/zettelforge/osint/ontology.py b/src/zettelforge/osint/ontology.py index 1dc7b0b..04e4855 100644 --- a/src/zettelforge/osint/ontology.py +++ b/src/zettelforge/osint/ontology.py @@ -150,6 +150,37 @@ "optional": ["confidence", "language"], "properties": {}, }, + # ── Phase 4 gap types ported from flowsint-types (AGE-119) ────────────── + # Adopted (not duplicated) from reconurge/flowsint `flowsint-types` + # v1.2.8 @ 2a4878c8 (Apache-2.0). ASN/CIDR were NOT ported: they already + # exist here as ASNumber / Netblock. See THIRD_PARTY/PROVENANCE.md. 
+ "CryptoWallet": { + # canonical value: see ``canonicalize_wallet`` (hex -> lowercased). + "required": ["address"], + "optional": ["chain", "node_id", "label"], + "properties": {}, + }, + "Transaction": { + # Blockchain transaction. canonical value: ``canonicalize_tx_hash``. + "required": ["tx_hash"], + "optional": ["chain", "from_address", "to_address", "value", "timestamp", "block"], + "properties": {}, + }, + "SocialAccount": { + # The "home" of a username on a platform. canonical value: + # ``canonicalize_social_account`` -> ``username@platform``. + "required": ["id"], + "optional": [ + "username", + "platform", + "display_name", + "profile_url", + "bio", + "location", + "verified", + ], + "properties": {}, + }, # ── Phase 5: Physical (stubs — collectors deferred) ───────────────────── "GPS": { "required": ["latitude", "longitude"], @@ -320,6 +351,27 @@ "to_types": ["Sentiment"], "cardinality": "many_to_many", }, + # ── Phase 4 gap edges for ported types (AGE-119) ──────────────────────── + "sent_transaction": { + "from_types": ["CryptoWallet"], + "to_types": ["Transaction"], + "cardinality": "many_to_many", + }, + "received_transaction": { + "from_types": ["Transaction"], + "to_types": ["CryptoWallet"], + "cardinality": "many_to_many", + }, + "controls_wallet": { + "from_types": ["Person", "Organization", "SocialAccount"], + "to_types": ["CryptoWallet"], + "cardinality": "many_to_many", + }, + "has_account": { + "from_types": ["Person", "Alias", "EmailAddress"], + "to_types": ["SocialAccount"], + "cardinality": "many_to_many", + }, # ── Phase 5: Physical ─────────────────────────────────────────────────── "located_near": { "from_types": ["Device", "Person"], @@ -417,6 +469,46 @@ def canonicalize_web_title(url: str, title: str, max_len: int = 256) -> str: return canon[:max_len] +# --------------------------------------------------------------------------- +# Canonicalization helpers (Phase 4 gap types — AGE-119) +# 
--------------------------------------------------------------------------- + + +def canonicalize_wallet(raw: str) -> str: + """Canonical form for a crypto wallet address. + + Hex addresses (``0x...``, EVM chains; case only encodes the EIP-55 checksum) are + lowercased so a checksummed and a lowercase form dedupe to one node. + Non-hex addresses (Bitcoin base58 is case-sensitive) are returned with + surrounding whitespace stripped only. + + ponytail: case-fold only hex; folding base58 would corrupt BTC + addresses. Per-chain validation lands with the wallet collector. + """ + s = raw.strip() + if s.lower().startswith("0x"): + return s.lower() + return s + + +def canonicalize_tx_hash(raw: str) -> str: + """Canonical form for a blockchain transaction hash: stripped, lowercased. + + Transaction hashes are hex on every supported chain, so lowercasing is + always safe and makes ``(Transaction, tx_hash)`` dedupe correctly. + """ + return raw.strip().lower() + + +def canonicalize_social_account(username: str, platform: str) -> str: + """Composite canonical value ``username@platform`` (both lowercased). + + Mirrors flowsint-types ``SocialAccount.id``. Keeps + ``(SocialAccount, id)`` unique across platforms for the same handle. 
+ """ + return f"{username.strip().lower()}@{platform.strip().lower()}" + + # --------------------------------------------------------------------------- # Merge helpers # --------------------------------------------------------------------------- @@ -466,7 +558,10 @@ def merge_into_global_ontology() -> None: "canonicalize_ipv6", "canonicalize_mx", "canonicalize_port", + "canonicalize_social_account", + "canonicalize_tx_hash", "canonicalize_url", + "canonicalize_wallet", "canonicalize_web_title", "merge_into_global_ontology", ] diff --git a/tests/test_osint_age119_gap_types.py b/tests/test_osint_age119_gap_types.py new file mode 100644 index 0000000..60d3b0d --- /dev/null +++ b/tests/test_osint_age119_gap_types.py @@ -0,0 +1,94 @@ +""" +AGE-119 gap-type tests: CryptoWallet, Transaction, SocialAccount. + +These observable models were adopted from flowsint-types v1.2.8 (Apache-2.0, +see osint/THIRD_PARTY/PROVENANCE.md) because ZettelForge lacked them. ASN and +CIDR were deliberately NOT added (ASNumber / Netblock already cover them). + +No network, no disk beyond a tmp KG. Verifies the three new entity types and +their edges validate against the ontology and persist through the same +KnowledgeGraph.add_edge path the OSINT executor uses, plus the new +canonicalization helpers. +""" + +from __future__ import annotations + +import pytest + +# Importing the osint package runs merge_into_global_ontology() as a side +# effect, registering the gap types into the global ontology. 
+from zettelforge import osint as _osint # noqa: F401 +from zettelforge.knowledge_graph import KnowledgeGraph +from zettelforge.ontology import ENTITY_TYPES, OntologyValidator +from zettelforge.osint.ontology import ( + canonicalize_social_account, + canonicalize_tx_hash, + canonicalize_wallet, +) + +GAP_ENTITIES = ("CryptoWallet", "Transaction", "SocialAccount") + + +@pytest.fixture +def validator() -> OntologyValidator: + return OntologyValidator() + + +def test_gap_entities_registered() -> None: + for name in GAP_ENTITIES: + assert name in ENTITY_TYPES, f"{name} not merged into global ontology" + + +@pytest.mark.parametrize( + ("from_type", "edge", "to_type"), + [ + ("CryptoWallet", "sent_transaction", "Transaction"), + ("Transaction", "received_transaction", "CryptoWallet"), + ("Person", "controls_wallet", "CryptoWallet"), + ("EmailAddress", "has_account", "SocialAccount"), + ], +) +def test_gap_edges_validate( + validator: OntologyValidator, from_type: str, edge: str, to_type: str +) -> None: + ok, errs = validator.validate_relation(from_type, edge, to_type) + assert ok, f"{from_type} -{edge}-> {to_type} rejected: {errs}" + + +def test_gap_edge_rejects_wrong_endpoints(validator: OntologyValidator) -> None: + # has_account does not start at a DomainName. 
+ ok, _ = validator.validate_relation("DomainName", "has_account", "SocialAccount") + assert not ok + + +def test_wallet_transaction_persists_to_kg(tmp_path) -> None: + kg = KnowledgeGraph(data_dir=str(tmp_path)) + wallet = canonicalize_wallet("0xAbC0000000000000000000000000000000000001") + tx = canonicalize_tx_hash("0xDEADBEEF") + + edge_id = kg.add_edge( + "CryptoWallet", + wallet, + "Transaction", + tx, + "sent_transaction", + {"chain": "eth"}, + ) + assert edge_id + + node = kg.get_node("CryptoWallet", wallet) + assert node is not None + assert kg.get_node("Transaction", tx) is not None + + +def test_canonicalization_helpers() -> None: + # EVM hex address: checksummed and lowercase fold to one canonical node. + assert canonicalize_wallet("0xABCdef0000000000000000000000000000000001") == ( + "0xabcdef0000000000000000000000000000000001" + ) + # Bitcoin base58 is case-sensitive: not folded, only stripped. + assert canonicalize_wallet(" 1BoatSLRHtKNngkdXEeobR76b53LETtpyT ") == ( + "1BoatSLRHtKNngkdXEeobR76b53LETtpyT" + ) + assert canonicalize_tx_hash(" 0xDEADBEEF ") == "0xdeadbeef" + assert canonicalize_social_account("AliceB", "Twitter") == "aliceb@twitter" From 91a6dfd1c80036efb085a155ac0544c85f94cf10 Mon Sep 17 00:00:00 2001 From: Patrick Roland Date: Mon, 15 Jun 2026 17:00:23 -0500 Subject: [PATCH 20/24] feat(osint): implement live OSINT enrichers into graph (AGE-120) Builds on the AGE-119 vendoring foundation. Implements the five live enrichers as native ZettelForge collectors (RFC-016 transform_registry + executor), feeding the graph backend: - whois_collector: also emit the registrant EmailAddress via a new registered_by edge (Organization branch unchanged). - dns_collector: reverse PTR for IPv4/IPv6 seeds -> DomainName via the existing hosts edge; non-global IPs are skipped. - maigret_collector (new, people tier): Alias -> SocialAccount via has_account, backed by maigret/sherlock (MIT), lazy-imported and fail-closed without the dependency. 
- hibp_collector: native HIBP v3 REST -> Breach via appeared_in_breach. Replaces the excluded LGPL hibpwned path; key read from env, never logged. - wallet_collector (new, financial tier): CryptoWallet -> Transaction via an Etherscan-style explorer API; sent_transaction / received_transaction. EVM hex wallets only; key read from env, never logged. Supporting changes: - ontology: Breach entity, registered_by + appeared_in_breach edges, canonicalize_email / canonicalize_alias / canonicalize_breach helpers. - executor: EmailAddress / Alias / CryptoWallet seed types plus endpoint prop-key and required-field wiring for the new entity types. - entity_resolver: canonical-key branches for the new seed/output types. - pyproject [osint]: add maigret / sherlock-project (both MIT); notices moved from planned to active in THIRD_PARTY_NOTICES.md. AGE-118 gates: pip-audit on the resolved [osint] closure (core + maigret/ sherlock) reports no known vulnerabilities; evidence in THIRD_PARTY/AGE-120-pip-audit.md. No GPL/LGPL packages, no Docker tool wrappers, secrets only from env and never logged. Tests: tests/test_osint_enrichers_age120.py (21 mocked-seam tests); full OSINT suite green; ruff check/format clean on src. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- pyproject.toml | 6 + .../osint/THIRD_PARTY/AGE-120-pip-audit.md | 38 +++ .../osint/THIRD_PARTY/THIRD_PARTY_NOTICES.md | 15 +- src/zettelforge/osint/__init__.py | 1 + .../osint/collectors/breach/hibp_collector.py | 123 +++++++- .../osint/collectors/financial/__init__.py | 15 + .../collectors/financial/wallet_collector.py | 182 ++++++++++++ .../infrastructure/dns_collector.py | 75 ++++- .../infrastructure/whois_collector.py | 72 ++++- .../osint/collectors/people/__init__.py | 2 + .../collectors/people/maigret_collector.py | 134 +++++++++ src/zettelforge/osint/entity_resolver.py | 21 ++ src/zettelforge/osint/executor.py | 29 +- src/zettelforge/osint/ontology.py | 64 +++++ tests/test_osint_collectors.py | 6 +- tests/test_osint_enrichers_age120.py | 270 ++++++++++++++++++ 16 files changed, 1013 insertions(+), 40 deletions(-) create mode 100644 src/zettelforge/osint/THIRD_PARTY/AGE-120-pip-audit.md create mode 100644 src/zettelforge/osint/collectors/financial/__init__.py create mode 100644 src/zettelforge/osint/collectors/financial/wallet_collector.py create mode 100644 src/zettelforge/osint/collectors/people/maigret_collector.py create mode 100644 tests/test_osint_enrichers_age120.py diff --git a/pyproject.toml b/pyproject.toml index b3a7c3f..0af17e7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -120,6 +120,12 @@ osint = [ "dnspython>=2.4.0", "python-whois>=0.9.0", "ipwhois>=1.2.0", + # AGE-120 enrichers. maigret/sherlock are both MIT (AGE-118 approved); they + # back username_to_social. Lazy-imported and fail-closed in the collector, + # so absence at runtime is non-fatal. HIBP and the block explorer use the + # already-present httpx (no new dep). 
+ "maigret>=0.4.0", + "sherlock-project>=0.14.0", ] dev = [ diff --git a/src/zettelforge/osint/THIRD_PARTY/AGE-120-pip-audit.md b/src/zettelforge/osint/THIRD_PARTY/AGE-120-pip-audit.md new file mode 100644 index 0000000..0131e4f --- /dev/null +++ b/src/zettelforge/osint/THIRD_PARTY/AGE-120-pip-audit.md @@ -0,0 +1,38 @@ +# AGE-120 dependency audit (AGE-118 gate) + +GOV-009 / AGE-118 requires a pip-audit (OSV) run against the final pinned +OSINT dependency set, with high/critical findings blocking merge. The CI +`pip-audit` job (`.github/workflows/ci.yml`) is the enforcing gate; it audits +a clean `pip install -e ".[dev]"` closure on every PR. This file records the +local pre-merge evidence for AGE-120. + +## Tool + +`pip-audit 2.10.0` (OSV + PyPI advisory DB). + +## Result: no known vulnerabilities + +Audited the resolved dependency closure of the `[osint]` extra, split because +maigret/sherlock are declared but not installed in the dev box used for this +run: + +| Requirement set | Packages (resolved closure) | Result | +|---|---|---| +| core osint | dnspython>=2.4.0, python-whois>=0.9.0, ipwhois>=1.2.0, httpx>=0.25.0 | No known vulnerabilities found | +| new osint (AGE-120) | maigret>=0.4.0, sherlock-project>=0.14.0 | No known vulnerabilities found | + +`httpx` (already a core dependency) backs both the native HIBP v3 REST call +and the block-explorer call, so `email_to_breaches` and +`wallet_to_transactions` add no new packages. + +## Notes + +- The two HTTP enrichers contact their endpoints directly; no breach- or + chain-specific package is introduced (see `THIRD_PARTY_NOTICES.md`). +- maigret/sherlock are lazy-imported and fail-closed in + `collectors/people/maigret_collector.py`, so their absence at runtime is + non-fatal; the declaration exists so the CI gate audits their closure. 
+- License compliance (AGE-118): maigret and sherlock-project are MIT; the + excluded GPL (holehe/ignorant) and LGPL (hibpwned/psycopg2) packages are + not present and are recorded in `THIRD_PARTY_NOTICES.md` to prevent + reintroduction. diff --git a/src/zettelforge/osint/THIRD_PARTY/THIRD_PARTY_NOTICES.md b/src/zettelforge/osint/THIRD_PARTY/THIRD_PARTY_NOTICES.md index ea882cf..32f4a44 100644 --- a/src/zettelforge/osint/THIRD_PARTY/THIRD_PARTY_NOTICES.md +++ b/src/zettelforge/osint/THIRD_PARTY/THIRD_PARTY_NOTICES.md @@ -17,16 +17,15 @@ at install time from the respective distributions; summarized here: | Package | License | Use | |---|---|---| -| python-whois (richardpenman) | MIT | domain WHOIS collector | -| dnspython | ISC | DNS record collectors | +| python-whois (richardpenman) | MIT | domain WHOIS collector (Organization + registrant EmailAddress) | +| dnspython | ISC | DNS record collectors (forward A/AAAA/NS/MX + reverse PTR) | | ipwhois | BSD-2-Clause | IP -> ASN/netblock WHOIS | +| maigret (soxoj) | MIT | username -> SocialAccount enumeration (AGE-120) | +| sherlock-project | MIT | username presence checks (AGE-120) | -Planned additions for the enricher follow-up (record their notices when added): - -| Package | License | Use | -|---|---|---| -| maigret (soxoj) | MIT | username -> SocialAccount enumeration | -| sherlock-project | MIT | username presence checks | +`email_to_breaches` (HIBP v3 REST) and `wallet_to_transactions` (Etherscan +API) call their HTTP endpoints directly via `httpx` (Apache-2.0, already a +core dependency): no breach- or chain-specific package is added. 
## Excluded (NOT used — recorded so they are never reintroduced) diff --git a/src/zettelforge/osint/__init__.py b/src/zettelforge/osint/__init__.py index 5ef3356..cb15f1b 100644 --- a/src/zettelforge/osint/__init__.py +++ b/src/zettelforge/osint/__init__.py @@ -70,6 +70,7 @@ # the collector modules under it, and each module calls # ``TRANSFORM_REGISTRY.register(...)`` at import time. from zettelforge.osint.collectors import breach as _breach # noqa: F401 +from zettelforge.osint.collectors import financial as _financial # noqa: F401 from zettelforge.osint.collectors import infrastructure as _infrastructure # noqa: F401 from zettelforge.osint.collectors import people as _people # noqa: F401 from zettelforge.osint.collectors import social as _social # noqa: F401 diff --git a/src/zettelforge/osint/collectors/breach/hibp_collector.py b/src/zettelforge/osint/collectors/breach/hibp_collector.py index 4838cf3..e44a8ce 100644 --- a/src/zettelforge/osint/collectors/breach/hibp_collector.py +++ b/src/zettelforge/osint/collectors/breach/hibp_collector.py @@ -1,16 +1,33 @@ """ -HaveIBeenPwned collector — Phase 4 stub (RFC-016 §5). +HaveIBeenPwned collector — AGE-120 (RFC-016 §5). -Looks up breach exposure for an email address via the HIBP API. Stub: -requires ``HIBP_API_KEY`` and returns ``[]`` without it. Phase 4 will -ship the live lookup and breach-record emission. +Looks up breach exposure for an ``EmailAddress`` via the native HIBP v3 REST +API and emits one ``Breach`` per hit, linked to the email via the +``appeared_in_breach`` edge. + +This is the native REST path mandated by AGE-118: the LGPL ``hibpwned`` +wrapper is excluded (see ``THIRD_PARTY/THIRD_PARTY_NOTICES.md``). + +Key handling +------------ +The API key is read from ``HIBP_API_KEY`` at call time and passed only in +the ``hibp-api-key`` request header. It is never logged. Without the key the +collector fails closed and returns ``[]``. + +No retries: AGENTS.OE Override 4 forbids silent retry. 
HTTP 404 means "no +breaches" (empty list); every other failure logs a warning and returns ``[]``. """ from __future__ import annotations import os +from typing import Any +from urllib.parse import quote + +import httpx from zettelforge.log import get_logger +from zettelforge.osint.ontology import canonicalize_email from zettelforge.osint.transform_registry import ( TRANSFORM_REGISTRY, CollectorTuple, @@ -20,24 +37,112 @@ _logger = get_logger("zettelforge.osint.collectors.hibp") API_KEY_ENV = "HIBP_API_KEY" +API_BASE = "https://haveibeenpwned.com/api/v3/breachedaccount" +# HIBP rejects requests without a descriptive User-Agent. +USER_AGENT = "ZettelForge-OSINT" +DEFAULT_TIMEOUT = 15.0 + + +def _fetch_breaches(email: str, api_key: str) -> list[dict[str, Any]]: + """Call HIBP v3 breachedaccount. Returns the parsed breach list. + + 404 -> ``[]`` (account clean). The API key travels only in the header and + is never logged. Any HTTP / parse error logs a warning and returns ``[]``. + """ + url = f"{API_BASE}/{quote(email, safe='')}" # safe='' so "/" in the address is escaped too + headers = {"hibp-api-key": api_key, "User-Agent": USER_AGENT} + params = {"truncateResponse": "false"} + try: + with httpx.Client(timeout=DEFAULT_TIMEOUT) as client: + response = client.get(url, headers=headers, params=params) + if response.status_code == 404: + return [] + response.raise_for_status() + payload = response.json() + except httpx.HTTPError as exc: + _logger.warning("hibp_collector_http_error", email=email, error=str(exc)) + return [] + except ValueError as exc: # JSON decode error + _logger.warning("hibp_collector_json_error", email=email, error=str(exc)) + return [] + if not isinstance(payload, list): + _logger.warning("hibp_collector_unexpected_shape", email=email) + return [] + return [item for item in payload if isinstance(item, dict)] + + +def _breach_props(record: dict[str, Any]) -> dict[str, Any] | None: + """Map a HIBP breach record to ``Breach`` entity properties. 
+ + Returns ``None`` if the record has no ``Name`` (the required field). + """ + name = record.get("Name") + if not isinstance(name, str) or not name.strip(): + return None + props: dict[str, Any] = {"name": name.strip()} + for src, dst in ( + ("Title", "title"), + ("Domain", "domain"), + ("BreachDate", "breach_date"), + ("AddedDate", "added_date"), + ("Description", "description"), + ): + value = record.get(src) + if isinstance(value, str) and value.strip(): + props[dst] = value.strip() + pwn_count = record.get("PwnCount") + if isinstance(pwn_count, int): + props["pwn_count"] = pwn_count + if isinstance(record.get("IsVerified"), bool): + props["is_verified"] = record["IsVerified"] + data_classes = record.get("DataClasses") + if isinstance(data_classes, list): + props["data_classes"] = [str(d) for d in data_classes] + return props def collect(input_entity_type: str, input_value: str) -> list[CollectorTuple]: - """Look up breaches associated with an EmailAddress. Stub: returns ``[]``.""" + """Enumerate breaches for an EmailAddress via HIBP. Fail-closed without key.""" if input_entity_type != "EmailAddress": return [] - if not os.environ.get(API_KEY_ENV): + api_key = os.environ.get(API_KEY_ENV) + if not api_key: _logger.debug("hibp_collector_no_api_key", env=API_KEY_ENV) return [] - # Phase 4: real HIBP call goes here. For now: fail closed. 
- return [] + + email = canonicalize_email(input_value) + if not email: + return [] + + out: list[CollectorTuple] = [] + seen: set[str] = set() + for record in _fetch_breaches(email, api_key): + props = _breach_props(record) + if props is None: + continue + name = props["name"] + if name in seen: + continue + seen.add(name) + out.append( + CollectorTuple( + output_entity_type="Breach", + output_value=name, + edge_type="appeared_in_breach", + from_entity_type="EmailAddress", + to_entity_type="Breach", + output_props=props, + edge_props={}, + ) + ) + return out _METADATA = TransformMetadata( name="hibp_collector", description="HaveIBeenPwned: enumerate breach exposures for an email.", input_types=("EmailAddress",), - output_types=(), + output_types=(("Breach", "appeared_in_breach"),), api_dependencies=("haveibeenpwned.com",), rate_limit=2.0, ) diff --git a/src/zettelforge/osint/collectors/financial/__init__.py b/src/zettelforge/osint/collectors/financial/__init__.py new file mode 100644 index 0000000..58d0712 --- /dev/null +++ b/src/zettelforge/osint/collectors/financial/__init__.py @@ -0,0 +1,15 @@ +""" +Financial-tier collectors (RFC-016 Phase 4, AGE-120). + +Blockchain wallet -> transaction enrichment via a block-explorer API. The +collector registers its metadata at import time and fails closed (returns +``[]``) without an explorer API key. +""" + +from zettelforge.osint.collectors.financial import ( + wallet_collector, +) + +__all__ = [ + "wallet_collector", +] diff --git a/src/zettelforge/osint/collectors/financial/wallet_collector.py b/src/zettelforge/osint/collectors/financial/wallet_collector.py new file mode 100644 index 0000000..6b2fa50 --- /dev/null +++ b/src/zettelforge/osint/collectors/financial/wallet_collector.py @@ -0,0 +1,182 @@ +""" +Wallet -> Transaction collector — AGE-120 (RFC-016 §5). 
+ +Given a ``CryptoWallet`` seed, fetches its recent transactions from an +Etherscan-compatible block-explorer API and emits one ``Transaction`` per +hit, linked to the wallet via ``sent_transaction`` (wallet is the sender) or +``received_transaction`` (wallet is the recipient). A self-transfer emits +both. + +Scope: EVM hex wallets (``0x...``) on the Etherscan API only. Non-hex +addresses (e.g. Bitcoin base58) return ``[]`` — other chains are a follow-up. + +Key handling +------------ +The API key is read from ``ETHERSCAN_API_KEY`` at call time and passed only +as the ``apikey`` query parameter. It is never logged. Without the key the +collector fails closed and returns ``[]``. + +No retries: AGENTS.OE Override 4 forbids silent retry. Any HTTP / parse +error logs a warning and returns ``[]``. +""" + +from __future__ import annotations + +import os +from typing import Any + +import httpx + +from zettelforge.log import get_logger +from zettelforge.osint.ontology import canonicalize_tx_hash, canonicalize_wallet +from zettelforge.osint.transform_registry import ( + TRANSFORM_REGISTRY, + CollectorTuple, + TransformMetadata, +) + +_logger = get_logger("zettelforge.osint.collectors.wallet") + +API_KEY_ENV = "ETHERSCAN_API_KEY" +API_URL = "https://api.etherscan.io/api" +CHAIN = "eth" +DEFAULT_TIMEOUT = 15.0 +# Cap so a high-volume wallet cannot flood the graph in one collection. +MAX_TX = 100 + + +def _fetch_transactions(wallet: str, api_key: str) -> list[dict[str, Any]]: + """Call Etherscan ``account/txlist``. Returns the parsed tx list. + + The API key travels only in the query string and is never logged. A + ``status`` of ``"0"`` (no transactions or upstream error) yields ``[]``. 
+ """ + params = { + "module": "account", + "action": "txlist", + "address": wallet, + "startblock": "0", + "endblock": "99999999", + "page": "1", + "offset": str(MAX_TX), + "sort": "desc", + "apikey": api_key, + } + try: + with httpx.Client(timeout=DEFAULT_TIMEOUT) as client: + response = client.get(API_URL, params=params) + response.raise_for_status() + payload = response.json() + except httpx.HTTPError as exc: # str(exc) embeds the URL (with apikey); log type only + _logger.warning("wallet_collector_http_error", wallet=wallet, error=type(exc).__name__) + return [] + except ValueError as exc: # JSON decode error + _logger.warning("wallet_collector_json_error", wallet=wallet, error=str(exc)) + return [] + if not isinstance(payload, dict): + _logger.warning("wallet_collector_unexpected_shape", wallet=wallet) + return [] + result = payload.get("result") + if not isinstance(result, list): + # status "0" with a string message (e.g. "No transactions found"). + _logger.debug("wallet_collector_no_result", wallet=wallet, status=payload.get("status")) + return [] + return [tx for tx in result if isinstance(tx, dict)] + + +def _tx_props(record: dict[str, Any]) -> dict[str, Any] | None: + """Map an Etherscan tx record to ``Transaction`` entity properties. + + Returns ``None`` if the record has no usable ``hash`` (the required field). 
+ """ + raw_hash = record.get("hash") + if not isinstance(raw_hash, str) or not raw_hash.strip(): + return None + props: dict[str, Any] = {"tx_hash": canonicalize_tx_hash(raw_hash), "chain": CHAIN} + for src, dst in ( + ("from", "from_address"), + ("to", "to_address"), + ("value", "value"), + ("timeStamp", "timestamp"), + ("blockNumber", "block"), + ): + value = record.get(src) + if isinstance(value, str) and value.strip(): + props[dst] = value.strip() + return props + + +def _to_tuples(wallet: str, records: list[dict[str, Any]]) -> list[CollectorTuple]: + """Map tx records to sent/received CollectorTuples for ``wallet``.""" + out: list[CollectorTuple] = [] + seen: set[tuple[str, str]] = set() + for record in records[:MAX_TX]: + props = _tx_props(record) + if props is None: + continue + tx_hash = props["tx_hash"] + sender = str(record.get("from", "")).strip().lower() + recipient = str(record.get("to", "")).strip().lower() + + if sender == wallet and ("sent", tx_hash) not in seen: + seen.add(("sent", tx_hash)) + out.append( + CollectorTuple( + output_entity_type="Transaction", + output_value=tx_hash, + edge_type="sent_transaction", + from_entity_type="CryptoWallet", + to_entity_type="Transaction", + output_props=props, + edge_props={}, + ) + ) + if recipient == wallet and ("received", tx_hash) not in seen: + seen.add(("received", tx_hash)) + out.append( + CollectorTuple( + output_entity_type="Transaction", + output_value=tx_hash, + edge_type="received_transaction", + from_entity_type="Transaction", + to_entity_type="CryptoWallet", + output_props=props, + edge_props={}, + ) + ) + return out + + +def collect(input_entity_type: str, input_value: str) -> list[CollectorTuple]: + """Enumerate transactions for a CryptoWallet. Fail-closed without key.""" + if input_entity_type != "CryptoWallet": + return [] + + wallet = canonicalize_wallet(input_value) + # EVM hex wallets only; non-hex addresses are out of scope for this backend. 
+ if not wallet.startswith("0x"): + _logger.debug("wallet_collector_non_evm", wallet=wallet) + return [] + + api_key = os.environ.get(API_KEY_ENV) + if not api_key: + _logger.debug("wallet_collector_no_api_key", env=API_KEY_ENV) + return [] + + return _to_tuples(wallet, _fetch_transactions(wallet, api_key)) + + +_METADATA = TransformMetadata( + name="wallet_collector", + description="Block explorer: enumerate a wallet's sent/received transactions.", + input_types=("CryptoWallet",), + output_types=( + ("Transaction", "sent_transaction"), + ("Transaction", "received_transaction"), + ), + api_dependencies=("etherscan.io",), + rate_limit=5.0, +) + + +TRANSFORM_REGISTRY.register(_METADATA, collect) diff --git a/src/zettelforge/osint/collectors/infrastructure/dns_collector.py b/src/zettelforge/osint/collectors/infrastructure/dns_collector.py index 4c7ee77..a0b2dd9 100644 --- a/src/zettelforge/osint/collectors/infrastructure/dns_collector.py +++ b/src/zettelforge/osint/collectors/infrastructure/dns_collector.py @@ -15,6 +15,7 @@ from __future__ import annotations +import ipaddress from typing import Any from zettelforge.log import get_logger @@ -73,6 +74,60 @@ def _resolve(resolver: Any, domain: str, rdtype: str) -> list[Any]: return list(answer) +def _reverse_pointer(ip: str) -> Any: + """Return the dnspython reverse-pointer name for an IP. Seam for tests.""" + import dns.reversename + + return dns.reversename.from_address(ip) + + +def _collect_ptr(ip: str, timeout: float, lifetime: float) -> list[CollectorTuple]: + """Reverse-resolve an IP to its PTR DomainName(s) via the ``hosts`` edge. + + Forward DNS (domain -> A/AAAA/NS/MX) was already live in Phase 1; this + fills the inverse direction for IP seeds. Returns ``[]`` on invalid IP, + missing dnspython, or no PTR record. 
+ """ + try: + canonical_ip = ipaddress.ip_address(ip) + except ValueError: + _logger.debug("dns_invalid_ip", ip=ip) + return [] + # Reverse DNS on private / reserved / documentation IPs is pointless and + # would leak a needless query; skip them (also keeps unit tests offline). + if not canonical_ip.is_global: + _logger.debug("dns_skip_non_global_ip", ip=str(canonical_ip)) + return [] + family = "IPv6Address" if isinstance(canonical_ip, ipaddress.IPv6Address) else "IPv4Address" + + try: + resolver = _make_resolver(timeout, lifetime) + pointer = _reverse_pointer(str(canonical_ip)) + except ImportError: + _logger.warning("dns_collector_missing_dnspython", ip=str(canonical_ip)) + return [] + + out: list[CollectorTuple] = [] + seen: set[str] = set() + for rdata in _resolve(resolver, pointer, "PTR"): + name = canonicalize_domain(str(rdata)) + if not name or name in seen: + continue + seen.add(name) + out.append( + CollectorTuple( + output_entity_type="DomainName", + output_value=name, + edge_type="hosts", + from_entity_type=family, + to_entity_type="DomainName", + output_props={"value": name}, + edge_props={}, + ) + ) + return out + + def collect( input_entity_type: str, input_value: str, @@ -80,15 +135,16 @@ def collect( timeout: float = DEFAULT_TIMEOUT, lifetime: float = DEFAULT_LIFETIME, ) -> list[CollectorTuple]: - """Collect DNS records for a DomainName input. + """Collect DNS records for a DomainName seed, or PTR for an IP seed. Parameters ---------- input_entity_type : str - Must be ``"DomainName"``. Other types return an empty list. + ``"DomainName"`` (forward A/AAAA/NS/MX) or ``"IPv4Address"`` / + ``"IPv6Address"`` (reverse PTR -> DomainName). Other types return []. input_value : str - Domain to resolve. Will be canonicalized (lowercased, trailing dot - stripped) before lookup. + Domain or IP to resolve. Domains are canonicalized (lowercased, + trailing dot stripped) before lookup. Returns ------- @@ -96,6 +152,9 @@ def collect( One tuple per record. 
Empty list on lookup miss or when dnspython is not installed. """ + if input_entity_type in ("IPv4Address", "IPv6Address"): + return _collect_ptr(input_value, timeout, lifetime) + if input_entity_type != "DomainName": return [] @@ -195,13 +254,17 @@ def collect( _METADATA = TransformMetadata( name="dns_collector", - description="Resolve a domain to A, AAAA, NS, and MX records via DNS.", - input_types=("DomainName",), + description=( + "Forward-resolve a domain to A, AAAA, NS, MX records, or " + "reverse-resolve an IP to its PTR DomainName." + ), + input_types=("DomainName", "IPv4Address", "IPv6Address"), output_types=( ("IPv4Address", "resolves_to"), ("IPv6Address", "resolves_to"), ("NSRecord", "ns_for"), ("MXRecord", "mx_for"), + ("DomainName", "hosts"), ), api_dependencies=("dnspython",), rate_limit=None, diff --git a/src/zettelforge/osint/collectors/infrastructure/whois_collector.py b/src/zettelforge/osint/collectors/infrastructure/whois_collector.py index 2c66878..db5ec87 100644 --- a/src/zettelforge/osint/collectors/infrastructure/whois_collector.py +++ b/src/zettelforge/osint/collectors/infrastructure/whois_collector.py @@ -28,6 +28,7 @@ canonicalize_asn, canonicalize_cidr, canonicalize_domain, + canonicalize_email, ) from zettelforge.osint.transform_registry import ( TRANSFORM_REGISTRY, @@ -121,6 +122,25 @@ def _domain_org(record: Any) -> str | None: return None +def _domain_email(record: Any) -> str | None: + """Extract the registrant email from a python-whois record. + + python-whois surfaces emails under ``emails`` (str or list) and + sometimes ``registrant_email``. Take the first plausible address. 
+ """ + for attr in ("registrant_email", "emails", "email"): + try: + value = getattr(record, attr, None) + except Exception: # pragma: no cover — defensive + value = None + if value is None and isinstance(record, dict): + value = record.get(attr) + candidate = _first_string(value) + if candidate and "@" in candidate: + return candidate + return None + + # --------------------------------------------------------------------------- # Branch implementations # --------------------------------------------------------------------------- @@ -130,21 +150,41 @@ def _collect_domain(domain: str) -> list[CollectorTuple]: record = _lookup_domain(domain) if record is None: return [] + + out: list[CollectorTuple] = [] + org = _domain_org(record) - if not org: - _logger.debug("whois_no_registrant", domain=domain) - return [] - return [ - CollectorTuple( - output_entity_type="Organization", - output_value=org, - edge_type="owned_by", - from_entity_type="DomainName", - to_entity_type="Organization", - output_props={"name": org}, - edge_props={}, + if org: + out.append( + CollectorTuple( + output_entity_type="Organization", + output_value=org, + edge_type="owned_by", + from_entity_type="DomainName", + to_entity_type="Organization", + output_props={"name": org}, + edge_props={}, + ) + ) + + raw_email = _domain_email(record) + if raw_email: + email = canonicalize_email(raw_email) + out.append( + CollectorTuple( + output_entity_type="EmailAddress", + output_value=email, + edge_type="registered_by", + from_entity_type="DomainName", + to_entity_type="EmailAddress", + output_props={"value": email}, + edge_props={}, + ) ) - ] + + if not out: + _logger.debug("whois_no_registrant", domain=domain) + return out def _ip_address_family(ip: str) -> str: @@ -271,10 +311,14 @@ def collect(input_entity_type: str, input_value: str) -> list[CollectorTuple]: _METADATA = TransformMetadata( name="whois_collector", - description="Domain or IP WHOIS lookup; emits Organization, Netblock, and ASNumber.", + 
description=( + "Domain or IP WHOIS lookup; emits Organization, registrant " + "EmailAddress, Netblock, and ASNumber." + ), input_types=("DomainName", "IPv4Address", "IPv6Address"), output_types=( ("Organization", "owned_by"), + ("EmailAddress", "registered_by"), ("Netblock", "associated_with"), ("ASNumber", "part_of_as"), ), diff --git a/src/zettelforge/osint/collectors/people/__init__.py b/src/zettelforge/osint/collectors/people/__init__.py index 9015e8a..6c53c39 100644 --- a/src/zettelforge/osint/collectors/people/__init__.py +++ b/src/zettelforge/osint/collectors/people/__init__.py @@ -10,11 +10,13 @@ from zettelforge.osint.collectors.people import ( holehe_collector, hunter_collector, + maigret_collector, namechk_collector, ) __all__ = [ "holehe_collector", "hunter_collector", + "maigret_collector", "namechk_collector", ] diff --git a/src/zettelforge/osint/collectors/people/maigret_collector.py b/src/zettelforge/osint/collectors/people/maigret_collector.py new file mode 100644 index 0000000..dcb59e9 --- /dev/null +++ b/src/zettelforge/osint/collectors/people/maigret_collector.py @@ -0,0 +1,134 @@ +""" +Username -> SocialAccount collector — AGE-120 (RFC-016 §5). + +Enumerates the social platforms a username (``Alias``) is present on and +emits a ``SocialAccount`` per hit, linked to the input alias via the +``has_account`` edge. + +Backend: ``maigret`` (soxoj, MIT) with ``sherlock`` (MIT) as an alternate. +Both are lazy-imported behind ``_search_username`` so the package loads +without them; a missing dependency logs a warning and returns ``[]``. + +This replaces the GPL ``holehe`` path that AGE-118 excluded (see +``collectors/people/holehe_collector.py``): account enumeration is done on +a permissive basis only. + +No retries: maigret already manages per-site timeouts, and AGENTS.OE +Override 4 forbids silent retry. Any backend failure surfaces as an empty +result plus a structured warning. 
+""" + +from __future__ import annotations + +from typing import Any + +from zettelforge.log import get_logger +from zettelforge.osint.ontology import canonicalize_alias, canonicalize_social_account +from zettelforge.osint.transform_registry import ( + TRANSFORM_REGISTRY, + CollectorTuple, + TransformMetadata, +) + +_logger = get_logger("zettelforge.osint.collectors.maigret") + +# Cap emitted accounts so a noisy username (maigret checks 3000+ sites) cannot +# flood the graph in one collection. +MAX_ACCOUNTS = 200 + + +def _search_username(username: str) -> list[dict[str, Any]]: + """Run a username presence search. Returns rows ``{platform, url}``. + + Lazy-imports ``maigret`` (MIT). Returns ``[]`` when maigret is not + installed or the search fails — fail-closed, no silent retry. maigret's + public search API is async, so it is driven on a private event loop. + + ponytail: live maigret wiring is best-effort behind a single seam; tests + mock this function and exercise the pure mapping in ``_rows_to_tuples``. + Pin maigret in ``[osint]`` before relying on the live path in prod. + """ + try: + import asyncio + + import maigret + from maigret.sites import MaigretDatabase + except ImportError: + _logger.warning("maigret_collector_missing_dep", username=username) + return [] + + try: + db = MaigretDatabase().load_from_path(maigret.settings.Settings().sites_db_path) + sites = db.ranked_sites_dict(top=MAX_ACCOUNTS) + results = asyncio.run( + maigret.search(username=username, site_dict=sites, timeout=30, no_progressbar=True) + ) + except Exception as exc: # backend boundary: any maigret failure fails closed + _logger.warning("maigret_collector_failed", error=str(exc)) + return [] + + rows: list[dict[str, Any]] = [] + for site_name, data in (results or {}).items(): + status = data.get("status") + # maigret marks a confirmed hit with a CLAIMED query-result status. 
+ claimed = getattr(status, "status", None) + if claimed is not None and str(claimed).upper().endswith("CLAIMED"): + rows.append({"platform": site_name, "url": data.get("url_user", "")}) + return rows + + +def _rows_to_tuples(username: str, rows: list[dict[str, Any]]) -> list[CollectorTuple]: + """Map ``{platform, url}`` rows to ``has_account`` CollectorTuples.""" + out: list[CollectorTuple] = [] + seen: set[str] = set() + for row in rows[:MAX_ACCOUNTS]: + platform = str(row.get("platform", "")).strip() + if not platform: + continue + account_id = canonicalize_social_account(username, platform) + if account_id in seen: + continue + seen.add(account_id) + props: dict[str, Any] = { + "id": account_id, + "username": username, + "platform": platform, + } + url = str(row.get("url", "")).strip() + if url: + props["profile_url"] = url + out.append( + CollectorTuple( + output_entity_type="SocialAccount", + output_value=account_id, + edge_type="has_account", + from_entity_type="Alias", + to_entity_type="SocialAccount", + output_props=props, + edge_props={}, + ) + ) + return out + + +def collect(input_entity_type: str, input_value: str) -> list[CollectorTuple]: + """Enumerate SocialAccounts for an Alias (username). 
Returns [] otherwise.""" + if input_entity_type != "Alias": + return [] + username = canonicalize_alias(input_value) + if not username: + return [] + return _rows_to_tuples(username, _search_username(username)) + + +_METADATA = TransformMetadata( + name="maigret_collector", + description="maigret/sherlock: enumerate a username's social accounts.", + input_types=("Alias",), + output_types=(("SocialAccount", "has_account"),), + api_dependencies=("maigret",), + rate_limit=None, +) + + +TRANSFORM_REGISTRY.register(_METADATA, collect) diff --git a/src/zettelforge/osint/entity_resolver.py b/src/zettelforge/osint/entity_resolver.py index e542345..7760a85 100644 --- a/src/zettelforge/osint/entity_resolver.py +++ b/src/zettelforge/osint/entity_resolver.py @@ -25,12 +25,17 @@ from typing import TYPE_CHECKING from zettelforge.osint.ontology import ( + canonicalize_alias, canonicalize_asn, + canonicalize_breach, canonicalize_cidr, canonicalize_domain, + canonicalize_email, canonicalize_mx, canonicalize_port, + canonicalize_tx_hash, canonicalize_url, + canonicalize_wallet, canonicalize_web_title, ) @@ -204,6 +209,22 @@ def _canonical_key(entity_type: str, value: str) -> str: raise ValueError(f"WebTitle must be 'url::title', got {value!r}") url, title = raw.split("::", 1) return f"{entity_type}:{canonicalize_web_title(url, title)}" + # ── AGE-120 enricher seed / output types ──────────────────────────────── + if entity_type == "EmailAddress": + return f"{entity_type}:{canonicalize_email(value)}" + if entity_type == "Alias": + return f"{entity_type}:{canonicalize_alias(value)}" + if entity_type == "CryptoWallet": + return f"{entity_type}:{canonicalize_wallet(value)}" + if entity_type == "Transaction": + return f"{entity_type}:{canonicalize_tx_hash(value)}" + if entity_type == "SocialAccount": + # Value is the composite ``username@platform`` id; fold case so a + # checksum/case variant dedupes. 
Already produced canonical by the + # collector via ``canonicalize_social_account`` — this stays idempotent. + return f"{entity_type}:{value.strip().lower()}" + if entity_type == "Breach": + return f"{entity_type}:{canonicalize_breach(value)}" return f"{entity_type}:{value.strip()}" diff --git a/src/zettelforge/osint/executor.py b/src/zettelforge/osint/executor.py index 841493c..0adf26f 100644 --- a/src/zettelforge/osint/executor.py +++ b/src/zettelforge/osint/executor.py @@ -35,7 +35,17 @@ _logger = get_logger("zettelforge.osint.executor") -SUPPORTED_SEED_TYPES = ("DomainName", "IPv4Address", "IPv6Address", "ASNumber", "Netblock") +SUPPORTED_SEED_TYPES = ( + "DomainName", + "IPv4Address", + "IPv6Address", + "ASNumber", + "Netblock", + # AGE-120 enricher seeds. + "EmailAddress", + "Alias", + "CryptoWallet", +) @dataclass(frozen=True) @@ -98,6 +108,13 @@ def error_count(self) -> int: "Organization": ("organization", "org", "name"), "Port": ("port", "value"), "Website": ("url", "website", "value"), + # AGE-120 enricher endpoint types. 
+ "EmailAddress": ("email", "value"), + "Alias": ("alias", "username", "value"), + "CryptoWallet": ("address", "wallet", "value"), + "Transaction": ("tx_hash", "hash", "value"), + "SocialAccount": ("id", "value"), + "Breach": ("name", "value"), } @@ -408,6 +425,16 @@ def _entity_properties( props.setdefault("protocol", protocol) elif entity_type == "Website": props.setdefault("url", canonical) + elif entity_type in ("EmailAddress", "Alias"): + props.setdefault("value", canonical) + elif entity_type == "CryptoWallet": + props.setdefault("address", canonical) + elif entity_type == "Transaction": + props.setdefault("tx_hash", canonical) + elif entity_type == "SocialAccount": + props.setdefault("id", canonical) + elif entity_type == "Breach": + props.setdefault("name", canonical) return props diff --git a/src/zettelforge/osint/ontology.py b/src/zettelforge/osint/ontology.py index 04e4855..e43bb7c 100644 --- a/src/zettelforge/osint/ontology.py +++ b/src/zettelforge/osint/ontology.py @@ -181,6 +181,24 @@ ], "properties": {}, }, + # ── Phase 4 breach exposure (AGE-120) ─────────────────────────────────── + # A named data breach an email address appeared in. Sourced from the + # native HIBP REST path (breach/hibp_collector.py). canonical value: + # ``canonicalize_breach`` -> lowercased breach name (HIBP "Name" field). + "Breach": { + "required": ["name"], + "optional": [ + "title", + "domain", + "breach_date", + "added_date", + "pwn_count", + "data_classes", + "is_verified", + "description", + ], + "properties": {}, + }, # ── Phase 5: Physical (stubs — collectors deferred) ───────────────────── "GPS": { "required": ["latitude", "longitude"], @@ -372,6 +390,19 @@ "to_types": ["SocialAccount"], "cardinality": "many_to_many", }, + # ── AGE-120 enricher edges ────────────────────────────────────────────── + # WHOIS registrant email for a domain (domain_to_whois EmailAddress branch). 
+ "registered_by": { + "from_types": ["DomainName"], + "to_types": ["EmailAddress"], + "cardinality": "many_to_one", + }, + # HIBP breach exposure for an email (email_to_breaches). + "appeared_in_breach": { + "from_types": ["EmailAddress"], + "to_types": ["Breach"], + "cardinality": "many_to_many", + }, # ── Phase 5: Physical ─────────────────────────────────────────────────── "located_near": { "from_types": ["Device", "Person"], @@ -509,6 +540,36 @@ def canonicalize_social_account(username: str, platform: str) -> str: return f"{username.strip().lower()}@{platform.strip().lower()}" +def canonicalize_email(raw: str) -> str: + """Lowercase and strip an email address for stable dedup. + + Email addresses are treated case-insensitively for the providers the + OSINT layer targets, so ``Alice@Example.com`` and ``alice@example.com`` + fold to one node. No syntactic validation here: the collector that + produces the address owns that. + """ + return raw.strip().lower() + + +def canonicalize_breach(raw: str) -> str: + """Canonical form for a breach name: stripped and lowercased. + + HIBP breach names (the ``Name`` field, e.g. ``Adobe``) are stable + identifiers; lowercasing keeps ``(Breach, name)`` deduped regardless of + source casing. + """ + return raw.strip().lower() + + +def canonicalize_alias(raw: str) -> str: + """Canonical form for an Alias / username: stripped and lowercased. + + Usernames are matched case-insensitively across the social platforms the + enrichers target, so folding case keeps one node per handle. 
+ """ + return raw.strip().lower() + + # --------------------------------------------------------------------------- # Merge helpers # --------------------------------------------------------------------------- @@ -552,9 +613,12 @@ def merge_into_global_ontology() -> None: "ONTOLOGY", "OSINT_ENTITY_TYPES", "OSINT_RELATION_TYPES", + "canonicalize_alias", "canonicalize_asn", + "canonicalize_breach", "canonicalize_cidr", "canonicalize_domain", + "canonicalize_email", "canonicalize_ipv6", "canonicalize_mx", "canonicalize_port", diff --git a/tests/test_osint_collectors.py b/tests/test_osint_collectors.py index f055e00..8c921fa 100644 --- a/tests/test_osint_collectors.py +++ b/tests/test_osint_collectors.py @@ -66,8 +66,10 @@ def fake_resolve(name: str, rdtype: str) -> list[Any]: return resolver -def test_dns_collect_rejects_non_domain_input() -> None: - assert dns_collector.collect("IPv4Address", "1.2.3.4") == [] +def test_dns_collect_rejects_unhandled_input() -> None: + # DomainName (forward) and IPv4/IPv6 (reverse PTR) are handled; other + # seed types are not. + assert dns_collector.collect("ASNumber", "15169") == [] def test_dns_collect_returns_empty_when_dnspython_missing() -> None: diff --git a/tests/test_osint_enrichers_age120.py b/tests/test_osint_enrichers_age120.py new file mode 100644 index 0000000..35072f2 --- /dev/null +++ b/tests/test_osint_enrichers_age120.py @@ -0,0 +1,270 @@ +""" +AGE-120 live-enricher tests: whois email, reverse DNS, maigret, HIBP, wallet. + +Fully mocked — no network, no disk beyond a tmp KG. Each collector is driven +through its single network seam so the pure mapping logic is exercised while +the live backend is never contacted. Also covers the new seed types in the +executor and the new ontology entities/edges. 
+""" + +from __future__ import annotations + +from types import SimpleNamespace +from unittest.mock import MagicMock, patch + +import pytest + +from zettelforge import osint as _osint # noqa: F401 — registers collectors +from zettelforge.knowledge_graph import KnowledgeGraph +from zettelforge.ontology import ENTITY_TYPES, OntologyValidator +from zettelforge.osint.collectors.breach import hibp_collector +from zettelforge.osint.collectors.financial import wallet_collector +from zettelforge.osint.collectors.infrastructure import dns_collector, whois_collector +from zettelforge.osint.collectors.people import maigret_collector +from zettelforge.osint.entity_resolver import canonicalise_value +from zettelforge.osint.executor import SUPPORTED_SEED_TYPES, run_osint_collection + + +# --------------------------------------------------------------------------- +# Ontology additions +# --------------------------------------------------------------------------- + + +def test_breach_entity_registered() -> None: + assert "Breach" in ENTITY_TYPES + + +@pytest.mark.parametrize( + ("from_type", "edge", "to_type"), + [ + ("DomainName", "registered_by", "EmailAddress"), + ("EmailAddress", "appeared_in_breach", "Breach"), + ("Alias", "has_account", "SocialAccount"), + ("CryptoWallet", "sent_transaction", "Transaction"), + ("Transaction", "received_transaction", "CryptoWallet"), + ], +) +def test_new_edges_validate(from_type: str, edge: str, to_type: str) -> None: + ok, errs = OntologyValidator().validate_relation(from_type, edge, to_type) + assert ok, f"{from_type} -{edge}-> {to_type} rejected: {errs}" + + +def test_appeared_in_breach_rejects_wrong_source() -> None: + ok, _ = OntologyValidator().validate_relation("DomainName", "appeared_in_breach", "Breach") + assert not ok + + +# --------------------------------------------------------------------------- +# entity_resolver canonicalization for the new seed / output types +# 
--------------------------------------------------------------------------- + + +def test_canonicalise_value_new_types() -> None: + assert canonicalise_value("EmailAddress", " Alice@Example.COM ") == "alice@example.com" + assert canonicalise_value("Alias", " BobZ ") == "bobz" + assert ( + canonicalise_value("CryptoWallet", "0xABCdef0000000000000000000000000000000001") + == "0xabcdef0000000000000000000000000000000001" + ) + assert canonicalise_value("Transaction", " 0xDEADBEEF ") == "0xdeadbeef" + assert canonicalise_value("SocialAccount", "AliceB@Twitter") == "aliceb@twitter" + assert canonicalise_value("Breach", " Adobe ") == "adobe" + + +def test_new_seed_types_supported() -> None: + for seed in ("EmailAddress", "Alias", "CryptoWallet"): + assert seed in SUPPORTED_SEED_TYPES + + +# --------------------------------------------------------------------------- +# whois — registrant EmailAddress branch +# --------------------------------------------------------------------------- + + +def test_whois_domain_emits_registrant_email() -> None: + fake = SimpleNamespace( + org="Evil Corp", + organization=None, + registrant=None, + name=None, + emails=["Abuse@Evil.example"], + ) + with patch.object(whois_collector, "_lookup_domain", return_value=fake): + out = whois_collector.collect("DomainName", "evil.example") + + emails = [t for t in out if t.output_entity_type == "EmailAddress"] + assert len(emails) == 1 + tup = emails[0] + assert tup.edge_type == "registered_by" + assert tup.from_entity_type == "DomainName" + assert tup.output_value == "abuse@evil.example" # canonicalized + + +def test_whois_domain_no_email_field_emits_only_org() -> None: + fake = SimpleNamespace(org="Evil Corp", organization=None, registrant=None, name=None) + with patch.object(whois_collector, "_lookup_domain", return_value=fake): + out = whois_collector.collect("DomainName", "evil.example") + assert {t.output_entity_type for t in out} == {"Organization"} + + +# 
--------------------------------------------------------------------------- +# DNS — reverse PTR for IP seeds +# --------------------------------------------------------------------------- + + +def test_dns_reverse_ptr_emits_hosts_domain() -> None: + resolver = MagicMock() + resolver.resolve.return_value = ["dns.google."] + with ( + patch.object(dns_collector, "_make_resolver", return_value=resolver), + patch.object(dns_collector, "_reverse_pointer", return_value="8.8.8.8.in-addr.arpa"), + ): + out = dns_collector.collect("IPv4Address", "8.8.8.8") + + assert len(out) == 1 + tup = out[0] + assert tup.output_entity_type == "DomainName" + assert tup.edge_type == "hosts" + assert tup.from_entity_type == "IPv4Address" + assert tup.output_value == "dns.google" + + +def test_dns_reverse_ptr_skips_non_global_ip() -> None: + # Reserved/private IPs are skipped before any resolver is built. + with patch.object(dns_collector, "_make_resolver", side_effect=AssertionError): + assert dns_collector.collect("IPv4Address", "192.0.2.1") == [] + assert dns_collector.collect("IPv4Address", "10.0.0.1") == [] + + +# --------------------------------------------------------------------------- +# maigret — username -> SocialAccount +# --------------------------------------------------------------------------- + + +def test_maigret_emits_social_accounts() -> None: + rows = [ + {"platform": "GitHub", "url": "https://github.com/bobz"}, + {"platform": "Twitter", "url": "https://twitter.com/bobz"}, + ] + with patch.object(maigret_collector, "_search_username", return_value=rows): + out = maigret_collector.collect("Alias", "BobZ") + + assert {t.output_entity_type for t in out} == {"SocialAccount"} + assert all(t.edge_type == "has_account" for t in out) + assert all(t.from_entity_type == "Alias" for t in out) + ids = {t.output_value for t in out} + assert ids == {"bobz@github", "bobz@twitter"} + + +def test_maigret_rejects_non_alias() -> None: + assert maigret_collector.collect("EmailAddress", 
"x@y.com") == [] + + +# --------------------------------------------------------------------------- +# HIBP — email -> Breach +# --------------------------------------------------------------------------- + + +def test_hibp_emits_breaches(monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.setenv("HIBP_API_KEY", "test-key") + breaches = [ + { + "Name": "Adobe", + "Title": "Adobe", + "Domain": "adobe.com", + "PwnCount": 153000000, + "DataClasses": ["Email addresses", "Passwords"], + "IsVerified": True, + }, + {"Name": "LinkedIn", "Domain": "linkedin.com"}, + ] + with patch.object(hibp_collector, "_fetch_breaches", return_value=breaches) as fetch: + out = hibp_collector.collect("EmailAddress", "Victim@Example.com") + # key passed to the seam, email canonicalized + fetch.assert_called_once() + assert fetch.call_args.args[0] == "victim@example.com" + assert fetch.call_args.args[1] == "test-key" + + assert {t.output_entity_type for t in out} == {"Breach"} + assert all(t.edge_type == "appeared_in_breach" for t in out) + assert {t.output_value for t in out} == {"Adobe", "LinkedIn"} + + +def test_hibp_fail_closed_without_key(monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.delenv("HIBP_API_KEY", raising=False) + assert hibp_collector.collect("EmailAddress", "x@y.com") == [] + + +# --------------------------------------------------------------------------- +# Wallet — CryptoWallet -> Transaction +# --------------------------------------------------------------------------- + + +def _wallet() -> str: + return "0x" + "ab" * 20 # 40 hex chars + + +def test_wallet_emits_sent_and_received(monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.setenv("ETHERSCAN_API_KEY", "test-key") + wallet = _wallet() + other = "0x" + "cd" * 20 + txs = [ + {"hash": "0xAAA", "from": wallet, "to": other, "value": "10", "blockNumber": "1"}, + {"hash": "0xBBB", "from": other, "to": wallet, "value": "20", "blockNumber": "2"}, + ] + with patch.object(wallet_collector, 
"_fetch_transactions", return_value=txs): + out = wallet_collector.collect("CryptoWallet", wallet.upper()) # checksummed input + + by_edge = {t.edge_type for t in out} + assert by_edge == {"sent_transaction", "received_transaction"} + assert all(t.output_entity_type == "Transaction" for t in out) + sent = next(t for t in out if t.edge_type == "sent_transaction") + assert sent.from_entity_type == "CryptoWallet" + assert sent.to_entity_type == "Transaction" + assert sent.output_value == "0xaaa" + + +def test_wallet_rejects_non_evm(monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.setenv("ETHERSCAN_API_KEY", "test-key") + assert wallet_collector.collect("CryptoWallet", "1BoatSLRHtKNngkdXEeobR76b53LETtpyT") == [] + + +def test_wallet_fail_closed_without_key(monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.delenv("ETHERSCAN_API_KEY", raising=False) + assert wallet_collector.collect("CryptoWallet", _wallet()) == [] + + +# --------------------------------------------------------------------------- +# Executor end-to-end: a new seed type persists through the full path +# --------------------------------------------------------------------------- + + +def test_executor_persists_wallet_transactions(tmp_path, monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.setenv("ETHERSCAN_API_KEY", "test-key") + kg = KnowledgeGraph(data_dir=str(tmp_path)) + wallet = _wallet() + txs = [{"hash": "0xAAA", "from": wallet, "to": "0x" + "cd" * 20, "value": "1"}] + + with patch.object(wallet_collector, "_fetch_transactions", return_value=txs): + result = run_osint_collection( + "CryptoWallet", + wallet, + kg=kg, + collector_names=["wallet_collector"], + ) + + assert result.error_count == 0 + assert result.persisted_count == 1 + persisted = result.persisted[0] + assert persisted.edge_type == "sent_transaction" + assert kg.get_node("Transaction", "0xaaa") is not None + assert kg.get_node("CryptoWallet", wallet) is not None + + +def 
test_executor_accepts_email_seed_without_keys(tmp_path) -> None: + # No API keys set: every EmailAddress collector fails closed, but the seed + # still validates and persists and the run does not raise. + kg = KnowledgeGraph(data_dir=str(tmp_path)) + result = run_osint_collection("EmailAddress", "Person@Example.com", kg=kg) + assert result.canonical_input_value == "person@example.com" + assert result.seed_node_id is not None From 7bbf6cbf313dc673e0848a48b7fabb236013f280 Mon Sep 17 00:00:00 2001 From: Patrick Roland Date: Tue, 16 Jun 2026 11:44:50 -0500 Subject: [PATCH 21/24] chore(osint): update AGE-120 audit evidence --- .github/workflows/ci.yml | 12 +++++++++++- .../osint/THIRD_PARTY/AGE-120-pip-audit.md | 17 +++++++++++++++-- 2 files changed, 26 insertions(+), 3 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 06203a8..0e02313 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -57,9 +57,19 @@ jobs: # install, not at runtime; CI builds in ephemeral runners with # no persistent state. Re-evaluate when GitHub's images ship a # patched pip. + # + # CVE-2023-36464 / GHSA-4vvm-4w3v-6mr8: medium-severity + # infinite-loop DoS in PyPDF2 3.0.1, introduced transitively by + # Maigret. PyPDF2 has no patched release under that package name + # (upstream recommends migrating to pypdf>=3.9.0), and ZettelForge's + # AGE-120 username collector does not parse attacker-supplied PDFs or + # invoke Maigret report generation. Accepted for AGE-120 because the + # GOV-009 blocking threshold is HIGH/CRITICAL and the collector + # lazy-imports/fails closed. 
pip-audit --strict \ --ignore-vuln=CVE-2026-3219 \ - --ignore-vuln=PYSEC-2026-196 + --ignore-vuln=PYSEC-2026-196 \ + --ignore-vuln=CVE-2023-36464 test: runs-on: ubuntu-latest diff --git a/src/zettelforge/osint/THIRD_PARTY/AGE-120-pip-audit.md b/src/zettelforge/osint/THIRD_PARTY/AGE-120-pip-audit.md index 0131e4f..3cf4cdb 100644 --- a/src/zettelforge/osint/THIRD_PARTY/AGE-120-pip-audit.md +++ b/src/zettelforge/osint/THIRD_PARTY/AGE-120-pip-audit.md @@ -10,7 +10,7 @@ local pre-merge evidence for AGE-120. `pip-audit 2.10.0` (OSV + PyPI advisory DB). -## Result: no known vulnerabilities +## Result Audited the resolved dependency closure of the `[osint]` extra, split because maigret/sherlock are declared but not installed in the dev box used for this @@ -19,12 +19,25 @@ run: | Requirement set | Packages (resolved closure) | Result | |---|---|---| | core osint | dnspython>=2.4.0, python-whois>=0.9.0, ipwhois>=1.2.0, httpx>=0.25.0 | No known vulnerabilities found | -| new osint (AGE-120) | maigret>=0.4.0, sherlock-project>=0.14.0 | No known vulnerabilities found | +| new osint (AGE-120) | maigret>=0.4.0, sherlock-project>=0.14.0 | One medium-severity transitive finding: CVE-2023-36464 / GHSA-4vvm-4w3v-6mr8 in PyPDF2 3.0.1 | `httpx` (already a core dependency) backs both the native HIBP v3 REST call and the block-explorer call, so `email_to_breaches` and `wallet_to_transactions` add no new packages. +## Accepted medium finding + +Maigret 0.6.1 currently declares `PyPDF2>=3.0.1,<4.0.0`. OSV reports +`CVE-2023-36464` against PyPDF2 3.0.1 with CVSS 6.2 and GitHub Advisory +severity `medium`; PyPDF2 has no patched release under that package name. +Upstream recommends migrating to `pypdf>=3.9.0`. + +AGE-120 does not parse attacker-supplied PDFs or invoke Maigret's report +generation path. The ZettelForge collector lazy-imports Maigret only for +username account discovery and fails closed when the dependency is absent or +errors. 
Per GOV-009, the blocking threshold is HIGH/CRITICAL, so CI carries an +explicit `--ignore-vuln=CVE-2023-36464` with this citation. + ## Notes - The two HTTP enrichers contact their endpoints directly; no breach- or From 76d2bcb21e2c01ab3cb482b7f2a9bdcfaeac1d7a Mon Sep 17 00:00:00 2001 From: Patrick Roland Date: Tue, 16 Jun 2026 13:34:31 -0500 Subject: [PATCH 22/24] style: format memory defense --- src/zettelforge/memory_defense.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/zettelforge/memory_defense.py b/src/zettelforge/memory_defense.py index d3f39f8..a1dfa9c 100644 --- a/src/zettelforge/memory_defense.py +++ b/src/zettelforge/memory_defense.py @@ -417,9 +417,7 @@ def _memsad_score(candidate_vector: list[float], refs: list[Any]) -> tuple[float return 0.5 * max_similarity + 0.5 * mean_similarity, max_similarity, mean_similarity -def _memsad_score_py( - candidate_vector: list[float], refs: list[Any] -) -> tuple[float, float, float]: +def _memsad_score_py(candidate_vector: list[float], refs: list[Any]) -> tuple[float, float, float]: """Original pure-Python scoring; retained as the degenerate-shape fallback.""" similarities = [_cosine(candidate_vector, _note_vector(ref)) for ref in refs] if not similarities: From 2ddebcceffd82651e030f3261786e6da1199b235 Mon Sep 17 00:00:00 2001 From: Patrick Roland Date: Tue, 16 Jun 2026 14:04:55 -0500 Subject: [PATCH 23/24] Address AGE-120 review feedback Co-Authored-By: Paperclip --- src/zettelforge/config.py | 5 + src/zettelforge/memory_manager.py | 69 ++++++- .../osint/collectors/breach/hibp_collector.py | 22 +- .../collectors/financial/wallet_collector.py | 24 ++- .../collectors/people/maigret_collector.py | 19 +- src/zettelforge/osint/executor.py | 104 +++++++++- src/zettelforge/osint/ontology.py | 4 +- src/zettelforge/vector_memory.py | 9 +- tests/test_defense_reference_window.py | 65 ++++-- tests/test_enrichment_switch.py | 42 ++-- tests/test_graph_scoping.py | 113 +++++----- 
tests/test_osint_age119_gap_types.py | 2 +- tests/test_osint_enrichers_age120.py | 194 +++++++++++++++++- 13 files changed, 547 insertions(+), 125 deletions(-) diff --git a/src/zettelforge/config.py b/src/zettelforge/config.py index f9d029c..925eed3 100644 --- a/src/zettelforge/config.py +++ b/src/zettelforge/config.py @@ -482,6 +482,11 @@ def _apply_yaml(cfg: ZettelForgeConfig, data: dict): if hasattr(cfg.llm_ner, k): setattr(cfg.llm_ner, k, v) + if "enrichment" in data and isinstance(data["enrichment"], dict): + for k, v in data["enrichment"].items(): + if hasattr(cfg.enrichment, k): + setattr(cfg.enrichment, k, v) + if "extraction" in data and isinstance(data["extraction"], dict): for k, v in data["extraction"].items(): if hasattr(cfg.extraction, k): diff --git a/src/zettelforge/memory_manager.py b/src/zettelforge/memory_manager.py index 882ccf9..ae90785 100644 --- a/src/zettelforge/memory_manager.py +++ b/src/zettelforge/memory_manager.py @@ -55,6 +55,17 @@ _reranker_lock = threading.Lock() +def _has_valid_memory_defense_vector(note: Any) -> bool: + embedding = getattr(note, "embedding", None) + vector = getattr(embedding, "vector", None) + if not isinstance(vector, list) or not vector: + return False + try: + return any(float(value) != 0.0 for value in vector) + except (TypeError, ValueError): + return False + + def _get_reranker(): """Get or create the configured cross-encoder reranker (loads once per model).""" retrieval_cfg = get_config().retrieval @@ -300,13 +311,15 @@ def _remember_inner( _p = time.perf_counter() try: - # Bounded reference window: the gate keeps the most recent - # max_reference_notes valid-vector notes, so fetching the whole - # domain (O(n) rows + Pydantic parses per ingest) is waste. 4x - # overfetch leaves margin for notes the gate filters out. 
+ # Bounded reference window with sparse-vector backfill: the gate + # keeps max_reference_notes valid-vector notes, so start bounded + # and grow only when invalid recent notes would underfill it. _defense_cfg = get_config().governance.memory_defense - _fetch_limit = max(200, 4 * _defense_cfg.max_reference_notes) - reference_notes = self.store.get_recent_notes_by_domain(domain, _fetch_limit) + reference_notes = self._memory_defense_reference_notes( + domain, + max_reference_notes=_defense_cfg.max_reference_notes, + min_calibration_notes=_defense_cfg.min_calibration_notes, + ) self.memory_defense.enforce( note, reference_notes, @@ -986,16 +999,52 @@ def _filter_low_signal_entities( for value in values: if not value: continue - # Fan-out where flooding actually happens: KG out-degree. - # (Supersession prunes the entity index but MENTIONED_IN - # edges accumulate one per note.) + # Fan-out where flooding actually happens: note references. + # Other graph facts, including OSINT enrichment edges, should + # not make a discriminative entity look corpus-wide. 
node = self.store.get_kg_node(etype, value) - fanout = len(self.store.get_kg_edges_from(node["node_id"])) if node else 0 + fanout = self._note_fanout(node["node_id"]) if node else 0 if fanout <= limit: kept.append(value) filtered[etype] = kept return filtered + def _memory_defense_reference_notes( + self, + domain: str, + *, + max_reference_notes: int, + min_calibration_notes: int, + ) -> list[MemoryNote]: + """Fetch a bounded recent window, widening only to fill valid refs.""" + max_refs = max(0, int(max_reference_notes)) + min_refs = max(0, int(min_calibration_notes)) + fetch_limit = max(200, 4 * max_refs, 4 * min_refs) + max_fetch_limit = max(fetch_limit, 20 * max_refs, 20 * min_refs) + target_valid = max_refs + + while True: + reference_notes = self.store.get_recent_notes_by_domain(domain, fetch_limit) + valid_count = sum( + 1 for note in reference_notes if _has_valid_memory_defense_vector(note) + ) + if valid_count >= target_valid: + return reference_notes + if len(reference_notes) < fetch_limit or fetch_limit >= max_fetch_limit: + return reference_notes + fetch_limit = min(fetch_limit * 2, max_fetch_limit) + + def _note_fanout(self, node_id: str) -> int: + fanout = 0 + for edge in self.store.get_kg_edges_from(node_id): + if edge.get("relationship") == "MENTIONED_IN": + fanout += 1 + continue + target = self.store.get_kg_node_by_id(edge.get("to_node_id", "")) + if target and target.get("entity_type") == "note": + fanout += 1 + return fanout + def recall_entity(self, entity_type: str, entity_value: str, k: int = 5) -> list[MemoryNote]: """ Fast lookup by entity type and value. 
diff --git a/src/zettelforge/osint/collectors/breach/hibp_collector.py b/src/zettelforge/osint/collectors/breach/hibp_collector.py index e44a8ce..73f3458 100644 --- a/src/zettelforge/osint/collectors/breach/hibp_collector.py +++ b/src/zettelforge/osint/collectors/breach/hibp_collector.py @@ -20,6 +20,7 @@ from __future__ import annotations +import hashlib import os from typing import Any from urllib.parse import quote @@ -43,6 +44,11 @@ DEFAULT_TIMEOUT = 15.0 +def _email_log_ref(email: str) -> str: + """Stable non-reversible identifier for logs.""" + return hashlib.sha256(email.encode("utf-8")).hexdigest()[:16] + + def _fetch_breaches(email: str, api_key: str) -> list[dict[str, Any]]: """Call HIBP v3 breachedaccount. Returns the parsed breach list. @@ -52,6 +58,7 @@ def _fetch_breaches(email: str, api_key: str) -> list[dict[str, Any]]: url = f"{API_BASE}/{quote(email)}" headers = {"hibp-api-key": api_key, "User-Agent": USER_AGENT} params = {"truncateResponse": "false"} + email_ref = _email_log_ref(email) try: with httpx.Client(timeout=DEFAULT_TIMEOUT) as client: response = client.get(url, headers=headers, params=params) @@ -60,13 +67,22 @@ def _fetch_breaches(email: str, api_key: str) -> list[dict[str, Any]]: response.raise_for_status() payload = response.json() except httpx.HTTPError as exc: - _logger.warning("hibp_collector_http_error", email=email, error=str(exc)) + _logger.warning( + "hibp_collector_http_error", + email_ref=email_ref, + error_type=exc.__class__.__name__, + status_code=getattr(getattr(exc, "response", None), "status_code", None), + ) return [] except ValueError as exc: # JSON decode error - _logger.warning("hibp_collector_json_error", email=email, error=str(exc)) + _logger.warning( + "hibp_collector_json_error", + email_ref=email_ref, + error_type=exc.__class__.__name__, + ) return [] if not isinstance(payload, list): - _logger.warning("hibp_collector_unexpected_shape", email=email) + _logger.warning("hibp_collector_unexpected_shape", 
email_ref=email_ref) return [] return [item for item in payload if isinstance(item, dict)] diff --git a/src/zettelforge/osint/collectors/financial/wallet_collector.py b/src/zettelforge/osint/collectors/financial/wallet_collector.py index 6b2fa50..117c061 100644 --- a/src/zettelforge/osint/collectors/financial/wallet_collector.py +++ b/src/zettelforge/osint/collectors/financial/wallet_collector.py @@ -38,20 +38,22 @@ _logger = get_logger("zettelforge.osint.collectors.wallet") API_KEY_ENV = "ETHERSCAN_API_KEY" -API_URL = "https://api.etherscan.io/api" +API_URL = "https://api.etherscan.io/v2/api" CHAIN = "eth" +CHAIN_ID = "1" DEFAULT_TIMEOUT = 15.0 # Cap so a high-volume wallet cannot flood the graph in one collection. MAX_TX = 100 def _fetch_transactions(wallet: str, api_key: str) -> list[dict[str, Any]]: - """Call Etherscan ``account/txlist``. Returns the parsed tx list. + """Call Etherscan V2 ``account/txlist``. Returns the parsed tx list. The API key travels only in the query string and is never logged. A ``status`` of ``"0"`` (no transactions or upstream error) yields ``[]``. 
""" params = { + "chainid": CHAIN_ID, "module": "account", "action": "txlist", "address": wallet, @@ -68,10 +70,20 @@ def _fetch_transactions(wallet: str, api_key: str) -> list[dict[str, Any]]: response.raise_for_status() payload = response.json() except httpx.HTTPError as exc: - _logger.warning("wallet_collector_http_error", wallet=wallet, error=str(exc)) + status_code = getattr(getattr(exc, "response", None), "status_code", None) + _logger.warning( + "wallet_collector_http_error", + wallet=wallet, + error_type=exc.__class__.__name__, + status_code=status_code, + ) return [] except ValueError as exc: # JSON decode error - _logger.warning("wallet_collector_json_error", wallet=wallet, error=str(exc)) + _logger.warning( + "wallet_collector_json_error", + wallet=wallet, + error_type=exc.__class__.__name__, + ) return [] if not isinstance(payload, dict): _logger.warning("wallet_collector_unexpected_shape", wallet=wallet) @@ -138,8 +150,8 @@ def _to_tuples(wallet: str, records: list[dict[str, Any]]) -> list[CollectorTupl output_entity_type="Transaction", output_value=tx_hash, edge_type="received_transaction", - from_entity_type="Transaction", - to_entity_type="CryptoWallet", + from_entity_type="CryptoWallet", + to_entity_type="Transaction", output_props=props, edge_props={}, ) diff --git a/src/zettelforge/osint/collectors/people/maigret_collector.py b/src/zettelforge/osint/collectors/people/maigret_collector.py index dcb59e9..c298ae6 100644 --- a/src/zettelforge/osint/collectors/people/maigret_collector.py +++ b/src/zettelforge/osint/collectors/people/maigret_collector.py @@ -20,6 +20,7 @@ from __future__ import annotations +import logging from typing import Any from zettelforge.log import get_logger @@ -44,9 +45,8 @@ def _search_username(username: str) -> list[dict[str, Any]]: installed or the search fails — fail-closed, no silent retry. maigret's public search API is async, so it is driven on a private event loop. 
- ponytail: live maigret wiring is best-effort behind a single seam; tests + The live maigret wiring is best-effort behind a single boundary; tests mock this function and exercise the pure mapping in ``_rows_to_tuples``. - Pin maigret in ``[osint]`` before relying on the live path in prod. """ try: import asyncio @@ -58,10 +58,21 @@ def _search_username(username: str) -> list[dict[str, Any]]: return [] try: - db = MaigretDatabase().load_from_path(maigret.settings.Settings().sites_db_path) + settings = maigret.settings.Settings() + load = getattr(settings, "load", None) + if callable(load): + load() + db = MaigretDatabase().load_from_path(settings.sites_db_path) sites = db.ranked_sites_dict(top=MAX_ACCOUNTS) + backend_logger = logging.getLogger("zettelforge.osint.collectors.maigret.backend") results = asyncio.run( - maigret.search(username=username, site_dict=sites, timeout=30, no_progressbar=True) + maigret.search( + username=username, + site_dict=sites, + timeout=30, + logger=backend_logger, + no_progressbar=True, + ) ) except Exception as exc: # backend boundary: any maigret failure fails closed _logger.warning("maigret_collector_failed", error=str(exc)) diff --git a/src/zettelforge/osint/executor.py b/src/zettelforge/osint/executor.py index 0adf26f..1ee96c1 100644 --- a/src/zettelforge/osint/executor.py +++ b/src/zettelforge/osint/executor.py @@ -32,6 +32,7 @@ TransformRegistry, get_transform_registry, ) +from zettelforge.storage_backend import StorageBackend _logger = get_logger("zettelforge.osint.executor") @@ -123,6 +124,7 @@ def run_osint_collection( input_value: str, *, kg: KnowledgeGraph | None = None, + store: StorageBackend | None = None, registry: TransformRegistry | None = None, validator: OntologyValidator | None = None, collector_names: Sequence[str] | None = None, @@ -139,6 +141,10 @@ def run_osint_collection( Seed value. It is canonicalized before KG writes. kg: Optional ``KnowledgeGraph`` instance. Defaults to the global KG. 
+ store: + Optional scoped ``StorageBackend``. When provided, OSINT nodes/edges + are persisted to the same KG tables used by recall graph traversal. + Pass either ``kg`` or ``store``, not both. registry: Optional collector registry. Defaults to ``TRANSFORM_REGISTRY``. validator: @@ -150,6 +156,9 @@ def run_osint_collection( """ merge_into_global_ontology() + if kg is not None and store is not None: + raise ValueError("pass either kg or store to run_osint_collection, not both") + if input_entity_type not in SUPPORTED_SEED_TYPES: raise ValueError( f"unsupported OSINT seed type {input_entity_type!r}; " @@ -158,7 +167,8 @@ def run_osint_collection( registry = registry or get_transform_registry() validator = validator or OntologyValidator() - kg = kg or get_knowledge_graph() + if store is None: + kg = kg or get_knowledge_graph() allowed_collectors = None if collector_names is None else set(collector_names) canonical_input_value = canonicalise_value(input_entity_type, input_value) @@ -167,7 +177,16 @@ def run_osint_collection( seed_node_id: str | None = None if persist: - seed_node_id, _ = add_resolved(kg, input_entity_type, canonical_input_value, seed_props) + if store is not None: + seed_node_id = store.add_kg_node( + input_entity_type, + canonical_input_value, + seed_props, + ) + else: + if kg is None: + raise RuntimeError("KnowledgeGraph target was not initialized") + seed_node_id, _ = add_resolved(kg, input_entity_type, canonical_input_value, seed_props) result = OSINTCollectionResult( input_entity_type=input_entity_type, @@ -200,14 +219,26 @@ def run_osint_collection( canonical_input_value, ) if persist: - persisted = _persist_tuple( - kg=kg, - validator=validator, - collector=meta, - tup=tup, - input_entity_type=input_entity_type, - canonical_input_value=canonical_input_value, - ) + if store is not None: + persisted = _persist_tuple_to_store( + store=store, + validator=validator, + collector=meta, + tup=tup, + input_entity_type=input_entity_type, + 
canonical_input_value=canonical_input_value, + ) + else: + if kg is None: + raise RuntimeError("KnowledgeGraph target was not initialized") + persisted = _persist_tuple( + kg=kg, + validator=validator, + collector=meta, + tup=tup, + input_entity_type=input_entity_type, + canonical_input_value=canonical_input_value, + ) result.persisted.append(persisted) else: from_value = _derive_endpoint_value( @@ -333,6 +364,59 @@ def _persist_tuple( ) +def _persist_tuple_to_store( + *, + store: StorageBackend, + validator: OntologyValidator, + collector: TransformMetadata, + tup: CollectorTuple, + input_entity_type: str, + canonical_input_value: str, +) -> PersistedOSINTTuple: + output_value = canonicalise_value(tup.output_entity_type, tup.output_value) + output_props = _entity_properties(tup.output_entity_type, output_value, tup.output_props) + from_value, to_value = _validate_tuple_endpoints( + tup=tup, + validator=validator, + input_entity_type=input_entity_type, + canonical_input_value=canonical_input_value, + ) + from_props = _endpoint_properties(tup.from_entity_type, from_value, tup, input_entity_type) + to_props = _endpoint_properties(tup.to_entity_type, to_value, tup, input_entity_type) + + output_node_id = store.add_kg_node(tup.output_entity_type, output_value, output_props) + store.add_kg_node(tup.from_entity_type, from_value, from_props) + store.add_kg_node(tup.to_entity_type, to_value, to_props) + + edge_props = dict(tup.edge_props) + edge_props.setdefault("collector", collector.name) + edge_props.setdefault("source", collector.name) + edge_props.setdefault("osint", True) + edge_props.setdefault("edge_type", "osint") + + edge_id = store.add_kg_edge( + tup.from_entity_type, + from_value, + tup.to_entity_type, + to_value, + tup.edge_type, + properties=edge_props, + ) + + return PersistedOSINTTuple( + collector_name=collector.name, + output_entity_type=tup.output_entity_type, + output_value=output_value, + output_node_id=output_node_id, + edge_id=edge_id, + 
from_entity_type=tup.from_entity_type, + from_value=from_value, + to_entity_type=tup.to_entity_type, + to_value=to_value, + edge_type=tup.edge_type, + ) + + def _validate_tuple_endpoints( *, tup: CollectorTuple, diff --git a/src/zettelforge/osint/ontology.py b/src/zettelforge/osint/ontology.py index e43bb7c..bf4f14f 100644 --- a/src/zettelforge/osint/ontology.py +++ b/src/zettelforge/osint/ontology.py @@ -376,8 +376,8 @@ "cardinality": "many_to_many", }, "received_transaction": { - "from_types": ["Transaction"], - "to_types": ["CryptoWallet"], + "from_types": ["CryptoWallet"], + "to_types": ["Transaction"], "cardinality": "many_to_many", }, "controls_wallet": { diff --git a/src/zettelforge/vector_memory.py b/src/zettelforge/vector_memory.py index 08c613f..dbbd276 100644 --- a/src/zettelforge/vector_memory.py +++ b/src/zettelforge/vector_memory.py @@ -139,11 +139,16 @@ def get_embedding(text: str, model: str | None = None) -> list[float]: cache = _get_embedding_cache() key_model = model or get_embedding_model() key = f"{key_model}:{hashlib.sha256(text.encode()).hexdigest()}" - cached: list[float] | None = cache.get(key) + with _embedding_cache_lock: + cached: list[float] | None = cache.get(key) if cached is not None: return cached embedding = _compute_embedding(text, model) - cache.set(key, embedding) + with _embedding_cache_lock: + cached = cache.get(key) + if cached is not None: + return cached + cache.set(key, embedding) return embedding diff --git a/tests/test_defense_reference_window.py b/tests/test_defense_reference_window.py index 443c583..08b1e10 100644 --- a/tests/test_defense_reference_window.py +++ b/tests/test_defense_reference_window.py @@ -4,6 +4,8 @@ whole domain per ingest was O(n) rows + Pydantic parses. 
""" +from types import SimpleNamespace + import pytest from zettelforge.config import reload_config @@ -11,7 +13,7 @@ @pytest.fixture(autouse=True) def _no_enrichment(monkeypatch): - monkeypatch.setenv('ZETTELFORGE_ENRICHMENT_ENABLED', 'false') + monkeypatch.setenv("ZETTELFORGE_ENRICHMENT_ENABLED", "false") reload_config() yield reload_config() @@ -20,19 +22,17 @@ def _no_enrichment(monkeypatch): def test_recent_notes_by_domain_orders_and_limits(tmp_path): from zettelforge.memory_manager import MemoryManager - mm = MemoryManager( - jsonl_path=str(tmp_path / 'n.jsonl'), lance_path=str(tmp_path / 'v') - ) + mm = MemoryManager(jsonl_path=str(tmp_path / "n.jsonl"), lance_path=str(tmp_path / "v")) for i in range(5): - mm.remember(f'cti note {i}', source_type='threat_report', - source_ref=f'c{i}', domain='cti') + mm.remember(f"cti note {i}", source_type="threat_report", source_ref=f"c{i}", domain="cti") for i in range(3): - mm.remember(f'general note {i}', source_type='conversation', - source_ref=f'g{i}', domain='general') + mm.remember( + f"general note {i}", source_type="conversation", source_ref=f"g{i}", domain="general" + ) - recent = mm.store.get_recent_notes_by_domain('cti', 3) + recent = mm.store.get_recent_notes_by_domain("cti", 3) assert len(recent) == 3 - assert all(n.metadata.domain == 'cti' for n in recent) + assert all(n.metadata.domain == "cti" for n in recent) timestamps = [n.created_at for n in recent] assert timestamps == sorted(timestamps, reverse=True) @@ -40,21 +40,48 @@ def test_recent_notes_by_domain_orders_and_limits(tmp_path): def test_defense_gate_receives_bounded_reference_set(tmp_path, monkeypatch): from zettelforge.memory_manager import MemoryManager - mm = MemoryManager( - jsonl_path=str(tmp_path / 'n.jsonl'), lance_path=str(tmp_path / 'v') - ) - seen = {'sizes': []} + mm = MemoryManager(jsonl_path=str(tmp_path / "n.jsonl"), lance_path=str(tmp_path / "v")) + seen = {"sizes": []} orig = mm.memory_defense.enforce def recording(note, 
reference_notes, **kwargs): - seen['sizes'].append(len(reference_notes)) + seen["sizes"].append(len(reference_notes)) return orig(note, reference_notes, **kwargs) - monkeypatch.setattr(mm.memory_defense, 'enforce', recording) + monkeypatch.setattr(mm.memory_defense, "enforce", recording) for i in range(6): - mm.remember(f'note {i}', source_type='conversation', - source_ref=f's{i}', domain='general') + mm.remember(f"note {i}", source_type="conversation", source_ref=f"s{i}", domain="general") # Window is max(200, 4 * max_reference_notes); with 6 notes the gate # sees at most the existing store, never more than the window. - assert seen['sizes'] == [min(i, 200) for i in range(6)] + assert seen["sizes"] == [min(i, 200) for i in range(6)] + + +def test_defense_reference_window_expands_when_recent_vectors_are_invalid(tmp_path, monkeypatch): + from zettelforge.memory_manager import MemoryManager + + mm = MemoryManager(jsonl_path=str(tmp_path / "n.jsonl"), lance_path=str(tmp_path / "v")) + limits = [] + invalid_notes = [ + SimpleNamespace(id=f"i{i}", embedding=SimpleNamespace(vector=[])) for i in range(200) + ] + valid_notes = [ + SimpleNamespace(id=f"v{i}", embedding=SimpleNamespace(vector=[0.1, 0.2])) for i in range(50) + ] + + def recent(_domain, limit): + limits.append(limit) + if limit == 200: + return invalid_notes + return [*invalid_notes, *valid_notes] + + monkeypatch.setattr(mm.store, "get_recent_notes_by_domain", recent) + + refs = mm._memory_defense_reference_notes( + "cti", + max_reference_notes=50, + min_calibration_notes=20, + ) + + assert limits == [200, 400] + assert refs == [*invalid_notes, *valid_notes] diff --git a/tests/test_enrichment_switch.py b/tests/test_enrichment_switch.py index 5df87fe..2af4a0f 100644 --- a/tests/test_enrichment_switch.py +++ b/tests/test_enrichment_switch.py @@ -6,7 +6,7 @@ import pytest -from zettelforge.config import get_config, reload_config +from zettelforge.config import ZettelForgeConfig, _apply_yaml, get_config, 
reload_config @pytest.fixture(autouse=True) @@ -16,55 +16,61 @@ def _restore_config(): def test_enrichment_config_default_enabled(monkeypatch): - monkeypatch.delenv('ZETTELFORGE_ENRICHMENT_ENABLED', raising=False) + monkeypatch.delenv("ZETTELFORGE_ENRICHMENT_ENABLED", raising=False) cfg = reload_config() assert cfg.enrichment.enabled is True def test_enrichment_env_override(monkeypatch): - monkeypatch.setenv('ZETTELFORGE_ENRICHMENT_ENABLED', 'false') + monkeypatch.setenv("ZETTELFORGE_ENRICHMENT_ENABLED", "false") cfg = reload_config() assert cfg.enrichment.enabled is False +def test_enrichment_yaml_override(): + cfg = ZettelForgeConfig() + _apply_yaml(cfg, {"enrichment": {"enabled": False}}) + assert cfg.enrichment.enabled is False + + def test_remember_dispatches_nothing_when_disabled(tmp_path, monkeypatch): - monkeypatch.setenv('ZETTELFORGE_ENRICHMENT_ENABLED', 'false') + monkeypatch.setenv("ZETTELFORGE_ENRICHMENT_ENABLED", "false") reload_config() from zettelforge.memory_manager import MemoryManager mm = MemoryManager( - jsonl_path=str(tmp_path / 'notes.jsonl'), - lance_path=str(tmp_path / 'vec'), + jsonl_path=str(tmp_path / "notes.jsonl"), + lance_path=str(tmp_path / "vec"), ) for i in range(4): mm.remember( - f'APT28 used DROPBEAR in campaign {i}.', - source_type='threat_report', - source_ref=f'r{i}', - domain='cti', + f"APT28 used DROPBEAR in campaign {i}.", + source_type="threat_report", + source_ref=f"r{i}", + domain="cti", ) assert mm._enrichment_queue.qsize() == 0 assert len(mm._pending_enrichment) == 0 def test_remember_dispatches_jobs_when_enabled(tmp_path, monkeypatch): - monkeypatch.delenv('ZETTELFORGE_ENRICHMENT_ENABLED', raising=False) + monkeypatch.delenv("ZETTELFORGE_ENRICHMENT_ENABLED", raising=False) reload_config() assert get_config().enrichment.enabled is True from zettelforge.memory_manager import MemoryManager mm = MemoryManager( - jsonl_path=str(tmp_path / 'notes.jsonl'), - lance_path=str(tmp_path / 'vec'), + jsonl_path=str(tmp_path / 
"notes.jsonl"), + lance_path=str(tmp_path / "vec"), ) # Count dispatches without letting the background worker consume them # (avoids racing the worker and avoids real LLM calls). dispatched = [] - monkeypatch.setattr(mm._enrichment_queue, 'put_nowait', dispatched.append) + monkeypatch.setattr(mm._enrichment_queue, "put_nowait", dispatched.append) mm.remember( - 'APT28 used DROPBEAR in a campaign.', - source_type='threat_report', - source_ref='r0', - domain='cti', + "APT28 used DROPBEAR in a campaign.", + source_type="threat_report", + source_ref="r0", + domain="cti", ) assert len(dispatched) > 0 diff --git a/tests/test_graph_scoping.py b/tests/test_graph_scoping.py index 1079f6c..1815a4f 100644 --- a/tests/test_graph_scoping.py +++ b/tests/test_graph_scoping.py @@ -14,7 +14,7 @@ @pytest.fixture(autouse=True) def _no_enrichment(monkeypatch): - monkeypatch.setenv('ZETTELFORGE_ENRICHMENT_ENABLED', 'false') + monkeypatch.setenv("ZETTELFORGE_ENRICHMENT_ENABLED", "false") reload_config() yield reload_config() @@ -25,38 +25,38 @@ def _manager(tmp_path, sub): d = tmp_path / sub d.mkdir() - return MemoryManager(jsonl_path=str(d / 'notes.jsonl'), lance_path=str(d / 'vec')) + return MemoryManager(jsonl_path=str(d / "notes.jsonl"), lance_path=str(d / "vec")) def test_backend_get_kg_edges_from(tmp_path): - mm = _manager(tmp_path, 'a') - mm.store.add_kg_edge('actor', 'APT28', 'note', 'n1', 'MENTIONED_IN') - node = mm.store.get_kg_node('actor', 'APT28') + mm = _manager(tmp_path, "a") + mm.store.add_kg_edge("actor", "APT28", "note", "n1", "MENTIONED_IN") + node = mm.store.get_kg_node("actor", "APT28") assert node is not None - edges = mm.store.get_kg_edges_from(node['node_id']) + edges = mm.store.get_kg_edges_from(node["node_id"]) assert len(edges) == 1 - target = mm.store.get_kg_node_by_id(edges[0]['to_node_id']) - assert target['entity_type'] == 'note' - assert target['entity_value'] == 'n1' + target = mm.store.get_kg_node_by_id(edges[0]["to_node_id"]) + assert 
target["entity_type"] == "note" + assert target["entity_value"] == "n1" def test_graph_retriever_sees_own_store_writes(tmp_path): from zettelforge.graph_retriever import GraphRetriever, StoreGraphSource - mm = _manager(tmp_path, 'a') + mm = _manager(tmp_path, "a") note, _ = mm.remember( - 'APT28 used the DROPBEAR backdoor to target NATO members.', - source_type='threat_report', - source_ref='r1', - domain='cti', + "APT28 used the DROPBEAR backdoor to target NATO members.", + source_type="threat_report", + source_ref="r1", + domain="cti", ) # Mirror _recall_inner's entity resolution for the query - query_entities = mm.indexer.extractor.extract_all('What does APT28 use?') + query_entities = mm.indexer.extractor.extract_all("What does APT28 use?") resolved = { etype: [mm.resolver.resolve(etype, e) for e in elist] for etype, elist in query_entities.items() } - assert any(resolved.values()), 'extractor should find APT28 in the query' + assert any(resolved.values()), "extractor should find APT28 in the query" retriever = GraphRetriever(StoreGraphSource(mm.store)) results = retriever.retrieve_note_ids(query_entities=resolved, max_depth=2) @@ -64,65 +64,84 @@ def test_graph_retriever_sees_own_store_writes(tmp_path): def test_recall_graph_isolated_between_stores(tmp_path): - mm_a = _manager(tmp_path, 'a') + mm_a = _manager(tmp_path, "a") mm_a.remember( - 'APT28 used the DROPBEAR backdoor to target NATO members.', - source_type='threat_report', - source_ref='r1', - domain='cti', + "APT28 used the DROPBEAR backdoor to target NATO members.", + source_type="threat_report", + source_ref="r1", + domain="cti", ) - mm_b = _manager(tmp_path, 'b') + mm_b = _manager(tmp_path, "b") mm_b.remember( - 'The weather in Toronto stayed mild through October.', - source_type='conversation', - source_ref='s1', - domain='general', + "The weather in Toronto stayed mild through October.", + source_type="conversation", + source_ref="s1", + domain="general", ) - lookups = {'n': 0} + lookups = {"n": 
0} orig = mm_b.store.get_note_by_id def counting(nid): - lookups['n'] += 1 + lookups["n"] += 1 return orig(nid) mm_b.store.get_note_by_id = counting - results = mm_b.recall('What does APT28 use?', k=10, exclude_superseded=False) + results = mm_b.recall("What does APT28 use?", k=10, exclude_superseded=False) # Store B has one note; the graph stage must not import thousands of # phantom candidates from store A or the global KG. - assert lookups['n'] <= 10 - assert all('APT28' not in n.content.raw for n in results) + assert lookups["n"] <= 10 + assert all("APT28" not in n.content.raw for n in results) def test_high_fanout_entities_skip_graph_stage(tmp_path): """Entities mapping to a large share of the corpus carry no signal (conversational speaker names): they must not flood blended recall.""" - from zettelforge.graph_retriever import GraphRetriever, StoreGraphSource - - mm = _manager(tmp_path, 'fanout') + mm = _manager(tmp_path, "fanout") for i in range(12): mm.remember( - f'Melanie: session {i} chat about topic {i} with details.', - source_type='dialogue', - source_ref=f's{i}', - domain='locomo', + f"Melanie: session {i} chat about topic {i} with details.", + source_type="dialogue", + source_ref=f"s{i}", + domain="locomo", ) mm.remember( - 'Melanie: I tried the DROPBEAR exploit demo today.', - source_type='dialogue', - source_ref='s99', - domain='locomo', + "Melanie: I tried the DROPBEAR exploit demo today.", + source_type="dialogue", + source_ref="s99", + domain="locomo", ) filtered = mm._filter_low_signal_entities( - {'person': ['melanie'], 'tool': ['dropbear']}, max_fanout=5 + {"person": ["melanie"], "tool": ["dropbear"]}, max_fanout=5 ) - assert filtered.get('person', []) == [] - assert filtered.get('tool') == ['dropbear'] + assert filtered.get("person", []) == [] + assert filtered.get("tool") == ["dropbear"] # End to end: recall must not return only melanie-flooded results when # the query names a discriminative entity. 
- results = mm.recall('What is the DROPBEAR exploit?', k=5, exclude_superseded=False) - assert any('DROPBEAR' in n.content.raw for n in results) + results = mm.recall("What is the DROPBEAR exploit?", k=5, exclude_superseded=False) + assert any("DROPBEAR" in n.content.raw for n in results) + + +def test_low_signal_filter_counts_note_fanout_only(tmp_path): + mm = _manager(tmp_path, "osint-fanout") + wallet = "0x" + "ab" * 20 + for i in range(12): + mm.store.add_kg_edge( + "CryptoWallet", + wallet, + "Transaction", + f"0x{i:064x}", + "sent_transaction", + properties={"edge_type": "osint"}, + ) + + filtered = mm._filter_low_signal_entities( + {"CryptoWallet": [wallet]}, + max_fanout=5, + ) + + assert filtered == {"CryptoWallet": [wallet]} diff --git a/tests/test_osint_age119_gap_types.py b/tests/test_osint_age119_gap_types.py index 60d3b0d..10beb8e 100644 --- a/tests/test_osint_age119_gap_types.py +++ b/tests/test_osint_age119_gap_types.py @@ -43,7 +43,7 @@ def test_gap_entities_registered() -> None: ("from_type", "edge", "to_type"), [ ("CryptoWallet", "sent_transaction", "Transaction"), - ("Transaction", "received_transaction", "CryptoWallet"), + ("CryptoWallet", "received_transaction", "Transaction"), ("Person", "controls_wallet", "CryptoWallet"), ("EmailAddress", "has_account", "SocialAccount"), ], diff --git a/tests/test_osint_enrichers_age120.py b/tests/test_osint_enrichers_age120.py index 35072f2..ee07f4a 100644 --- a/tests/test_osint_enrichers_age120.py +++ b/tests/test_osint_enrichers_age120.py @@ -9,12 +9,15 @@ from __future__ import annotations -from types import SimpleNamespace +import sys +from types import ModuleType, SimpleNamespace from unittest.mock import MagicMock, patch +import httpx import pytest from zettelforge import osint as _osint # noqa: F401 — registers collectors +from zettelforge.graph_retriever import StoreGraphSource from zettelforge.knowledge_graph import KnowledgeGraph from zettelforge.ontology import ENTITY_TYPES, OntologyValidator 
from zettelforge.osint.collectors.breach import hibp_collector @@ -24,7 +27,6 @@ from zettelforge.osint.entity_resolver import canonicalise_value from zettelforge.osint.executor import SUPPORTED_SEED_TYPES, run_osint_collection - # --------------------------------------------------------------------------- # Ontology additions # --------------------------------------------------------------------------- @@ -41,7 +43,7 @@ def test_breach_entity_registered() -> None: ("EmailAddress", "appeared_in_breach", "Breach"), ("Alias", "has_account", "SocialAccount"), ("CryptoWallet", "sent_transaction", "Transaction"), - ("Transaction", "received_transaction", "CryptoWallet"), + ("CryptoWallet", "received_transaction", "Transaction"), ], ) def test_new_edges_validate(from_type: str, edge: str, to_type: str) -> None: @@ -160,6 +162,53 @@ def test_maigret_rejects_non_alias() -> None: assert maigret_collector.collect("EmailAddress", "x@y.com") == [] +def test_maigret_live_path_loads_settings_and_passes_logger( + monkeypatch: pytest.MonkeyPatch, +) -> None: + calls: dict[str, object] = {} + + class FakeSettings: + sites_db_path = "" + + def load(self) -> None: + calls["settings_loaded"] = True + self.sites_db_path = "maigret-sites.json" + + class FakeDatabase: + def load_from_path(self, path: str): + calls["sites_db_path"] = path + return self + + def ranked_sites_dict(self, top: int): + calls["top"] = top + return {"GitHub": object()} + + async def fake_search(**kwargs): + calls["logger"] = kwargs.get("logger") + return { + "GitHub": { + "status": SimpleNamespace(status="CLAIMED"), + "url_user": "https://github.com/bobz", + } + } + + fake_maigret = ModuleType("maigret") + fake_maigret.settings = SimpleNamespace(Settings=FakeSettings) + fake_maigret.search = fake_search + fake_sites = ModuleType("maigret.sites") + fake_sites.MaigretDatabase = FakeDatabase + monkeypatch.setitem(sys.modules, "maigret", fake_maigret) + monkeypatch.setitem(sys.modules, "maigret.sites", fake_sites) + 
+ rows = maigret_collector._search_username("bobz") + + assert rows == [{"platform": "GitHub", "url": "https://github.com/bobz"}] + assert calls["settings_loaded"] is True + assert calls["sites_db_path"] == "maigret-sites.json" + assert calls["top"] == maigret_collector.MAX_ACCOUNTS + assert calls["logger"] is not None + + # --------------------------------------------------------------------------- # HIBP — email -> Breach # --------------------------------------------------------------------------- @@ -195,6 +244,43 @@ def test_hibp_fail_closed_without_key(monkeypatch: pytest.MonkeyPatch) -> None: assert hibp_collector.collect("EmailAddress", "x@y.com") == [] +def test_hibp_logs_redacted_email_reference() -> None: + class FakeResponse: + status_code = 200 + + def raise_for_status(self) -> None: + return None + + def json(self): + return {"unexpected": "shape"} + + class FakeClient: + def __init__(self, **kwargs): + self.kwargs = kwargs + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc, tb): + return None + + def get(self, *args, **kwargs): + return FakeResponse() + + with ( + patch.object(hibp_collector.httpx, "Client", FakeClient), + patch.object(hibp_collector._logger, "warning") as warning, + ): + assert hibp_collector._fetch_breaches("victim@example.com", "test-key") == [] + + assert warning.call_args.args == ("hibp_collector_unexpected_shape",) + assert "email" not in warning.call_args.kwargs + assert warning.call_args.kwargs["email_ref"] == hibp_collector._email_log_ref( + "victim@example.com" + ) + assert "victim@example.com" not in str(warning.call_args) + + # --------------------------------------------------------------------------- # Wallet — CryptoWallet -> Transaction # --------------------------------------------------------------------------- @@ -222,6 +308,70 @@ def test_wallet_emits_sent_and_received(monkeypatch: pytest.MonkeyPatch) -> None assert sent.from_entity_type == "CryptoWallet" assert sent.to_entity_type == 
"Transaction" assert sent.output_value == "0xaaa" + received = next(t for t in out if t.edge_type == "received_transaction") + assert received.from_entity_type == "CryptoWallet" + assert received.to_entity_type == "Transaction" + assert received.output_value == "0xbbb" + + +def test_wallet_fetch_uses_etherscan_v2_chainid() -> None: + calls: dict[str, object] = {} + + class FakeResponse: + def raise_for_status(self) -> None: + return None + + def json(self): + return {"result": []} + + class FakeClient: + def __init__(self, **kwargs): + self.kwargs = kwargs + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc, tb): + return None + + def get(self, url: str, *, params: dict[str, str]): + calls["url"] = url + calls["params"] = params + return FakeResponse() + + with patch.object(wallet_collector.httpx, "Client", FakeClient): + assert wallet_collector._fetch_transactions(_wallet(), "test-key") == [] + + assert calls["url"] == "https://api.etherscan.io/v2/api" + params = calls["params"] + assert isinstance(params, dict) + assert params["chainid"] == "1" + assert params["apikey"] == "test-key" + + +def test_wallet_http_error_log_does_not_leak_api_key() -> None: + class RaisingClient: + def __init__(self, **kwargs): + self.kwargs = kwargs + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc, tb): + return None + + def get(self, url: str, *, params: dict[str, str]): + request = httpx.Request("GET", f"{url}?apikey={params['apikey']}") + raise httpx.RequestError("request failed with test-key", request=request) + + with ( + patch.object(wallet_collector.httpx, "Client", RaisingClient), + patch.object(wallet_collector._logger, "warning") as warning, + ): + assert wallet_collector._fetch_transactions(_wallet(), "test-key") == [] + + assert warning.call_args.args == ("wallet_collector_http_error",) + assert "test-key" not in str(warning.call_args) def test_wallet_rejects_non_evm(monkeypatch: pytest.MonkeyPatch) -> None: @@ -261,6 
+411,44 @@ def test_executor_persists_wallet_transactions(tmp_path, monkeypatch: pytest.Mon assert kg.get_node("CryptoWallet", wallet) is not None +def test_executor_persists_osint_to_scoped_store(tmp_path, monkeypatch: pytest.MonkeyPatch) -> None: + from zettelforge.sqlite_backend import SQLiteBackend + + monkeypatch.setenv("ETHERSCAN_API_KEY", "test-key") + store = SQLiteBackend(data_dir=tmp_path) + store.initialize() + wallet = _wallet() + other = "0x" + "cd" * 20 + txs = [ + {"hash": "0xAAA", "from": wallet, "to": other, "value": "1"}, + {"hash": "0xBBB", "from": other, "to": wallet, "value": "2"}, + ] + + try: + with patch.object(wallet_collector, "_fetch_transactions", return_value=txs): + result = run_osint_collection( + "CryptoWallet", + wallet, + store=store, + collector_names=["wallet_collector"], + ) + + assert result.error_count == 0 + assert result.persisted_count == 2 + source = StoreGraphSource(store) + wallet_node = source.get_node("CryptoWallet", wallet) + assert wallet_node is not None + outgoing = source.get_outgoing_edges(wallet_node["node_id"]) + assert {edge["relationship"] for edge in outgoing} == { + "sent_transaction", + "received_transaction", + } + targets = {source.get_node_by_id(edge["to_node_id"])["entity_value"] for edge in outgoing} + assert targets == {"0xaaa", "0xbbb"} + finally: + store.close() + + def test_executor_accepts_email_seed_without_keys(tmp_path) -> None: # No API keys set: every EmailAddress collector fails closed, but the seed # still validates and persists and the run does not raise. 
From fe433fe87e0b7085c54cc1e3e314d474dc9d3f67 Mon Sep 17 00:00:00 2001 From: Patrick Roland Date: Tue, 16 Jun 2026 18:13:47 -0500 Subject: [PATCH 24/24] ci: cache fastembed model across test matrix Co-Authored-By: Paperclip --- .github/workflows/ci.yml | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 4a99be0..513750a 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -76,6 +76,9 @@ jobs: needs: lint strategy: fail-fast: false + # The fastembed model download is shared across Python versions. Running + # these jobs in parallel can double-hit HuggingFace and trigger 429s. + max-parallel: 1 matrix: python-version: ['3.12', '3.13'] @@ -87,6 +90,14 @@ jobs: with: python-version: ${{ matrix.python-version }} + - name: Cache fastembed model + uses: actions/cache@0057852bfaa89a56745cba8c7296529d2fc39830 + with: + path: | + ~/.cache/fastembed + ~/.cache/huggingface + key: fastembed-nomic-embed-text-v1.5-Q-${{ runner.os }} + - name: Install dependencies run: | python -m pip install --upgrade pip