[
  {
    "apiVersion": "anthropic.com/v1",
    "kind": "AlloyDbTableSemantics",
    "metadata": {
      "name": "dim_memory",
      "labels": {
        "lane": "citation-memory",
        "shape": "managed-agents memory (path + content)"
      }
    },
    "spec": {
      "version": "1.2.0",
      "schema": "dw",
      "description": "Citation memories in the managed-agents memory shape (path + content), one current row per memory path. SCD II — dreams-curated rewrites of a memory's content close the old row so curation history is auditable. Source memories are derived from csl_items; curated ones are written by the dreams consolidation job (B3).",
      "columns": [
        {
          "name": "memory_path",
          "sql_type": "text",
          "description": "memory path (/citations/<csl-id-slug>.md), natural key",
          "kind": "dimension"
        },
        {
          "name": "content",
          "sql_type": "text",
          "description": "markdown memory body (citation summary + CSL-JSON block)",
          "kind": "dimension"
        },
        {
          "name": "csl_id",
          "sql_type": "text",
          "description": "originating csl_items.id; NULL for synthesized memories",
          "kind": "dimension"
        },
        {
          "name": "curation_source",
          "sql_type": "text",
          "description": "ingest | dreams — who wrote this version of the memory",
          "kind": "dimension"
        },
        {
          "name": "row_effective_from",
          "sql_type": "timestamptz",
          "description": "SCD II validity start",
          "kind": "time_dimension"
        }
      ],
      "codeowners": [
        "subagentceo",
        "alex@opensubagents.com"
      ],
      "sla_policy": "curated within 24h of dreams tick",
      "data_start_date": "2026-06-09",
      "load_type": "incremental",
      "allowed_operations": [
        "read",
        "scd2_rewrite"
      ]
    },
    "table_kind": "dim",
    "scd_type": 2
  },
  {
    "apiVersion": "anthropic.com/v1",
    "kind": "AlloyDbTableSemantics",
    "metadata": {
      "name": "dim_research_doc",
      "labels": {
        "lane": "economic-research",
        "source": "vendor/anthropic-sitemap"
      }
    },
    "spec": {
      "version": "1.2.0",
      "schema": "dw",
      "description": "Anthropic research documents mirrored under vendor/anthropic-sitemap/{research,_pdfs}, one current row per doc. SCD II — title/abstract/URL changes close the old row and open a new one so citation provenance is auditable over crawls.",
      "columns": [
        {
          "name": "csl_id",
          "sql_type": "text",
          "description": "CSL item id (anthropic-sitemap:research:<slug>), natural key",
          "kind": "dimension"
        },
        {
          "name": "title",
          "sql_type": "text",
          "description": "document H1 title at crawl time",
          "kind": "dimension"
        },
        {
          "name": "doc_type",
          "sql_type": "text",
          "description": "CSL type — article (research page) or document (_pdfs mirror)",
          "kind": "dimension"
        },
        {
          "name": "url",
          "sql_type": "text",
          "description": "live anthropic.com URL; NULL for hash-named pdf mirrors",
          "kind": "dimension"
        },
        {
          "name": "research_team",
          "sql_type": "text",
          "description": "owning team page (economic-research first; null when unattributed)",
          "kind": "dimension"
        },
        {
          "name": "issued_date",
          "sql_type": "date",
          "description": "publication date parsed from the page (NULL when undated)",
          "kind": "time_dimension"
        },
        {
          "name": "row_effective_from",
          "sql_type": "timestamptz",
          "description": "SCD II validity start",
          "kind": "time_dimension"
        }
      ],
      "codeowners": [
        "subagentceo",
        "alex@opensubagents.com"
      ],
      "sla_policy": "refresh within 24h of vendor crawl",
      "data_start_date": "2026-06-09",
      "load_type": "incremental",
      "allowed_operations": [
        "read",
        "scd2_rewrite"
      ]
    },
    "table_kind": "dim",
    "scd_type": 2
  },
  {
    "apiVersion": "anthropic.com/v1",
    "kind": "AlloyDbTableSemantics",
    "metadata": {
      "name": "dim_vendor",
      "labels": {
        "lane": "full-corpus",
        "source": "src/lib/vendor-corpus.ts"
      }
    },
    "spec": {
      "version": "1.2.0",
      "schema": "dw",
      "description": "One row per mirrored vendor under vendor/. SCD I — corpus counts and crawl freshness overwrite in place; history lives in fact_vendor_crawl, not here.",
      "columns": [
        {
          "name": "vendor_name",
          "sql_type": "text",
          "description": "vendor directory name (natural key)",
          "kind": "dimension"
        },
        {
          "name": "host",
          "sql_type": "text",
          "description": "reconstructed doc host; NULL when unmapped (never guessed)",
          "kind": "dimension"
        },
        {
          "name": "doc_count",
          "sql_type": "bigint",
          "description": "tracked markdown docs in the current corpus",
          "kind": "measure",
          "measure_type": "count"
        },
        {
          "name": "last_loaded_at",
          "sql_type": "timestamptz",
          "description": "last warehouse load touching this vendor",
          "kind": "time_dimension"
        }
      ],
      "codeowners": [
        "subagentceo",
        "alex@opensubagents.com"
      ],
      "sla_policy": "refresh on every dw:load",
      "data_start_date": "2026-06-09",
      "load_type": "full",
      "allowed_operations": [
        "read",
        "scd1_overwrite"
      ]
    },
    "table_kind": "dim",
    "scd_type": 1
  },
  {
    "apiVersion": "anthropic.com/v1",
    "kind": "AlloyDbTableSemantics",
    "metadata": {
      "name": "events_cache_promotion",
      "labels": {
        "lane": "semantic-cache",
        "source": "src/cache/durable-store.ts"
      }
    },
    "spec": {
      "version": "1.2.0",
      "schema": "dw",
      "description": "Append-only audit log of volatile→durable cache promotions. Never UPDATE, never DELETE — the promotion history is the record of what the agent fleet found durable-worthy and when.",
      "columns": [
        {
          "name": "cache_key",
          "sql_type": "text",
          "description": "semantic_cache key that was promoted",
          "kind": "dimension"
        },
        {
          "name": "source_path",
          "sql_type": "text",
          "description": "originating repo path; NULL when synthesized",
          "kind": "dimension"
        },
        {
          "name": "agent_id",
          "sql_type": "text",
          "description": "promoting agent (session or coworker id)",
          "kind": "dimension"
        },
        {
          "name": "occurred_at",
          "sql_type": "timestamptz",
          "description": "promotion wall-clock time",
          "kind": "time_dimension"
        },
        {
          "name": "hits",
          "sql_type": "bigint",
          "description": "volatile hits carried at promotion time",
          "kind": "measure",
          "measure_type": "sum"
        }
      ],
      "codeowners": [
        "subagentceo",
        "alex@opensubagents.com"
      ],
      "sla_policy": "append at promotion time; never mutated",
      "data_start_date": "2026-06-09",
      "load_type": "append_only",
      "allowed_operations": [
        "read",
        "append"
      ]
    },
    "table_kind": "events",
    "grain": "one row per DurableStore.persistVolatile promotion (cache_key × occurred_at)"
  },
  {
    "apiVersion": "anthropic.com/v1",
    "kind": "AlloyDbTableSemantics",
    "metadata": {
      "name": "fact_doc_ingest",
      "labels": {
        "lane": "economic-research",
        "feeds": "rpt_citations_by_year"
      }
    },
    "spec": {
      "version": "1.2.0",
      "schema": "dw",
      "description": "Transactional ingest facts from scripts/ingest-citations.ts and scripts/warm-semantic-cache.ts. Each pipeline run appends one row per document touched, carrying size and extraction measures.",
      "columns": [
        {
          "name": "doc_sk",
          "sql_type": "bigint",
          "description": "FK → dw.dim_research_doc.surrogate_key",
          "kind": "dimension"
        },
        {
          "name": "date_key",
          "sql_type": "integer",
          "description": "FK → dw.dim_date.date_key (YYYYMMDD of the ingest run)",
          "kind": "time_dimension"
        },
        {
          "name": "ingest_run_at",
          "sql_type": "timestamptz",
          "description": "pipeline run wall-clock start",
          "kind": "time_dimension"
        },
        {
          "name": "markdown_bytes",
          "sql_type": "bigint",
          "description": "size of the mirrored markdown body at ingest",
          "kind": "measure",
          "measure_type": "sum"
        },
        {
          "name": "fields_extracted",
          "sql_type": "bigint",
          "description": "count of CSL fields populated (title/abstract/issued/URL)",
          "kind": "measure",
          "measure_type": "sum"
        },
        {
          "name": "extraction_completeness",
          "sql_type": "numeric",
          "description": "fields_extracted over the 4 extractable CSL fields",
          "kind": "measure",
          "measure_type": "ratio",
          "calculation": {
            "expression": "fields_extracted::numeric / 4",
            "inherits": [
              "fact_doc_ingest.fields_extracted"
            ],
            "result_type": "numeric"
          }
        }
      ],
      "codeowners": [
        "subagentceo",
        "alex@opensubagents.com"
      ],
      "sla_policy": "append on every ingest run",
      "data_start_date": "2026-06-09",
      "load_type": "append_only",
      "allowed_operations": [
        "read",
        "append"
      ]
    },
    "table_kind": "fact",
    "grain": "one row per research document per ingest run (csl_id × ingest_run_at)"
  },
  {
    "apiVersion": "anthropic.com/v1",
    "kind": "AlloyDbTableSemantics",
    "metadata": {
      "name": "fact_memory_access",
      "labels": {
        "lane": "citation-memory",
        "feeds": "dreams consolidation (B3)"
      }
    },
    "spec": {
      "version": "1.2.0",
      "schema": "dw",
      "description": "Transactional log of research-agent reads of citation memories. The dreams job (B3) aggregates access frequency to decide which memories to consolidate, expand, or retire.",
      "columns": [
        {
          "name": "memory_sk",
          "sql_type": "bigint",
          "description": "FK → dw.dim_memory.surrogate_key",
          "kind": "dimension"
        },
        {
          "name": "agent_id",
          "sql_type": "text",
          "description": "reading agent (session or coworker id)",
          "kind": "dimension"
        },
        {
          "name": "date_key",
          "sql_type": "integer",
          "description": "FK-style YYYYMMDD of the access",
          "kind": "time_dimension"
        },
        {
          "name": "accessed_at",
          "sql_type": "timestamptz",
          "description": "access wall-clock time",
          "kind": "time_dimension"
        },
        {
          "name": "access_count",
          "sql_type": "bigint",
          "description": "accesses recorded in this row (batched writers may roll up)",
          "kind": "measure",
          "measure_type": "sum"
        }
      ],
      "codeowners": [
        "subagentceo",
        "alex@opensubagents.com"
      ],
      "sla_policy": "append within 5m of agent read",
      "data_start_date": "2026-06-09",
      "load_type": "append_only",
      "allowed_operations": [
        "read",
        "append"
      ]
    },
    "table_kind": "fact",
    "grain": "one row per memory access (memory_sk × accessed_at)"
  },
  {
    "apiVersion": "anthropic.com/v1",
    "kind": "AlloyDbTableSemantics",
    "metadata": {
      "name": "fact_vendor_crawl",
      "labels": {
        "lane": "full-corpus",
        "feeds": "B6 visualizations (year × vendor)"
      }
    },
    "spec": {
      "version": "1.2.0",
      "schema": "dw",
      "description": "Transactional load facts per vendor per run — corpus size and citation extraction quality over time across all 34 mirrored vendors.",
      "columns": [
        {
          "name": "vendor_name",
          "sql_type": "text",
          "description": "FK-style → dw.dim_vendor.vendor_name",
          "kind": "dimension"
        },
        {
          "name": "date_key",
          "sql_type": "integer",
          "description": "YYYYMMDD of the load run",
          "kind": "time_dimension"
        },
        {
          "name": "loaded_at",
          "sql_type": "timestamptz",
          "description": "load run wall-clock start",
          "kind": "time_dimension"
        },
        {
          "name": "doc_count",
          "sql_type": "bigint",
          "description": "docs in this vendor's corpus at load time",
          "kind": "measure",
          "measure_type": "count"
        },
        {
          "name": "dated_count",
          "sql_type": "bigint",
          "description": "docs with a parsed issued date",
          "kind": "measure",
          "measure_type": "count"
        },
        {
          "name": "dated_share",
          "sql_type": "numeric",
          "description": "dated_count over doc_count",
          "kind": "measure",
          "measure_type": "ratio",
          "calculation": {
            "expression": "dated_count::numeric / NULLIF(doc_count, 0)",
            "inherits": [
              "fact_vendor_crawl.dated_count",
              "fact_vendor_crawl.doc_count"
            ],
            "result_type": "numeric"
          }
        }
      ],
      "codeowners": [
        "subagentceo",
        "alex@opensubagents.com"
      ],
      "sla_policy": "append on every dw:load",
      "data_start_date": "2026-06-09",
      "load_type": "append_only",
      "allowed_operations": [
        "read",
        "append"
      ]
    },
    "table_kind": "fact",
    "grain": "one row per vendor per warehouse load (vendor_name × loaded_at)"
  },
  {
    "apiVersion": "anthropic.com/v1",
    "kind": "AlloyDbTableSemantics",
    "metadata": {
      "name": "rpt_citations_by_team",
      "labels": {
        "lane": "economic-research",
        "consumer": "frontend citations panel"
      }
    },
    "spec": {
      "version": "1.0.0",
      "schema": "dw",
      "description": "Predefined aggregation — citable research output per Anthropic research team page (economic-research first), refreshed after each ingest run.",
      "columns": [
        {
          "name": "research_team",
          "sql_type": "text",
          "description": "team slug from dim_research_doc.research_team",
          "kind": "dimension"
        },
        {
          "name": "doc_count",
          "sql_type": "bigint",
          "description": "distinct current docs attributed to the team",
          "kind": "measure",
          "measure_type": "count"
        }
      ],
      "codeowners": [
        "subagentceo",
        "alex@opensubagents.com"
      ],
      "sla_policy": "full refresh on every dw:load",
      "data_start_date": "2026-06-10",
      "load_type": "full",
      "allowed_operations": [
        "read",
        "full_refresh"
      ]
    },
    "table_kind": "rpt",
    "aggregates": "fact_doc_ingest"
  },
  {
    "apiVersion": "anthropic.com/v1",
    "kind": "AlloyDbTableSemantics",
    "metadata": {
      "name": "rpt_citations_by_year",
      "labels": {
        "lane": "economic-research",
        "consumer": "frontend citations panel"
      }
    },
    "spec": {
      "version": "1.2.0",
      "schema": "dw",
      "description": "Predefined aggregation — citable research output per publication year, refreshed after each ingest run. Backs the researcher-facing frontend so the SPA never aggregates raw facts client-side.",
      "columns": [
        {
          "name": "issued_year",
          "sql_type": "smallint",
          "description": "publication year from dim_research_doc.issued_date",
          "kind": "time_dimension"
        },
        {
          "name": "doc_count",
          "sql_type": "bigint",
          "description": "distinct research docs published in the year",
          "kind": "measure",
          "measure_type": "count"
        },
        {
          "name": "dated_share",
          "sql_type": "numeric",
          "description": "share of corpus docs carrying a parseable issued date",
          "kind": "measure",
          "measure_type": "ratio",
          "calculation": {
            "expression": "doc_count::numeric / NULLIF(sum(doc_count) OVER (), 0)",
            "inherits": [
              "rpt_citations_by_year.doc_count"
            ],
            "result_type": "numeric"
          }
        }
      ],
      "codeowners": [
        "subagentceo",
        "alex@opensubagents.com"
      ],
      "sla_policy": "full refresh on every dw:load",
      "data_start_date": "2026-06-09",
      "load_type": "full",
      "allowed_operations": [
        "read",
        "full_refresh"
      ]
    },
    "table_kind": "rpt",
    "aggregates": "fact_doc_ingest"
  },
  {
    "apiVersion": "anthropic.com/v1",
    "kind": "AlloyDbTableSemantics",
    "metadata": {
      "name": "rpt_vendor_freshness",
      "labels": {
        "lane": "full-corpus",
        "consumer": "frontend service-status panel"
      }
    },
    "spec": {
      "version": "1.0.0",
      "schema": "dw",
      "description": "Predefined aggregation — last load time and run count per vendor, the freshness signal paying crawlers and the 24/7 loop key off.",
      "columns": [
        {
          "name": "vendor_name",
          "sql_type": "text",
          "description": "vendor directory name",
          "kind": "dimension"
        },
        {
          "name": "last_loaded_at",
          "sql_type": "timestamptz",
          "description": "most recent fact_vendor_crawl.loaded_at",
          "kind": "time_dimension"
        },
        {
          "name": "load_runs",
          "sql_type": "bigint",
          "description": "total load runs recorded for the vendor",
          "kind": "measure",
          "measure_type": "count"
        },
        {
          "name": "latest_doc_count",
          "sql_type": "bigint",
          "description": "doc_count at the most recent load",
          "kind": "measure",
          "measure_type": "max"
        }
      ],
      "codeowners": [
        "subagentceo",
        "alex@opensubagents.com"
      ],
      "sla_policy": "full refresh on every dw:load",
      "data_start_date": "2026-06-10",
      "load_type": "full",
      "allowed_operations": [
        "read",
        "full_refresh"
      ]
    },
    "table_kind": "rpt",
    "aggregates": "fact_vendor_crawl"
  }
]
