{
  "$schema": "https://json-schema.org/draft/2020-12/schema",
  "$id": "https://iris.athenainsights.org/dev/schema.json",
  "title": "Iris",
  "description": "A schema for publishing aggregated public opinion survey toplines and crosstabs: study metadata, a catalog of questions expressed as dimensional constructs, one or more waves of fielding, and the marginal distributions (toplines and subgroup crosstabs) those waves produced.",
  "type": "object",
  "required": ["schema_version", "study_id", "publisher", "questions", "waves"],
  "additionalProperties": false,
  "properties": {
    "schema_version": {
      "type": "string",
      "const": "dev",
      "description": "Iris version this instance follows. \"dev\" is the active development channel — the schema shape may change without notice. Real semver versions (\"0.1.0\", \"1.0.0\", …) ship at stable, immutable URLs under /x.y.z/."
    },
    "study_id": {
      "type": "string",
      "pattern": "^[a-z0-9][a-z0-9\\-_]*$",
      "description": "Short stable identifier for the study. Lowercase ASCII letters and digits, optionally with dashes or underscores after the first character, e.g. \"pew-atp-w173\" or \"gss-2024\". Used in filenames, URLs, and cross-references."
    },
    "title": {
      "type": "string",
      "description": "Human-readable study title, e.g. \"Pew ATP Wave 173 — AI and its impact\". Shown in citations and dashboards."
    },
    "publisher": {
      "$ref": "#/$defs/Organization",
      "description": "The organization under whose name the study is published. For jointly branded releases (e.g. \"AP-NORC Center\"), use the joint brand as publisher and list the parent organizations in contributors."
    },
    "contributors": {
      "type": "array",
      "items": { "$ref": "#/$defs/Organization" },
      "description": "Other organizations associated with the study — fielders, funders, commissioners, partners. Roles (if given) describe what each organization contributed."
    },
    "methodology_notes": {
      "type": "string",
      "description": "Free-text notes about how the study was run: panel recruitment, mode mix, weighting targets, oversamples — anything a methodologist would want to know but that doesn't fit the structured fields below."
    },
    "source_urls": {
      "type": "array",
      "items": { "type": "string", "format": "uri" },
      "description": "Links to the authoritative published artifacts for this study — topline PDFs, methodology pages, microdata releases. Include at least one so consumers can trace numbers back to the source."
    },
    "subgroup_schema": {
      "$ref": "#/$defs/SubgroupSchema",
      "description": "Catalog of demographic variables used anywhere in this study's crosstabs. Declared once here; referenced from each wave's subgroups. Omit for total-sample-only studies."
    },
    "questions": {
      "type": "array",
      "items": { "$ref": "#/$defs/Question" },
      "description": "Every question reported in any wave of this study. Each question is defined once and reused across waves by id; trend lines emerge from multiple waves reporting against the same question."
    },
    "waves": {
      "type": "array",
      "minItems": 1,
      "items": { "$ref": "#/$defs/Wave" },
      "description": "One entry per fielding occasion. Single-shot studies still use an array of one. Field dates, sample metadata, and results live on the wave; everything constant across the program lives at the study level."
    }
  },

  "$defs": {

    "Organization": {
      "type": "object",
      "required": ["display_name"],
      "additionalProperties": false,
      "description": "An organization associated with a study. URL, when present, is the canonical identifier: two references to the same URL denote the same organization across studies.",
      "properties": {
        "display_name": {
          "type": "string",
          "description": "Human-readable name shown in attribution contexts (citations, display tables)."
        },
        "url": {
          "type": "string",
          "format": "uri",
          "description": "[RECOMMENDED] Canonical homepage URL. Serves as the machine-readable identifier for the organization. Use a urn: scheme when no real URL exists (e.g. defunct orgs); omit entirely when no stable identifier is available."
        },
        "roles": {
          "type": "array",
          "items": { "type": "string" },
          "description": "What this organization contributed. Open vocabulary — consumers should accept unknown values. Suggested baseline (from FiveThirtyEight's historical poll taxonomy): \"pollster\" (firm that designed and fielded), \"designer\" (instrument designer when separate), \"fielder\" (subcontractor doing interviews), \"sponsor\" (client commissioning the wave), \"sponsor_candidate\" (candidate or campaign sponsor, for partisan polls), \"funder\" (separate from sponsor — e.g. foundation), \"publisher\" (media outlet releasing the results), \"academic_partner\" (affiliated research institution), \"other\" (anything else; describe in methodology_notes). An organization may carry multiple roles."
        }
      }
    },

    "SubgroupSchema": {
      "type": "object",
      "additionalProperties": false,
      "description": "The set of demographic variables a study uses for crosstabs. Each variable (party, age, region, …) is declared once with its possible values; waves then cite a subgroup as a filter over one or more of these variables.",
      "properties": {
        "variables": {
          "type": "array",
          "items": { "$ref": "#/$defs/SubgroupVariable" },
          "description": "Demographic variables available for crosstabs in this study."
        }
      }
    },

    "SubgroupVariable": {
      "type": "object",
      "required": ["id"],
      "additionalProperties": false,
      "description": "One demographic axis used in crosstabs — party identification, age bucket, census region, and so on.",
      "allOf": [
        {
          "if": { "properties": { "kind": { "const": "numeric" } }, "required": ["kind"] },
          "else": { "required": ["values"] }
        }
      ],
      "properties": {
        "id": {
          "type": "string",
          "description": "Short machine identifier for the variable, e.g. \"party_id\" or \"age_bucket\". Referenced by each subgroup's filters."
        },
        "label": {
          "type": "string",
          "description": "[RECOMMENDED] Human-readable name, e.g. \"Partisan identification with leaners\"."
        },
        "kind": {
          "type": "string",
          "enum": ["categorical", "numeric"],
          "default": "categorical",
          "description": "Use \"categorical\" for discrete values (parties, regions); \"numeric\" for measured quantities (income, age in years). Most crosstab variables are categorical because their values have already been bucketed."
        },
        "values": {
          "type": "array",
          "description": "Possible values the variable can take. Each has a code and a display label; rolled-up categories also list the raw codes they combine. Required when kind is \"categorical\" (the default); may be omitted when kind is \"numeric\" since there is no enumerable set.",
          "items": {
            "type": "object",
            "required": ["code"],
            "additionalProperties": false,
            "properties": {
              "code": {
                "oneOf": [{ "type": "string" }, { "type": "integer" }],
                "description": "Canonical code for this value. Strings for semantic codes (\"dem\", \"urban\"); integers when the pollster's codebook uses them."
              },
              "label": {
                "type": "string",
                "description": "[RECOMMENDED] Display label shown in crosstab headers."
              },
              "rollup_of": {
                "type": "array",
                "items": { "oneOf": [{ "type": "string" }, { "type": "integer" }] },
                "description": "If this value is a rollup of more granular codes (e.g. \"Dem/Lean Dem\" folds strong-dem, weak-dem, and lean-dem together), list the raw codes here."
              }
            }
          }
        }
      }
    },

    "Wave": {
      "type": "object",
      "required": ["wave_id"],
      "additionalProperties": false,
      "description": "A single fielding occasion. Carries field dates, sample metadata, the subgroups reported in this wave, and the results themselves. Multiple waves in one Study form a trend. Trend-reference waves (entries carried only to support prior-wave comparisons) may omit the RECOMMENDED fields below when authoritative methodology is documented in the original release.",
      "properties": {
        "wave_id": {
          "type": "string",
          "description": "Short identifier for the wave, e.g. \"w173\" or \"2024-08\". Unique within the study."
        },
        "label": {
          "type": "string",
          "description": "Human-readable wave label, e.g. \"Wave 173 — June 2025\"."
        },
        "field_dates": {
          "type": "object",
          "required": ["start", "end"],
          "additionalProperties": false,
          "description": "[RECOMMENDED] Start and end of the fielding window, in ISO 8601 (YYYY-MM-DD). For continuous panels, use the published collection window for this wave. May be omitted for trend-reference waves whose field dates are not republished in the current topline.",
          "properties": {
            "start": { "type": "string", "format": "date" },
            "end": { "type": "string", "format": "date" }
          }
        },
        "sample": {
          "$ref": "#/$defs/SampleSpec",
          "description": "[RECOMMENDED] Sample design, size, and weighting for this wave. May be omitted for trend-reference waves; authoritative studies should provide it."
        },
        "subgroups": {
          "type": "array",
          "items": { "$ref": "#/$defs/Subgroup" },
          "description": "Crosstab cells actually reported in this wave. Each subgroup references variables declared in the study-level subgroup_schema. Full-sample results are expressed by omitting subgroup_id on the Result — no sentinel subgroup need be declared."
        },
        "results": {
          "type": "array",
          "items": { "$ref": "#/$defs/Result" },
          "description": "Published marginal distributions for this wave — one entry per (question, dimension, variant, subgroup) slice."
        },
        "notes": {
          "type": "string",
          "description": "Wave-specific notes that don't fit the structured fields: oversample details, field events, deviations from the study's standard methodology."
        }
      }
    },

    "SampleSpec": {
      "type": "object",
      "additionalProperties": false,
      "description": "Who was sampled, how many responded, and how the results were weighted. Fields that vary between waves live here; fields stable across a multi-wave study live in the study's methodology_notes.",
      "properties": {
        "population": {
          "type": "string",
          "description": "[RECOMMENDED] Target population in plain language, e.g. \"U.S. adults 18+\" or \"Registered voters in Iowa\"."
        },
        "n": {
          "type": "integer",
          "minimum": 1,
          "description": "[RECOMMENDED] Unweighted completed interviews in this wave."
        },
        "mode": {
          "type": "array",
          "description": "Modes of data collection. Follows AAPOR's conventional categories; Iris does not mint this taxonomy. Use \"mixed\" with weighting.notes for hybrid designs that don't fit cleanly.",
          "items": {
            "type": "string",
            "enum": ["online", "phone_landline", "phone_cell", "sms", "mail", "in_person", "mixed", "other"]
          }
        },
        "frame": {
          "type": "string",
          "description": "Sampling frame type. Follows AAPOR's conventional distinctions between probability and nonprobability designs; Iris does not mint this taxonomy.",
          "enum": ["probability_abs", "probability_rdd", "probability_panel", "nonprobability_panel", "river", "quota", "mixed", "other"]
        },
        "margin_of_error": {
          "type": "number",
          "description": "Design-effect-adjusted margin of error at 95% confidence, in percentage points, for the full sample. A wave of 5,000 U.S. adults typically reports ±1.6. Subgroup-level precision lives on Result.precision."
        },
        "weighting": {
          "type": "object",
          "additionalProperties": false,
          "description": "Weighting procedure used to produce the reported percentages.",
          "properties": {
            "scheme": {
              "type": "string",
              "description": "Weighting procedure. Follows AAPOR's conventional categories; Iris does not mint this taxonomy.",
              "enum": ["raking", "post_stratification", "propensity", "none", "other"]
            },
            "variables": {
              "type": "array",
              "items": { "type": "string" },
              "description": "Demographic variables used as raking or post-stratification targets, e.g. [\"age\", \"sex\", \"education\", \"region\"]."
            },
            "notes": {
              "type": "string",
              "description": "Free-text description of weighting benchmarks, trimming rules, or anything else a reader needs to interpret the percentages."
            }
          }
        }
      }
    },

    "Subgroup": {
      "type": "object",
      "required": ["id"],
      "additionalProperties": false,
      "description": "A subgroup reported in this wave's crosstabs: a filter that identifies a cell like \"Democrats\", \"women 18-29\", or \"respondents who answered 1 or 2 to AI_BENE\". Simple subgroups use a flat filter list (AND of matches, with OR within values). Leaner-allocated or otherwise disjunctive subgroups use the nested filter form.",
      "properties": {
        "id": {
          "type": "string",
          "description": "Short identifier for this subgroup, e.g. \"dem\" or \"women_18_29\". Referenced by result.subgroup_id. Unique within the wave."
        },
        "label": {
          "type": "string",
          "description": "[RECOMMENDED] Display label for this subgroup in tables and charts."
        },
        "filters": {
          "description": "The filter(s) that define this subgroup. Two forms are accepted: a flat array of matches (AND across entries, OR within values), or a single nested expression using all_of / any_of / match for leaner-allocated or otherwise disjunctive subgroups. Each match is either variable-based ({variable_id, values}) or question-based ({question_id, dimension_id, values}) — see SubgroupMatch.",
          "oneOf": [
            {
              "type": "array",
              "items": { "$ref": "#/$defs/SubgroupMatch" },
              "description": "Flat form. Each entry is a single match — one variable (or one question dimension) plus its qualifying values; entries AND together, values within an entry OR together. Equivalent to { all_of: [ match, match, … ] }."
            },
            { "$ref": "#/$defs/SubgroupFilter" }
          ]
        },
        "n_unweighted": {
          "type": "integer",
          "minimum": 0,
          "description": "Unweighted count of respondents in this subgroup."
        },
        "n_weighted": {
          "type": "number",
          "minimum": 0,
          "description": "Weighted count of respondents in this subgroup, if the pollster reports one."
        },
        "parent_subgroup_id": {
          "type": "string",
          "description": "Id of the parent subgroup this one partitions, if any. Omit when the parent is the full sample. Enables pct_of_parent to be interpreted."
        },
        "pct_of_parent": {
          "type": "number",
          "minimum": 0,
          "maximum": 100,
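          "description": "Share of the parent subgroup represented by this subgroup, in percent (0–100) — e.g. in exit polls, \"voters ages 18–29 made up 14% of all voters\". Requires parent_subgroup_id whenever the parent is a declared subgroup; when parent_subgroup_id is omitted, the parent is the full sample."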
        }
      }
    },

    "SubgroupMatch": {
      "type": "object",
      "additionalProperties": false,
      "description": "A single match leaf. Two forms: (a) variable-based — {variable_id: \"age_band\", values: [\"18_29\", \"30_49\"]} qualifies respondents by a declared demographic variable; (b) question-based — {question_id: \"AI_BENE\", dimension_id: \"main\", values: [1, 2]} qualifies respondents who gave one of the listed responses to a prior question, used for skip-logic-defined cohorts and conditional follow-ups. Exactly one of variable_id / question_id must be present. Values OR together within a match.",
      "properties": {
        "variable_id": {
          "type": "string",
          "description": "Id of a variable declared in the study's subgroup_schema. Mutually exclusive with question_id."
        },
        "question_id": {
          "type": "string",
          "description": "Id of a question declared at the study level. Mutually exclusive with variable_id. Use when filtering by a prior-question response — e.g. \"respondents who answered 1 or 2 to AI_BENE\"."
        },
        "dimension_id": {
          "type": "string",
          "description": "Id of a dimension within the referenced question. Only meaningful in question-based matches (alongside question_id). Required when question_id names a multi-dimension question (matrix, ranking, multi-select); may be omitted for single-dimension questions, in which case the sole dimension is assumed."
        },
        "values": {
          "type": "array",
          "minItems": 1,
          "items": { "oneOf": [{ "type": "string" }, { "type": "integer" }] },
          "description": "Qualifying values. For variable-based matches, values of the referenced subgroup variable. For question-based matches, codes on the referenced dimension's response space. Multiple values form a disjunction."
        }
      },
      "oneOf": [
        { "required": ["variable_id", "values"] },
        { "required": ["question_id", "values"] }
      ]
    },

    "SubgroupFilter": {
      "type": "object",
      "additionalProperties": false,
      "description": "A boolean combination of subgroup matches. Exactly one of all_of / any_of / match must be populated. Used for filters that cannot be expressed as a flat AND of single-variable matches — e.g. \"Dem or Lean Dem\" = party=D OR (party=I AND lean=D).",
      "properties": {
        "all_of": {
          "type": "array",
          "minItems": 1,
          "items": { "$ref": "#/$defs/SubgroupFilterNode" },
          "description": "Every child must hold."
        },
        "any_of": {
          "type": "array",
          "minItems": 1,
          "items": { "$ref": "#/$defs/SubgroupFilterNode" },
          "description": "At least one child must hold."
        },
        "match": {
          "$ref": "#/$defs/SubgroupMatch",
          "description": "A single match leaf — variable-based or question-based (see SubgroupMatch)."
        }
      },
      "oneOf": [
        { "required": ["all_of"] },
        { "required": ["any_of"] },
        { "required": ["match"] }
      ]
    },

    "SubgroupFilterNode": {
      "description": "Either a leaf match or a nested filter combinator. A leaf is a bare SubgroupMatch; a nested node is a SubgroupFilter (all_of / any_of / match), whose children are themselves SubgroupFilterNodes, so filters may nest to any depth.",
      "oneOf": [
        { "$ref": "#/$defs/SubgroupMatch" },
        { "$ref": "#/$defs/SubgroupFilter" }
      ]
    },

    "Question": {
      "type": "object",
      "required": ["id", "dimensions"],
      "additionalProperties": false,
      "description": "A question is a stem plus one or more dimensions. Each dimension is one reportable axis of response — a matrix row, a rank item, a multi-select option, or the lone dimension of a single-item question. A response_space (enum / integer_range / real_range / text) defines the set of legitimate values; it may be declared once at the question level (applying to all dimensions) or overridden per dimension. Response-space precedence when multiple are declared: dimension-within-variant > variant > dimension > question (most specific wins).",
      "properties": {
        "id": {
          "type": "string",
          "description": "Short identifier for this question, unique within the study. Typically matches the pollster's questionnaire tag (\"AI_HEARD\", \"Q12\")."
        },
        "stem": {
          "type": "string",
          "description": "Shared prefix text that introduces every dimension of the question. Omit or leave empty for single-dimension questions, where the full wording lives in the single dimension's text."
        },
        "concept_refs": {
          "type": "array",
          "items": { "$ref": "#/$defs/ConceptRef" },
          "description": "Optional concept tags linking questions that measure the same underlying construct. Used two ways: (a) within a study, to bridge questions whose id was version-bumped on wording change so consumers can still derive a trend — declare a shared ref with scheme=\"local\" (or any author-chosen namespace); (b) across studies, to federate across pollsters by matching against external vocabularies (ANES VCF codes, GSS mnemonics, Roper iPoll DOIs, DDI concept URIs). Same mechanism, different schemes. Trends within a single study are otherwise derived from matching question_id across waves ordered by field_dates — concept_refs is only needed when question_id is not stable."
        },
        "notes": {
          "type": "string",
          "description": "Question-specific notes: rotation and anchor schemes, interviewer instructions, wording quirks. Anything a reader needs to interpret this question that doesn't fit the structured fields."
        },
        "skip_notes": {
          "type": "string",
          "description": "Plain-English description of any skip logic (\"Asked only of respondents who answered 1 or 2 to AI_HEARD\"). When the question is conditional on earlier answers, this explains why the result's base.n_unweighted differs from the wave's full sample size."
        },
        "dimensions": {
          "type": "array",
          "minItems": 1,
          "items": { "$ref": "#/$defs/Dimension" },
          "description": "One or more reportable axes of response. A single-item question has one dimension; a matrix has one per row; a ranking or multi-select has one per item. A MaxDiff question declares one dimension per item being scored."
        },
        "response_space": {
          "$ref": "#/$defs/ResponseSpace",
          "description": "Default response space applied to every dimension that does not override. If omitted, each dimension must declare its own. Superseded by variant response_space_override and dimension-within-variant overrides where present."
        },
        "constraints": {
          "type": "array",
          "items": { "$ref": "#/$defs/Constraint" },
          "description": "Cross-dimension rules the question mechanics impose: ranking as a permutation, budget allocation as a sum, \"None of the above\" as an exclusivity. Five kinds available. Variants may override via constraints_override when a variant's dimension set differs."
        },
        "variants": {
          "type": "array",
          "items": { "$ref": "#/$defs/Variant" },
          "description": "Split-sample wording variants. When different halves of the sample see different phrasings of the same question, declare each wording as a variant here; results then reference variant_id."
        }
      }
    },

    "ConceptRef": {
      "type": "object",
      "required": ["scheme", "id"],
      "additionalProperties": false,
      "description": "One (scheme, id) tuple pointing into an external or local vocabulary. Multiple refs may attach to the same question (e.g. one \"local\" for within-study rename-bridging, one \"anes_vcf\" for cross-study federation).",
      "properties": {
        "scheme": {
          "type": "string",
          "description": "Identifier of the naming scheme. Use \"local\" (or a study-specific namespace) for within-study linkage across renamed questions; use external-vocabulary schemes — e.g. \"anes_vcf\", \"gss\", \"ess\", \"roper_ipoll\", \"ddi_concept\", \"cessda_topic\" — for cross-pollster federation. Open vocabulary."
        },
        "id": {
          "type": "string",
          "description": "The identifier within that scheme. Could be a short code (\"VCF0501\"), a mnemonic (\"polviews\"), or a URI (\"https://…\")."
        }
      }
    },

    "Dimension": {
      "type": "object",
      "required": ["id"],
      "additionalProperties": false,
      "description": "One reportable axis of response. For a single-item question, there is one dimension whose text carries the full wording; for a matrix, each row is a dimension; for a ranking or multi-select, each item is a dimension; for MaxDiff, each item being scored is a dimension.",
      "properties": {
        "id": {
          "type": "string",
          "description": "Short identifier for this dimension, unique within its question. Referenced by result.dimension_id and by any constraints."
        },
        "text": {
          "type": "string",
          "description": "For a single-dimension question, this carries the full question wording (stem is empty). For a matrix row, rank item, or MaxDiff item, this is the row/item label. May be omitted for anonymous dimensions."
        },
        "response_space": {
          "$ref": "#/$defs/ResponseSpace",
          "description": "Per-dimension response space. Overrides the question-level default when dimensions have different response scales."
        }
      }
    },

    "Variant": {
      "type": "object",
      "required": ["variant_id"],
      "additionalProperties": false,
      "description": "A split-sample variant. May override stem, dimensions, response_space, and constraints; unspecified fields inherit from the parent question.",
      "properties": {
        "variant_id": {
          "type": "string",
          "description": "Short identifier for this variant, unique within the question. Referenced by result.variant_id."
        },
        "label": {
          "type": "string",
          "description": "Display label distinguishing this wording from the others, e.g. \"Form A (Pew framing)\" or \"with 'concerned' anchor\"."
        },
        "stem_override": {
          "type": "string",
          "description": "Stem text used under this variant, if it differs from the question's stem."
        },
        "dimensions_override": {
          "type": "array",
          "items": { "$ref": "#/$defs/Dimension" },
          "description": "Dimensions used under this variant, if they differ from the question's dimensions."
        },
        "response_space_override": {
          "$ref": "#/$defs/ResponseSpace",
          "description": "Response space used under this variant, if it differs from the question's."
        },
        "constraints_override": {
          "type": "array",
          "items": { "$ref": "#/$defs/Constraint" },
          "description": "Constraints used under this variant, if they differ from the question's. Use when a variant drops or adds dimensions referenced by a constraint (e.g. a ranking variant that removes one of the items)."
        }
      }
    },

    "ResponseSpace": {
      "description": "The set of legitimate values on a dimension. Four kinds: \"enum\" for coded options, \"integer_range\" for bounded integers, \"real_range\" for continuous values, and \"text\" for open-ended prose. Precedence when multiple response spaces are in scope: dimension-within-variant > variant > dimension > question — most specific wins.",
      "oneOf": [
        { "$ref": "#/$defs/EnumSpace" },
        { "$ref": "#/$defs/IntegerRangeSpace" },
        { "$ref": "#/$defs/RealRangeSpace" },
        { "$ref": "#/$defs/TextSpace" }
      ]
    },

    "EnumSpace": {
      "type": "object",
      "required": ["kind", "codes"],
      "additionalProperties": false,
      "description": "A finite set of coded options — the backbone of Likert scales, forced-choice questions, and multi-select options (modelled as binary enums).",
      "properties": {
        "kind": { "const": "enum" },
        "ordered": {
          "type": "boolean",
          "default": false,
          "description": "True if codes form an ordered continuum (Likert, intensity). When true, consumers may: (a) apply a monotone color scale with the declared order as canonical direction; (b) treat nets of contiguous code ranges as natural rollups; (c) interpret order-dependent stats (mean_rank, median code) as meaningful. Code.value, if supplied, should be monotone in the declared code order. False for nominal enumerations (parties, regions) where codes have no natural order."
        },
        "codes": {
          "type": "array",
          "minItems": 1,
          "items": { "$ref": "#/$defs/Code" },
          "description": "Every legitimate option a respondent can pick, including non-substantive options (\"Don't know\", \"Refused\") marked with missing: true."
        },
        "nets": {
          "type": "array",
          "description": "Named rollups over subsets of codes (e.g. \"Favorable (net)\" = 1 + 2). Declared here so every published net percentage carries its construction with it. A net's members may reference raw codes or other net ids (hierarchical nets). Difference nets capture pollster artifacts like \"Net Approval\" = total_approve − total_disapprove.",
          "items": { "$ref": "#/$defs/Net" }
        }
      }
    },

    "Net": {
      "type": "object",
      "required": ["id"],
      "additionalProperties": false,
      "description": "A named rollup over codes of an enum response space. Sum nets combine the share of their members; difference nets report the signed difference between a minuend and a subtrahend net.",
      "properties": {
        "id": {
          "type": "string",
          "description": "Short identifier for the net, referenced by result.stats.nets[].id."
        },
        "label": {
          "type": "string",
          "description": "[RECOMMENDED] Display label, e.g. \"Favorable (net)\" or \"Net approval\"."
        },
        "kind": {
          "type": "string",
          "enum": ["sum", "difference"],
          "default": "sum",
          "description": "\"sum\" — the default — combines the share of the listed members. \"difference\" reports minuend − subtrahend (e.g. Net Approval = total_approve − total_disapprove)."
        },
        "members": {
          "type": "array",
          "minItems": 1,
          "items": { "oneOf": [{ "type": "string" }, { "type": "integer" }] },
          "description": "For sum nets: codes or other net ids that together make up this net. Each must resolve to a defined code on this response space or a net declared alongside it. String members are first looked up as net ids, then as string codes; integer members are always codes. Omit for difference nets (which use minuend_id/subtrahend_id instead)."
        },
        "minuend_id": {
          "type": "string",
          "description": "For difference nets: id of the net to subtract from. Must reference another declared net on the same response space."
        },
        "subtrahend_id": {
          "type": "string",
          "description": "For difference nets: id of the net to subtract. Must reference another declared net on the same response space."
        }
      },
      "allOf": [
        {
          "if": { "properties": { "kind": { "const": "difference" } }, "required": ["kind"] },
          "then": { "required": ["minuend_id", "subtrahend_id"] },
          "else": { "required": ["members"] }
        }
      ]
    },

    "Code": {
      "type": "object",
      "required": ["code"],
      "additionalProperties": false,
      "description": "One option within an enum response space, or one missing-value tag on a numeric range.",
      "properties": {
        "code": {
          "oneOf": [{ "type": "string" }, { "type": "integer" }],
          "description": "The value the pollster's data uses for this option — string (\"yes\", \"dem\") or integer (1, -2). Must be unique within the response space."
        },
        "label": {
          "type": "string",
          "description": "[RECOMMENDED] Display text shown to respondents and in published tables, e.g. \"Strongly agree\" or \"Don't know\"."
        },
        "value": {
          "type": "number",
          "description": "Optional numeric value for aggregation (means, regressions). A consumer-default numeric mapping — useful for Likert scales with symmetric numeric interpretations (e.g. strongly-positive = 2, somewhat-positive = 1, neutral = 0, somewhat-negative = −1, strongly-negative = −2). Not guaranteed to be interval-scaled; consumers requiring interval-scaled interpretation should treat this as a hint and apply their own scoring."
        },
        "pole": {
          "type": "string",
          "enum": ["positive", "neutral", "negative", "none"],
          "description": "Which end of the spectrum this code anchors. Lets consumers auto-color a chart without hand-annotating every response space. \"none\" means polarity is not applicable to this code (e.g. a partisan self-ID code where no direction is implied) — it does not mean the respondent declined to answer. Use missing: true for non-substantive responses."
        },
        "missing": {
          "type": "boolean",
          "default": false,
          "description": "True if this code is a non-substantive response (\"Don't know\", \"Refused\", \"No answer\"). Lets consumers compute an \"answered\" base by excluding these entries."
        },
        "missing_kind": {
          "type": "string",
          "enum": ["dk", "refused", "skipped", "no_opinion", "other"],
          "description": "If missing is true, which kind: \"dk\" (don't know), \"refused\", \"skipped\" (not shown or no answer), \"no_opinion\" (offered as a distinct option), or \"other\"."
        },
        "anchor_text": {
          "type": "string",
          "description": "Anchor phrasing shown alongside the code in a questionnaire, e.g. \"Cold — 0\" paired with a 0-100 feeling thermometer value."
        }
      }
    },

    "IntegerRangeSpace": {
      "type": "object",
      "required": ["kind", "min", "max"],
      "additionalProperties": false,
      "description": "Bounded integer values. Used for rank positions (1..N), feeling thermometers (0..100), and ladder questions (Cantril 0..10).",
      "properties": {
        "kind": { "const": "integer_range" },
        "min": {
          "type": "integer",
          "description": "Smallest allowed value."
        },
        "max": {
          "type": "integer",
          "description": "Largest allowed value."
        },
        "step": {
          "type": "integer",
          "default": 1,
          "minimum": 1,
          "description": "Step between allowed values. Usually 1 (every integer in range)."
        },
        "anchors": {
          "type": "array",
          "description": "Labelled reference points along the range, e.g. 0 = \"Very cold\", 50 = \"Neutral\", 100 = \"Very warm\" on a feeling thermometer.",
          "items": {
            "type": "object",
            "required": ["at", "label"],
            "additionalProperties": false,
            "properties": {
              "at": {
                "type": "integer",
                "description": "The integer value this label attaches to."
              },
              "label": {
                "type": "string",
                "description": "Text shown at this point on the scale."
              }
            }
          }
        },
        "missing_codes": {
          "type": "array",
          "items": { "$ref": "#/$defs/Code" },
          "description": "Non-substantive responses (\"Don't know\", \"Refused\") shown alongside the numeric scale. Structurally identical to enum codes but stored separately because they aren't part of the numeric range."
        }
      }
    },

    "RealRangeSpace": {
      "type": "object",
      "required": ["kind", "min", "max"],
      "additionalProperties": false,
      "description": "Bounded continuous values. Used for continuous scores like rescaled MaxDiff percentages or budget allocations with fractional amounts.",
      "properties": {
        "kind": { "const": "real_range" },
        "min": {
          "type": "number",
          "description": "Smallest allowed value."
        },
        "max": {
          "type": "number",
          "description": "Largest allowed value."
        },
        "step": {
          "type": "number",
          "exclusiveMinimum": 0,
          "description": "Smallest increment between allowed values. 0.01 for percentages, 0.5 for half-points, and so on."
        },
        "anchors": {
          "type": "array",
          "description": "Labelled reference points along the range.",
          "items": {
            "type": "object",
            "required": ["at", "label"],
            "additionalProperties": false,
            "properties": {
              "at": {
                "type": "number",
                "description": "The numeric value this label attaches to."
              },
              "label": {
                "type": "string",
                "description": "Text shown at this point on the scale."
              }
            }
          }
        },
        "missing_codes": {
          "type": "array",
          "items": { "$ref": "#/$defs/Code" },
          "description": "Non-substantive responses shown alongside the numeric scale."
        }
      }
    },

    "TextSpace": {
      "type": "object",
      "required": ["kind"],
      "additionalProperties": false,
      "description": "Open-ended text responses. Rarely used in published toplines — pollsters almost always post-code verbatims into an enum for reporting — but declared here for completeness and for instrument descriptions where the uncoded form matters.",
      "properties": {
        "kind": { "const": "text" },
        "min_length": {
          "type": "integer",
          "minimum": 0,
          "description": "Minimum character count if the pollster enforced one; leave unset otherwise."
        },
        "max_length": {
          "type": "integer",
          "minimum": 1,
          "description": "Maximum character count if the pollster enforced one."
        }
      }
    },

    "Constraint": {
      "description": "A cross-dimension rule imposed by the question's mechanics. Choose the kind that matches the question: permutation for full rankings, sum for budget allocations, exclusive for \"None of the above\", and so on.",
      "oneOf": [
        { "$ref": "#/$defs/PermutationConstraint" },
        { "$ref": "#/$defs/PartialRankConstraint" },
        { "$ref": "#/$defs/SumConstraint" },
        { "$ref": "#/$defs/CardinalityConstraint" },
        { "$ref": "#/$defs/ExclusiveConstraint" }
      ]
    },

    "PermutationConstraint": {
      "type": "object",
      "required": ["kind", "dimensions"],
      "additionalProperties": false,
      "description": "The named dimensions collectively take the values 1..N exactly once each (full ranking).",
      "properties": {
        "kind": { "const": "permutation" },
        "dimensions": {
          "type": "array",
          "minItems": 2,
          "items": { "type": "string" },
          "description": "The ranked items. Each respondent assigns each dimension a distinct rank from 1 to N."
        }
      }
    },

    "PartialRankConstraint": {
      "type": "object",
      "required": ["kind", "dimensions", "top_k"],
      "additionalProperties": false,
      "description": "Respondents rank top_k of the named dimensions; the rest are unranked.",
      "properties": {
        "kind": { "const": "partial_rank" },
        "dimensions": {
          "type": "array",
          "minItems": 2,
          "items": { "type": "string" },
          "description": "Items available to rank."
        },
        "top_k": {
          "type": "integer",
          "minimum": 1,
          "description": "How many items each respondent ranks; the rest are unranked."
        }
      }
    },

    "SumConstraint": {
      "type": "object",
      "required": ["kind", "dimensions", "value"],
      "additionalProperties": false,
      "description": "The named dimensions' numeric values sum to the given constant (constant-sum or budget allocation).",
      "properties": {
        "kind": { "const": "sum" },
        "dimensions": {
          "type": "array",
          "minItems": 2,
          "items": { "type": "string" },
          "description": "Items across which the sum applies."
        },
        "value": {
          "type": "number",
          "description": "The constant the dimensions must sum to, e.g. 100 for percent-allocation questions."
        }
      }
    },

    "CardinalityConstraint": {
      "type": "object",
      "required": ["kind", "dimensions", "target_code"],
      "additionalProperties": false,
      "description": "Bounds how many of the named dimensions take the target code value — typically the \"selected\" code in a multi-select: at least min and at most max. min and max are each optional.",
      "properties": {
        "kind": { "const": "cardinality" },
        "dimensions": {
          "type": "array",
          "minItems": 1,
          "items": { "type": "string" },
          "description": "Items over which the count applies."
        },
        "target_code": {
          "oneOf": [{ "type": "string" }, { "type": "integer" }],
          "description": "The code whose occurrences are being counted, typically the \"selected\" code in a multi-select."
        },
        "min": {
          "type": "integer",
          "minimum": 0,
          "description": "Lower bound on the count of dimensions taking target_code."
        },
        "max": {
          "type": "integer",
          "minimum": 1,
          "description": "Upper bound on the count. Common values: 3 for \"pick up to 3\"; equal to the number of dimensions for unrestricted."
        }
      }
    },

    "ExclusiveConstraint": {
      "type": "object",
      "required": ["kind", "when_dimension", "when_code", "excludes"],
      "additionalProperties": false,
      "description": "If when_dimension takes value when_code, none of the excludes dimensions may take their target code. Used for \"None of the above\" in multi-selects.",
      "properties": {
        "kind": { "const": "exclusive" },
        "when_dimension": {
          "type": "string",
          "description": "The sentinel dimension that, when selected, excludes the others."
        },
        "when_code": {
          "oneOf": [{ "type": "string" }, { "type": "integer" }],
          "description": "The code value on when_dimension that triggers the exclusion — typically the \"selected\" code."
        },
        "excludes": {
          "type": "array",
          "minItems": 1,
          "items": { "type": "string" },
          "description": "Dimensions that must not take their selected code when the sentinel fires."
        }
      }
    },

    "Result": {
      "type": "object",
      "required": ["question_id", "dimension_id", "base", "distribution"],
      "additionalProperties": false,
      "description": "One published marginal: the distribution of responses on a single dimension, within one (question, variant, subgroup) slice, in this wave.",
      "properties": {
        "question_id": {
          "type": "string",
          "description": "Id of the question this result reports on. Must match a question declared at the study level."
        },
        "dimension_id": {
          "type": "string",
          "description": "Id of the dimension within that question that this result marginalizes over."
        },
        "variant_id": {
          "type": "string",
          "description": "If the question has split-sample variants, which one this result belongs to. Omit for the canonical form."
        },
        "subgroup_id": {
          "type": "string",
          "description": "Id of the subgroup this result is filtered to. Omit for full-sample results — there is no sentinel id for the full sample, absence is the signal."
        },
        "base": {
          "$ref": "#/$defs/Base",
          "description": "The slice over which this result's percentages are computed — unified object carrying base kind, counts, and optional notes."
        },
        "precision": {
          "$ref": "#/$defs/Precision",
          "description": "Subgroup-level precision — margin of error, design effect, confidence level — for the percentages in this result. Supplements the wave-level sample.margin_of_error, which applies to the full sample."
        },
        "distribution": {
          "$ref": "#/$defs/Distribution",
          "description": "The percentages themselves. Choose the kind that matches the dimension: \"categorical\" (one entry per code, entries sum to ~100%), \"multi_select\" (entries are per-option % selected, may sum to >100%), \"numeric\" (summary stats and/or bins over an integer/real range), or \"best_worst\" (MaxDiff aggregates per scored item)."
        },
        "stats": {
          "$ref": "#/$defs/DerivedStats",
          "description": "Precomputed statistics — nets, mean ranks — that the pollster published alongside the raw distribution."
        }
      }
    },

    "Base": {
      "type": "object",
      "required": ["kind"],
      "additionalProperties": false,
      "description": "The reporting base for a result. kind=\"all\" includes missing codes in the denominator; \"answered\" excludes them (so missing-coded entries typically do not appear in the distribution); \"custom\" is any other filter and must describe itself in notes (enforced: notes is required when kind is \"custom\"). For filters expressible as subgroups (\"Among registered voters who answered Q1…\"), prefer a Subgroup over kind=\"custom\".",
      "properties": {
        "kind": {
          "type": "string",
          "enum": ["all", "answered", "custom"],
          "description": "Which denominator convention. \"all\" = full subgroup including DK/refused/etc.; \"answered\" = excludes entries marked missing: true (those entries are then typically absent from the distribution); \"custom\" = any other filter — describe in notes."
        },
        "n_unweighted": {
          "type": "integer",
          "minimum": 0,
          "description": "Unweighted count of respondents in the base."
        },
        "n_weighted": {
          "type": "number",
          "minimum": 0,
          "description": "Weighted count of respondents in the base, if the pollster reports one."
        },
        "notes": {
          "type": "string",
          "description": "Free-text description of the base. Required for kind=\"custom\"; ignored for the others."
        }
      },
      "allOf": [
        {
          "if": { "properties": { "kind": { "const": "custom" } }, "required": ["kind"] },
          "then": { "required": ["notes"] }
        }
      ]
    },

    "Precision": {
      "type": "object",
      "additionalProperties": false,
      "description": "Precision annotations for the percentages in a result. All fields optional — pollsters who don't publish subgroup precision leave this out.",
      "properties": {
        "margin_of_error": {
          "type": "number",
          "minimum": 0,
          "description": "Design-effect-adjusted margin of error in percentage points, at the confidence level given by ci_level (default 0.95). Applies to the subgroup and base this result reports on. Reported as a non-negative half-width (e.g. 3.5 for ±3.5 points)."
        },
        "design_effect": {
          "type": "number",
          "exclusiveMinimum": 0,
          "description": "Design effect (Deff or Kish's design factor) on the subgroup's estimates. A Deff of 1.0 indicates a simple random sample; larger values indicate variance inflation from weighting."
        },
        "ci_level": {
          "type": "number",
          "exclusiveMinimum": 0,
          "exclusiveMaximum": 1,
          "default": 0.95,
          "description": "Confidence level used to compute margin_of_error and any distribution-entry ci bounds, e.g. 0.95 for 95%. Consumers should assume 0.95 when omitted."
        },
        "method": {
          "type": "string",
          "description": "How the precision was computed — e.g. \"taylor_linearized\", \"jackknife\", \"bootstrap\", \"normal_approximation\". Open vocabulary."
        }
      }
    },

    "Distribution": {
      "description": "The reported marginal. Four kinds, validated by three schemas: \"categorical\" for code-level marginals that sum to ~100% and \"multi_select\" for per-option select-all-that-apply percentages (may sum to >100%) — both handled by CategoricalDistribution; \"numeric\" for summaries and/or bins over integer/real ranges; \"best_worst\" for MaxDiff aggregates per scored item.",
      "oneOf": [
        { "$ref": "#/$defs/CategoricalDistribution" },
        { "$ref": "#/$defs/NumericDistribution" },
        { "$ref": "#/$defs/BestWorstDistribution" }
      ]
    },

    "CategoricalDistribution": {
      "type": "object",
      "required": ["kind", "entries"],
      "additionalProperties": false,
      "description": "Marginal distribution over the codes of an enum response space. Use kind=\"categorical\" when the dimension is a single pick and entries sum to ~100% (modulo rounding); use kind=\"multi_select\" to report a select-all-that-apply battery under one dimension whose response_space codes are the options — entries are per-option % selected and may sum to >100%. The N-binary-dimensions form of multi-select remains canonical; multi_select is a compact alternative for high-cardinality batteries.",
      "properties": {
        "kind": {
          "type": "string",
          "enum": ["categorical", "multi_select"],
          "description": "\"categorical\" — entries are mutually exclusive code shares summing to ~100%. \"multi_select\" — entries are per-option % selected and may sum to >100%. The \"% selected\" reading is the signal for downstream consumers."
        },
        "weighting": {
          "type": "string",
          "enum": ["weighted", "unweighted"],
          "default": "weighted",
          "description": "Whether the pct values are weighted or unweighted. Most pollster topline percentages are weighted; commercial crosstabs often run significance tests on unweighted cell counts. Default is \"weighted\" — set explicitly when publishing unweighted."
        },
        "entries": {
          "type": "array",
          "description": "One entry per reported code. For kind=\"categorical\", cover every non-missing code (plus any missing codes still in the denominator under base.kind=\"all\"). For kind=\"multi_select\", one entry per option with pct = % selected.",
          "items": {
            "type": "object",
            "required": ["code"],
            "additionalProperties": false,
            "properties": {
              "code": {
                "oneOf": [{ "type": "string" }, { "type": "integer" }],
                "description": "The code in the response space this percentage applies to."
              },
              "pct": {
                "type": "number",
                "minimum": 0,
                "maximum": 100,
                "description": "Percentage of the base for this code — % choosing it (categorical) or % selecting it (multi_select). When pollsters report \"<1\", publishers typically round to 1; document the convention used in methodology_notes. Interpretation (weighted vs. unweighted) is governed by the enclosing distribution's weighting flag."
              },
              "count": {
                "type": "object",
                "additionalProperties": false,
                "description": "Respondent counts for this cell, if published.",
                "properties": {
                  "unweighted": {
                    "type": "integer",
                    "minimum": 0,
                    "description": "Unweighted count of respondents contributing to this cell."
                  },
                  "weighted": {
                    "type": "number",
                    "minimum": 0,
                    "description": "Weighted count, if published."
                  }
                }
              },
              "ci": {
                "type": "object",
                "additionalProperties": false,
                "description": "Confidence bounds on pct. Pair with enclosing result.precision.ci_level.",
                "properties": {
                  "lower": {
                    "type": "number",
                    "description": "Lower confidence bound on the percentage."
                  },
                  "upper": {
                    "type": "number",
                    "description": "Upper confidence bound on the percentage."
                  }
                }
              }
            }
          }
        }
      }
    },

    "NumericDistribution": {
      "type": "object",
      "required": ["kind"],
      "additionalProperties": false,
      "description": "Marginal distribution over a numeric response space (integer_range or real_range). May report summary statistics, a binned histogram, or both. Missing-code shares are reported separately.",
      "properties": {
        "kind": { "const": "numeric" },
        "weighting": {
          "type": "string",
          "enum": ["weighted", "unweighted"],
          "default": "weighted",
          "description": "Whether summary stats and bin pcts are weighted or unweighted. Default \"weighted\"."
        },
        "summary": {
          "type": "object",
          "additionalProperties": false,
          "description": "Summary statistics over the numeric responses.",
          "properties": {
            "mean": {
              "type": "number",
              "description": "Mean value over respondents in the base."
            },
            "median": {
              "type": "number",
              "description": "Median value."
            },
            "sd": {
              "type": "number",
              "description": "Standard deviation."
            },
            "min": {
              "type": "number",
              "description": "Smallest value observed — not the scale's declared minimum."
            },
            "max": {
              "type": "number",
              "description": "Largest value observed."
            }
          }
        },
        "bins": {
          "type": "array",
          "description": "Histogram buckets, each reporting the share of respondents whose answer fell in that range. Useful when the pollster publishes a distribution plot but not raw microdata.",
          "items": {
            "type": "object",
            "required": ["from", "to", "pct"],
            "additionalProperties": false,
            "properties": {
              "from": {
                "type": "number",
                "description": "Lower bound of the bin (inclusive)."
              },
              "to": {
                "type": "number",
                "description": "Upper bound of the bin; exclusive unless inclusive_end is true."
              },
              "inclusive_end": {
                "type": "boolean",
                "default": false,
                "description": "True if the upper bound is inclusive; false (default) if exclusive."
              },
              "label": {
                "type": "string",
                "description": "Display label, e.g. \"Under 25\" or \"Very warm (81–100)\"."
              },
              "pct": {
                "type": "number",
                "minimum": 0,
                "maximum": 100,
                "description": "Percentage of the base in this bin."
              },
              "count_unweighted": {
                "type": "integer",
                "minimum": 0,
                "description": "Unweighted count in this bin, if published."
              }
            }
          }
        },
        "missing": {
          "type": "array",
          "description": "Share of respondents who gave a non-substantive answer. Each entry references one of the response space's missing_codes.",
          "items": {
            "type": "object",
            "required": ["code", "pct"],
            "additionalProperties": false,
            "properties": {
              "code": {
                "oneOf": [{ "type": "string" }, { "type": "integer" }],
                "description": "The missing code being reported."
              },
              "pct": {
                "type": "number",
                "minimum": 0,
                "maximum": 100,
                "description": "Share of the base represented by this missing code."
              }
            }
          }
        }
      }
    },

    "BestWorstDistribution": {
      "type": "object",
      "required": ["kind"],
      "additionalProperties": false,
      "description": "MaxDiff / best-worst scaling aggregate for one scored item. In a MaxDiff question each dimension_id identifies one item; a Result of this kind reports one item's aggregate. Iris v1 covers the published-table shape (counts and simple rescaled scores). HB-estimated individual-level utilities (mean/SD/RLH) and anchored-MaxDiff outputs (pct_respondents_above, etc.) are deliberately out of scope — they are Sawtooth-platform-specific and not commonly published in public opinion toplines.",
      "properties": {
        "kind": { "const": "best_worst" },
        "method": {
          "type": "string",
          "enum": ["counts", "bw_score", "hb"],
          "description": "How the aggregate was produced. \"counts\" = raw best_count / worst_count / appearances, no scoring. \"bw_score\" = counts plus a computed best-minus-worst score (often normalized by appearances). \"hb\" = hierarchical-Bayes estimated; Iris only carries the published summary here, not individual-level utilities."
        },
        "weighting": {
          "type": "string",
          "enum": ["weighted", "unweighted"],
          "default": "weighted",
          "description": "Whether the reported aggregates are weighted or unweighted."
        },
        "appearances": {
          "type": "integer",
          "minimum": 0,
          "description": "Number of times this item appeared across choice sets shown to respondents in the base."
        },
        "best_count": {
          "type": "integer",
          "minimum": 0,
          "description": "Number of times this item was picked as best across all choice sets."
        },
        "worst_count": {
          "type": "integer",
          "minimum": 0,
          "description": "Number of times this item was picked as worst across all choice sets."
        },
        "bw_score": {
          "type": "number",
          "description": "Best-minus-worst score. Often normalized by appearances and published on a −100 to +100 scale or standardized; describe the convention in methodology_notes."
        },
        "rescaled_pct": {
          "type": "number",
          "minimum": 0,
          "maximum": 100,
          "description": "Rescaled probability score on a 0–100 scale, interpretable as the probability this item is selected best from a typical choice set. The most commonly cited MaxDiff headline number."
        },
        "rank": {
          "type": "integer",
          "minimum": 1,
          "description": "Rank of this item among all scored items under this method (1 = highest)."
        },
        "se": {
          "type": "number",
          "minimum": 0,
          "description": "Standard error of the item's reported score, if published."
        },
        "ci": {
          "type": "object",
          "additionalProperties": false,
          "description": "Confidence bounds on rescaled_pct or bw_score, as produced.",
          "properties": {
            "lower": {
              "type": "number",
              "description": "Lower confidence bound on the reported score (rescaled_pct or bw_score)."
            },
            "upper": {
              "type": "number",
              "description": "Upper confidence bound on the reported score (rescaled_pct or bw_score)."
            }
          }
        }
      }
    },

    "DerivedStats": {
      "type": "object",
      "additionalProperties": false,
      "description": "Precomputed statistics the pollster published alongside the raw distribution. Entries in nets reference net ids declared on this dimension's enum response space; result-level precision (MOE, design effect, ci_level) lives on Result.precision.",
      "properties": {
        "mean_rank": {
          "type": "number",
          "description": "Mean rank across respondents for a single dimension of a ranking question. Lower means more preferred."
        },
        "top_share": {
          "type": "number",
          "minimum": 0,
          "maximum": 100,
          "description": "Share of respondents who ranked this dimension first (for rankings) or picked it (for most-important / most-concerning questions)."
        },
        "nets": {
          "type": "array",
          "description": "Precomputed net percentages, one per net declared on the response space. Each entry's id references a net defined on response_space.nets[]; the display label lives there.",
          "items": {
            "type": "object",
            "required": ["id", "pct"],
            "additionalProperties": false,
            "properties": {
              "id": {
                "type": "string",
                "description": "Id of the net being reported. Must match a net id declared on the response space."
              },
              "pct": {
                "type": "number",
                "description": "The net percentage — the combined share (for sum nets) or signed difference (for difference nets)."
              }
            }
          }
        }
      }
    }
  }
}
