# OpenAPI description of the chaotic well-known files served by bots.catastrophic.io.
openapi: 3.1.0
info:
  title: bots.catastrophic.io
  summary: Chaotic well-known files served at canonical paths for testing crawlers, scanners, and AI agents.
  description: |
    bots.catastrophic.io serves chaotic versions of the canonical-path files
    that crawlers, AI agents, security scanners, and federated software fetch
    automatically — robots.txt, llms.txt, sitemap.xml, /.well-known/security.txt,
    and others.

    Most files support a `?mode=` parameter selecting a specific flavour of
    chaos, and report the selected mode in an `X-Chaos-*-Mode` response header
    so monitoring clients can verify which chaos they received. Endpoints that
    deviate (the fixed conflicting-discovery siblings and `/.well-known/chaos`)
    document their own parameters and `X-Chaos-*` headers.

    All endpoints are intentionally public — no authentication required.

    For the chaos endpoints on the main host, see https://catastrophic.io/openapi.yaml
  version: 1.0.0
  contact:
    url: https://bots.catastrophic.io
  license:
    name: MIT
    identifier: MIT
servers:
  - url: https://bots.catastrophic.io
    description: Production

# Empty requirement list: no operation in this document requires authentication.
security: []

# Tag names are referenced by the `tags` list of each operation under `paths`.
tags:
  - name: crawler
    description: Files crawlers, AI agents, and SEO tools fetch from canonical paths.
  - name: app-linking
    description: Mobile and PWA discovery (iOS AASA, Android assetlinks, web app manifest).
  - name: federated-identity
    description: Federated network identity discovery (Fediverse nodeinfo, AT Protocol DID).
  # Covers both the fixed conflicting siblings and the mode-driven discovery documents.
  - name: discovery
    description: Conflicting OIDC/OAuth/agent-card sibling files that disagree about authoritative host, plus parametric chaos for OAuth protected-resource metadata, MCP server cards, and agent-skills indexes.
  - name: discovery-chaos
    description: Parametric chaos for well-known discovery schemas. Also reachable on catastrophic.io.

paths:

  # ----- crawler files -----------------------------------------------------

  /robots.txt:
    get:
      # Chaotic robots.txt; the variant served is picked by the shared `mode` query parameter.
      tags:
        - crawler
      operationId: robotsTxt
      summary: Chaotic robots.txt
      description: |
        Modes: `contradictory` (default; Allow and Disallow for same path),
        `tarpit` (50 narrow Allow paths that all loop), `malformed` (missing
        colons, invalid directives), `infinite-crawl-delay` (Crawl-delay: 999999999).
      externalDocs:
        url: https://catastrophic.io/bots/endpoints/crawler-files/bots-robots/
        description: Endpoint reference on catastrophic.io
      parameters:
        - $ref: '#/components/parameters/Mode'
      responses:
        '200':
          description: Always 200. Body content depends on mode.
          headers:
            X-Chaos-Robots-Mode:
              description: The mode that produced this response.
              # One enum value per mode named in the operation description.
              schema:
                type: string
                enum:
                  - contradictory
                  - tarpit
                  - malformed
                  - infinite-crawl-delay
          content:
            text/plain:
              schema:
                type: string

  /llms.txt:
    get:
      # AI-agent discovery document served as Markdown; `mode` selects the defect.
      tags:
        - crawler
      operationId: llmsTxt
      summary: Chaotic llms.txt (AI agent discovery document)
      description: |
        Modes: `dead-links` (default; every linked path 404s), `contradictory`
        (summary disagrees with linked-page content), `prompt-injection`
        (embeds 'ignore previous instructions' content — useful for testing
        whether your AI agent sanitises ingested llms.txt before acting on it).
      externalDocs:
        url: https://catastrophic.io/bots/endpoints/crawler-files/bots-llms/
        description: Endpoint reference on catastrophic.io
      parameters:
        - $ref: '#/components/parameters/Mode'
      responses:
        '200':
          description: Always 200. Markdown body.
          headers:
            X-Chaos-Llms-Mode:
              # One enum value per mode named in the operation description.
              schema:
                type: string
                enum:
                  - dead-links
                  - contradictory
                  - prompt-injection
          content:
            text/markdown:
              schema:
                type: string

  /sitemap.xml:
    get:
      # XML sitemap whose flaw is chosen by the shared `mode` query parameter.
      tags:
        - crawler
      operationId: sitemapXml
      summary: Chaotic sitemap
      description: |
        Modes: `dead-urls` (default; every <loc> 404s), `future-lastmod`
        (lastmod dates in 2099+), `circular-index` (sitemap-index referencing
        itself), `wrong-encoding` (Content-Encoding: gzip on a plain-text body).
      externalDocs:
        url: https://catastrophic.io/bots/endpoints/crawler-files/bots-sitemap/
        description: Endpoint reference on catastrophic.io
      parameters:
        - $ref: '#/components/parameters/Mode'
      responses:
        '200':
          description: Always 200. XML body.
          headers:
            X-Chaos-Sitemap-Mode:
              # One enum value per mode named in the operation description.
              schema:
                type: string
                enum:
                  - dead-urls
                  - future-lastmod
                  - circular-index
                  - wrong-encoding
          content:
            application/xml:
              schema:
                type: string

  /.well-known/ai.txt:
    get:
      # Plain-text AI-training policy file; `mode` selects the defect.
      tags:
        - crawler
      operationId: aiTxt
      summary: Chaotic ai.txt (AI training opt-in/out)
      description: |
        Modes: `contradictory` (default; Allow and Disallow for same path on
        GPTBot/ClaudeBot/etc.), `fake-bots` (references AI crawlers that
        don't exist), `malformed` (missing colons, invalid values).
      externalDocs:
        url: https://catastrophic.io/bots/endpoints/crawler-files/bots-ai-txt/
        description: Endpoint reference on catastrophic.io
      parameters:
        - $ref: '#/components/parameters/Mode'
      responses:
        '200':
          description: Always 200. Plain-text body.
          headers:
            X-Chaos-Ai-Txt-Mode:
              # One enum value per mode named in the operation description.
              schema:
                type: string
                enum:
                  - contradictory
                  - fake-bots
                  - malformed
          content:
            text/plain:
              schema:
                type: string

  /.well-known/security.txt:
    get:
      # RFC 9116 security.txt with a mode-selected flaw.
      tags:
        - crawler
      operationId: securityTxt
      summary: Chaotic RFC 9116 security.txt
      description: |
        Modes: `expired` (default; Expires: 1999-12-31, well in the past),
        `dead-contact` (Contact URLs that 404), `nonsense` (gibberish in
        every field), `unsigned-canonical` (Canonical claims a URL we don't
        actually serve from, with no signature).
      externalDocs:
        url: https://catastrophic.io/bots/endpoints/crawler-files/bots-security/
        description: Endpoint reference on catastrophic.io
      parameters:
        - $ref: '#/components/parameters/Mode'
      responses:
        '200':
          description: Always 200. Plain-text body.
          headers:
            X-Chaos-Security-Mode:
              # One enum value per mode named in the operation description.
              schema:
                type: string
                enum:
                  - expired
                  - dead-contact
                  - nonsense
                  - unsigned-canonical
          content:
            text/plain:
              schema:
                type: string

  /ads.txt:
    get:
      # IAB ads.txt with a mode-selected flaw.
      tags:
        - crawler
      operationId: adsTxt
      summary: Chaotic IAB ads.txt
      description: |
        Modes: `contradictory` (default; same seller listed as both DIRECT
        and RESELLER), `fake-sellers` (domains that don't exist or are
        decommissioned), `malformed` (missing fields, bad delimiters).
      externalDocs:
        url: https://catastrophic.io/bots/endpoints/crawler-files/bots-ads-txt/
        description: Endpoint reference on catastrophic.io
      parameters:
        - $ref: '#/components/parameters/Mode'
      responses:
        '200':
          description: Always 200. Plain-text body.
          headers:
            X-Chaos-Ads-Mode:
              # One enum value per mode named in the operation description.
              schema:
                type: string
                enum:
                  - contradictory
                  - fake-sellers
                  - malformed
          content:
            text/plain:
              schema:
                type: string

  /humans.txt:
    get:
      # humans.txt with a mode-selected flaw.
      tags:
        - crawler
      operationId: humansTxt
      summary: Chaotic humans.txt
      description: |
        Modes: `contradictory` (default; same person listed in conflicting
        roles), `recursive` ('see /humans.txt' for every field),
        `time-paradox` (impossible dates like 2099-13-32).
      externalDocs:
        url: https://catastrophic.io/bots/endpoints/crawler-files/bots-humans/
        description: Endpoint reference on catastrophic.io
      parameters:
        - $ref: '#/components/parameters/Mode'
      responses:
        '200':
          description: Always 200. Plain-text body.
          headers:
            X-Chaos-Humans-Mode:
              # One enum value per mode named in the operation description.
              schema:
                type: string
                enum:
                  - contradictory
                  - recursive
                  - time-paradox
          content:
            text/plain:
              schema:
                type: string

  # ----- app-linking files -------------------------------------------------

  /.well-known/apple-app-site-association:
    get:
      # iOS AASA document; two media types are declared because Content-Type varies by mode.
      tags:
        - app-linking
      operationId: aasa
      summary: Chaotic iOS Apple App Site Association
      description: |
        Modes: `wrong-content-type` (default; served as text/html — Apple rejects),
        `malformed-json` (invalid JSON), `bad-appid` (missing TEAMID prefix
        or wrong format), `path-mismatch` (empty paths array).
      externalDocs:
        url: https://catastrophic.io/bots/endpoints/app-linking/bots-aasa/
        description: Endpoint reference on catastrophic.io
      parameters:
        - $ref: '#/components/parameters/Mode'
      responses:
        '200':
          description: Always 200. Content-Type varies by mode.
          headers:
            X-Chaos-Aasa-Mode:
              # One enum value per mode named in the operation description.
              schema:
                type: string
                enum:
                  - wrong-content-type
                  - malformed-json
                  - bad-appid
                  - path-mismatch
          content:
            application/json:
              schema:
                type: object
            text/html:
              schema:
                type: string

  /.well-known/assetlinks.json:
    get:
      # Android App Links statement list with a mode-selected flaw.
      tags:
        - app-linking
      operationId: assetlinks
      summary: Chaotic Android App Links assetlinks.json
      description: |
        Modes: `wrong-fingerprint-format` (default; fingerprint without colon
        delimiters), `package-mismatch` (fictional package_name),
        `unknown-relation` (invented permission relation), `malformed-json`
        (truncated JSON array).
      externalDocs:
        url: https://catastrophic.io/bots/endpoints/app-linking/bots-assetlinks/
        description: Endpoint reference on catastrophic.io
      parameters:
        - $ref: '#/components/parameters/Mode'
      responses:
        '200':
          description: Always 200.
          headers:
            X-Chaos-Assetlinks-Mode:
              # One enum value per mode named in the operation description.
              schema:
                type: string
                enum:
                  - wrong-fingerprint-format
                  - package-mismatch
                  - unknown-relation
                  - malformed-json
          content:
            application/json:
              schema:
                type: array
                items:
                  type: object

  /manifest.webmanifest:
    get:
      # PWA web app manifest with a mode-selected flaw.
      tags:
        - app-linking
      operationId: manifest
      summary: Chaotic PWA web app manifest
      description: |
        Modes: `missing-icons` (default; icons array empty — PWA install prompt
        does not appear), `wrong-start-url` (start_url points to a 404),
        `contradictory-display` (display: standalone with conflicting
        display_override), `invalid-colors` (invalid theme/background color
        values), `name-mismatch` (name and short_name disagree).
      externalDocs:
        url: https://catastrophic.io/bots/endpoints/app-linking/bots-manifest/
        description: Endpoint reference on catastrophic.io
      parameters:
        - $ref: '#/components/parameters/Mode'
      responses:
        '200':
          description: Always 200.
          headers:
            X-Chaos-Manifest-Mode:
              # One enum value per mode named in the operation description.
              schema:
                type: string
                enum:
                  - missing-icons
                  - wrong-start-url
                  - contradictory-display
                  - invalid-colors
                  - name-mismatch
          content:
            application/manifest+json:
              schema:
                type: object

  # ----- federated identity ------------------------------------------------

  /.well-known/nodeinfo:
    get:
      # Fediverse nodeinfo discovery document with a mode-selected flaw.
      tags:
        - federated-identity
      operationId: nodeinfo
      summary: Chaotic Fediverse nodeinfo discovery
      description: |
        Modes: `bad-link` (default; links[].href points to a 404),
        `wrong-rel` (rel value is not the standard nodeinfo schema URL),
        `version-mismatch` (claims version 9.99), `cross-host` (links point
        to elsewhere.example).
      externalDocs:
        url: https://catastrophic.io/bots/endpoints/federated-identity/bots-nodeinfo/
        description: Endpoint reference on catastrophic.io
      parameters:
        - $ref: '#/components/parameters/Mode'
      responses:
        '200':
          description: Always 200.
          headers:
            X-Chaos-Nodeinfo-Mode:
              # One enum value per mode named in the operation description.
              schema:
                type: string
                enum:
                  - bad-link
                  - wrong-rel
                  - version-mismatch
                  - cross-host
          content:
            application/json:
              # Body is an object holding a `links` array of rel/href pairs.
              schema:
                type: object
                properties:
                  links:
                    type: array
                    items:
                      type: object
                      properties:
                        rel:
                          type: string
                        href:
                          type: string

  /.well-known/atproto-did:
    get:
      # AT Protocol DID document served as plain text; `mode` selects the defect.
      tags:
        - federated-identity
      operationId: atprotoDid
      summary: Chaotic AT Protocol (Bluesky) DID identifier
      description: |
        Modes: `invalid-did-format` (default; not in DID format),
        `multiple-lines` (two DIDs returned, spec says one),
        `wrong-method` (did:web pointing to a different domain),
        `empty` (no body at all).
      externalDocs:
        url: https://catastrophic.io/bots/endpoints/federated-identity/bots-atproto-did/
        description: Endpoint reference on catastrophic.io
      parameters:
        - $ref: '#/components/parameters/Mode'
      responses:
        '200':
          description: Always 200. Plain-text body.
          headers:
            X-Chaos-Atproto-Did-Mode:
              # One enum value per mode named in the operation description.
              schema:
                type: string
                enum:
                  - invalid-did-format
                  - multiple-lines
                  - wrong-method
                  - empty
          content:
            text/plain:
              schema:
                type: string

  # ----- conflicting discovery siblings -----------------------------------

  /.well-known/openid-configuration:
    get:
      # Fixed (no `mode` parameter) member of the conflicting-discovery sibling group.
      tags:
        - discovery
      operationId: conflictOidc
      summary: OIDC discovery (claims issuer A)
      description: |
        Claims `issuer: https://catastrophic.io`. Part of the conflicting-
        discovery sibling group; compare against `/.well-known/oauth-authorization-server`
        and `/.well-known/agent-card.json`, which claim different
        authoritative hosts.
      externalDocs:
        url: https://catastrophic.io/bots/endpoints/discovery-siblings/openid-configuration/
        description: Endpoint reference on catastrophic.io
      responses:
        '200':
          description: Always 200.
          headers:
            # Constant marker shared by the conflicting sibling documents.
            X-Chaos-Conflict-Group:
              schema:
                type: string
                const: conflicting-discovery
            X-Chaos-Claims-Issuer:
              schema:
                type: string
          content:
            application/json:
              schema:
                type: object

  /.well-known/oauth-authorization-server:
    get:
      # Fixed (no `mode` parameter) member of the conflicting-discovery sibling group.
      tags:
        - discovery
      operationId: conflictOauthAs
      summary: OAuth Authorization Server (claims issuer B)
      description: |
        Claims `issuer: https://identity.catastrophic.io`. Disagrees with
        the OIDC document above and the agent card.
      externalDocs:
        url: https://catastrophic.io/bots/endpoints/discovery-siblings/oauth-authorization-server/
        description: Endpoint reference on catastrophic.io
      responses:
        '200':
          description: Always 200.
          headers:
            # Constant marker shared by the conflicting sibling documents.
            X-Chaos-Conflict-Group:
              schema:
                type: string
                const: conflicting-discovery
            X-Chaos-Claims-Issuer:
              schema:
                type: string
          content:
            application/json:
              schema:
                type: object

  /.well-known/oauth-protected-resource:
    get:
      # RFC 9728 metadata document; declares its own `mode` parameter with an enum and default.
      operationId: oauthProtectedResource
      externalDocs:
        description: Endpoint reference on catastrophic.io
        url: https://catastrophic.io/bots/endpoints/discovery-siblings/oauth-protected-resource/
      tags: [discovery]
      summary: RFC 9728 Protected Resource Metadata (parametric chaos)
      description: |
        Parametric chaos for the OAuth Protected Resource Metadata
        well-known document. Default mode joins the conflicting-
        discovery group with an `authorization_servers` value that
        disagrees with the OIDC and AS documents above. Other modes
        target resource-server-specific flaws.
      parameters:
        - name: mode
          in: query
          description: |
            `mismatched-issuer` (default) — `authorization_servers` points at identity.catastrophic.io but `/.well-known/openid-configuration` claims issuer catastrophic.io.
            `unreachable-as` — `authorization_servers` references nonexistent.invalid (RFC 6761).
            `invalid-bearer-methods` — `bearer_methods_supported` lists values not in the IANA registry.
            `mismatched-resource-id` — `resource` field doesn't match the URL the document is served from (RFC 9728 §3 violation).
          schema:
            type: string
            enum: [mismatched-issuer, unreachable-as, invalid-bearer-methods, mismatched-resource-id]
            default: mismatched-issuer
      responses:
        '200':
          description: Always 200. Body shape per RFC 9728 with the chosen flaw baked in.
          headers:
            # Enum mirrors the `mode` query parameter, matching how the other
            # X-Chaos-*-Mode headers in this document are typed.
            X-Chaos-Opr-Mode:
              description: The mode that produced this response.
              schema:
                type: string
                enum: [mismatched-issuer, unreachable-as, invalid-bearer-methods, mismatched-resource-id]
            X-Chaos-Opr-Note:
              description: Plain-text explanation of the flaw under test.
              schema: { type: string }
            X-Chaos-Conflict-Group:
              description: Present in `mismatched-issuer` mode only.
              schema: { type: string, const: conflicting-discovery }
            X-Chaos-Claims-Issuer:
              description: Present in `mismatched-issuer` mode only.
              schema: { type: string }
          content:
            application/json:
              schema: { type: object }
  /.well-known/mcp/server-card.json:
    get:
      # MCP Server Card document; declares its own `mode` parameter with an enum and default.
      operationId: mcpServerCard
      externalDocs:
        description: Endpoint reference on catastrophic.io
        url: https://catastrophic.io/bots/endpoints/discovery-siblings/mcp-server-card/
      tags: [discovery]
      summary: SEP-1649 MCP Server Card (parametric chaos)
      description: |
        Parametric chaos for the Model Context Protocol Server Card.
        Four modes covering transport omission, protocol-version
        mismatch, dead transport URLs, and schema-invalid documents.
        SEP-1649 is still being standardised
        (modelcontextprotocol/modelcontextprotocol#2127); modes target
        the stable core shape.
      parameters:
        - name: mode
          in: query
          description: |
            `missing-transport` (default) — capabilities declared but no transport field.
            `version-mismatch` — protocolVersion claims 2024-11-05 but capabilities include elicitation (introduced 2025-06-18).
            `transport-404` — transport.endpoint references nonexistent.invalid (RFC 6761).
            `invalid-schema` — wrong field types plus extra unknown top-level fields.
          schema:
            type: string
            enum: [missing-transport, version-mismatch, transport-404, invalid-schema]
            default: missing-transport
      responses:
        '200':
          description: Always 200. Body shape per SEP-1649 with the chosen flaw baked in.
          headers:
            # Enum mirrors the `mode` query parameter, matching how the other
            # X-Chaos-*-Mode headers in this document are typed.
            X-Chaos-Mcp-Mode:
              description: The mode that produced this response.
              schema:
                type: string
                enum: [missing-transport, version-mismatch, transport-404, invalid-schema]
            X-Chaos-Mcp-Note:
              description: Plain-text explanation of the flaw under test.
              schema: { type: string }
          content:
            application/json:
              schema: { type: object }
  /.well-known/agent-skills/index.json:
    get:
      # Agent Skills Discovery index; declares its own `mode` parameter with an enum and default.
      operationId: agentSkillsIndex
      externalDocs:
        description: Endpoint reference on catastrophic.io
        url: https://catastrophic.io/bots/endpoints/discovery-siblings/agent-skills-index/
      tags: [discovery]
      summary: Agent Skills Discovery v0.2.0 index (parametric chaos)
      description: |
        Parametric chaos for the Agent Skills Discovery index that AI
        agents check to learn what skills a host advertises. Four
        modes covering digest mismatch, missing schema, dead skill
        URLs, and stale schema-version declarations.
      parameters:
        - name: mode
          in: query
          description: |
            `bad-digest` (default) — skills[0].sha256 doesn't match the body returned by skills[0].url.
            `missing-schema` — top-level $schema absent.
            `skill-404` — skills[0].url returns 404.
            `stale-version` — $schema references v0.1.0 but body uses v0.2.0-only fields.
          schema:
            type: string
            enum: [bad-digest, missing-schema, skill-404, stale-version]
            default: bad-digest
      responses:
        '200':
          description: Always 200. Body shape per Agent Skills Discovery v0.2.0 with the chosen flaw baked in.
          headers:
            # Enum mirrors the `mode` query parameter, matching how the other
            # X-Chaos-*-Mode headers in this document are typed.
            X-Chaos-Skills-Mode:
              description: The mode that produced this response.
              schema:
                type: string
                enum: [bad-digest, missing-schema, skill-404, stale-version]
            X-Chaos-Skills-Note:
              description: Plain-text explanation of the flaw under test.
              schema: { type: string }
          content:
            application/json:
              schema: { type: object }
  /.well-known/agent-card.json:
    get:
      # Fixed (no `mode` parameter) member of the conflicting-discovery sibling group.
      tags:
        - discovery
      operationId: conflictAgentCard
      summary: Agent card (claims base C)
      description: |
        Claims `url: https://agents.catastrophic.io/echo`. Also served at
        `/.well-known/agent.json` for the older Google A2A path.
      externalDocs:
        url: https://catastrophic.io/bots/endpoints/discovery-siblings/agent-card/
        description: Endpoint reference on catastrophic.io
      responses:
        '200':
          description: Always 200.
          headers:
            # Constant marker shared by the conflicting sibling documents.
            X-Chaos-Conflict-Group:
              schema:
                type: string
                const: conflicting-discovery
            X-Chaos-Claims-Url:
              schema:
                type: string
          content:
            application/json:
              schema:
                type: object

  /.well-known/agent.json:
    get:
      # Legacy path for the agent card; documented as an alias of `/.well-known/agent-card.json`.
      operationId: conflictAgentCardLegacy
      externalDocs:
        description: Endpoint reference on catastrophic.io
        url: https://catastrophic.io/bots/endpoints/discovery-siblings/agent-card/
      tags: [discovery]
      summary: Agent card (legacy Google A2A path)
      description: Alias of `/.well-known/agent-card.json`.
      responses:
        '200':
          description: Always 200.
          # Headers declared to match `/.well-known/agent-card.json`, which this path aliases.
          # NOTE(review): assumes the alias is served by the same handler — confirm.
          headers:
            X-Chaos-Conflict-Group:
              schema: { type: string, const: conflicting-discovery }
            X-Chaos-Claims-Url:
              schema: { type: string }
          content:
            application/json:
              schema: { type: object }
  # ----- parametric discovery chaos (dual-hosted) -------------------------

  /.well-known/chaos:
    get:
      # Single endpoint combining a `schema` choice with a `mode` transformation.
      tags:
        - discovery-chaos
      operationId: wellKnownChaos
      summary: Parametric chaos for well-known discovery schemas
      description: |
        Returns one of six discovery schemas (openid-configuration,
        oauth-authorization-server, webfinger, jwks, host-meta, agent-card)
        transformed by the chosen mode (semantic, missing-fields,
        wrong-types). Content-Type matches the schema's real-world type so
        Content-Type-keyed clients behave as they would against a real server.

        Also reachable on https://catastrophic.io/.well-known/chaos with the
        same handler.
      externalDocs:
        url: https://catastrophic.io/chaos/endpoints/discovery/well-known-chaos/
        description: Endpoint reference on catastrophic.io
      parameters:
        - in: query
          name: schema
          description: Which schema shape to base the response on.
          schema:
            type: string
            default: openid-configuration
            enum:
              - openid-configuration
              - oauth-authorization-server
              - webfinger
              - jwks
              - host-meta
              - agent-card
        - in: query
          name: mode
          description: How to misbehave.
          schema:
            type: string
            default: semantic
            enum:
              - semantic
              - missing-fields
              - wrong-types
        - in: query
          name: ai
          description: If true, source data is generated by an edge LLM.
          schema:
            type: boolean
            default: false
      responses:
        # NOTE(review): no `content` map is declared although the description says the
        # Content-Type follows the chosen schema — confirm the media types and add them.
        '200':
          description: Always 200. Content-Type matches the schema's real-world type.
          headers:
            X-Chaos-Schema:
              schema:
                type: string
            X-Chaos-Drift:
              schema:
                type: string
            X-Chaos-Ai-Source:
              description: 'Present when ai=true. Indicates which path served: `ai` (success) or `fallback` (degraded).'
              schema:
                type: string
                enum:
                  - ai
                  - fallback

components:
  parameters:
    # Shared `mode` query parameter, pulled in via `$ref` by the endpoints that
    # do not declare an inline `mode` parameter of their own.
    Mode:
      in: query
      name: mode
      description: >-
        Which flavour of chaos to return. Each endpoint defines its own enum;
        consult that endpoint's description for valid values. Unknown values
        fall back to the endpoint's default mode.
      schema:
        type: string
