online / endpoints 62 / categories 10 / rate 60/min/ip /

GET /html

GET /html alias: /html

Well-formed HTML control. Tags nest correctly, declared charset matches body bytes, doctype matches the markup version, no tag is left open. Use this as the parser-passes baseline; flip hostname to chaos.catastrophic.io to exercise the four failure modes.

expect: 200 OK, Content-Type: text/html; charset=utf-8. Document parses cleanly under any HTML5 or XHTML parser. X-Chaos-Origin: control. X-Chaos-Counterpart points at chaos.catastrophic.io/html.

bash
# Fetch the control document; -i prints the response headers ahead of the body.
curl -i 'https://not.catastrophic.io/html'
# html.parser is permissive; html5lib mimics a browser; lxml's XML parser is strict (its HTML parser recovers by default).
import urllib.request
from html.parser import HTMLParser

# Download the control document. The context manager closes the HTTP response
# once the body has been read; undecodable bytes become U+FFFD instead of raising.
with urllib.request.urlopen("https://not.catastrophic.io/html") as resp:
    raw = resp.read().decode("utf-8", errors="replace")

class TagTracker(HTMLParser):
    """Track open tags and record end tags that do not match the innermost open tag.

    Attributes:
        stack: names of start tags seen so far that have not been closed.
        errors: one message per end tag that did not match the top of ``stack``.
    """

    # Void elements never take an end tag. Pushing them would leave them "open"
    # forever and make every later end tag look mismatched.
    VOID_ELEMENTS = frozenset({
        "area", "base", "br", "col", "embed", "hr", "img", "input",
        "link", "meta", "source", "track", "wbr",
    })

    def __init__(self):
        super().__init__()
        self.stack = []
        self.errors = []

    def handle_starttag(self, tag, attrs):
        """Push a newly opened tag unless it is a void element."""
        if tag not in self.VOID_ELEMENTS:
            self.stack.append(tag)

    def handle_startendtag(self, tag, attrs):
        """Ignore self-closed tags (``<img/>``): they open and close at once.

        The inherited implementation calls handle_starttag followed by
        handle_endtag, which would report a spurious mismatch for void
        elements now that they are no longer pushed.
        """

    def handle_endtag(self, tag):
        """Pop the matching open tag, or record a mismatch and leave the stack as is."""
        if not self.stack or self.stack[-1] != tag:
            self.errors.append(f"unexpected </{tag}>, stack: {self.stack}")
        else:
            self.stack.pop()

# Run the tracker over the downloaded markup and report what it found.
t = TagTracker()
t.feed(raw)
# close() flushes anything the parser is still buffering (e.g. a tag cut off
# at the end of the input) so the report below reflects the whole document.
t.close()
print("Tag errors:", t.errors)
print("Unclosed at EOF:", t.stack)
// Browser: DOMParser recovers from malformed markup without reporting anything.
// Node: parse with the `parse5` package and pass an `onParseError` callback
// to surface the errors a browser would silently repair.
const response = await fetch("https://not.catastrophic.io/html");
const markup = await response.text();

// Parse as text/html and pull out the pieces we report on.
const parsed = new DOMParser().parseFromString(markup, "text/html");
const { title, body } = parsed;

console.log("Title:", title);
console.log("Body bytes recovered:", body.innerHTML.length);
console.log("Mode header:", response.headers.get("X-Chaos-Html-Mode"));
package main

import (
    "fmt"
    "io"
    "net/http"
    "strings"

    "golang.org/x/net/html"
)

// main fetches the control document, prints the X-Chaos-Html-Mode response
// header, and runs the body through golang.org/x/net/html.
func main() {
    resp, err := http.Get("https://not.catastrophic.io/html")
    if err != nil {
        // resp is nil when the request fails; stop before touching resp.Body.
        fmt.Println("Request error:", err)
        return
    }
    defer resp.Body.Close()

    raw, err := io.ReadAll(resp.Body)
    if err != nil {
        fmt.Println("Read error:", err)
        return
    }
    fmt.Println("Mode:", resp.Header.Get("X-Chaos-Html-Mode"))

    // golang.org/x/net/html follows the HTML5 spec — it accepts
    // mismatched and unclosed tags. To detect them, walk tokens
    // and track the open-tag stack manually.
    _, err = html.Parse(strings.NewReader(string(raw)))
    fmt.Println("Parse error:", err)
}
// Cargo.toml: reqwest = { version = "0.12", features = ["blocking"] }
//             html5ever = "0.27"
use html5ever::parse_document;
use html5ever::tendril::TendrilSink;

fn main() -> Result<(), Box> {
    let raw = reqwest::blocking::get("https://not.catastrophic.io/html")?.text()?;
    let dom = parse_document(
        html5ever::rcdom::RcDom::default(),
        Default::default(),
    ).from_utf8().read_from(&mut raw.as_bytes())?;
    println!("Parse errors: {:?}", dom.errors);
    Ok(())
}
// Requires Jsoup: org.jsoup:jsoup
import org.jsoup.Jsoup;
import org.jsoup.parser.Parser;
import java.net.URI;
import java.net.http.*;

/**
 * Fetches the control HTML document and prints what Jsoup's error-tracking
 * parser reports for it, together with the X-Chaos-Html-Mode header and title.
 */
public class HtmlChaos {
    public static void main(String[] args) throws Exception {
        final String url = "https://not.catastrophic.io/html";

        HttpRequest request = HttpRequest.newBuilder(URI.create(url)).build();
        HttpResponse<String> response =
                HttpClient.newHttpClient().send(request, HttpResponse.BodyHandlers.ofString());

        // Track up to 50 parse errors so they can be printed after parsing.
        Parser htmlParser = Parser.htmlParser().setTrackErrors(50);
        var document = htmlParser.parseInput(response.body(), url);

        String mode = response.headers().firstValue("X-Chaos-Html-Mode").orElse("");
        System.out.println("Mode: " + mode);
        System.out.println("Parse errors: " + htmlParser.getErrors());
        System.out.println("Title: " + document.title());
    }
}
// Requires HtmlAgilityPack.
using HtmlAgilityPack;
// Fetch the control document and keep the body as a string.
using var client = new HttpClient();
var resp = await client.GetAsync("https://not.catastrophic.io/html");
var raw  = await resp.Content.ReadAsStringAsync();

// OptionCheckSyntax makes HtmlAgilityPack record syntax problems in ParseErrors.
var doc = new HtmlDocument();
doc.OptionCheckSyntax = true;
doc.LoadHtml(raw);

// Headers.GetValues throws when the header is missing, so probe with
// TryGetValues and fall back to an empty string instead of crashing.
var mode = resp.Headers.TryGetValues("X-Chaos-Html-Mode", out var modeValues)
    ? modeValues.FirstOrDefault() ?? ""
    : "";
Console.WriteLine($"Mode: {mode}");
foreach (var err in doc.ParseErrors)
    Console.WriteLine($"  {err.Code} at {err.Line}:{err.LinePosition}");
require "net/http"
require "nokogiri"

# Fetch the control document and show the chaos mode header, if any.
resp = Net::HTTP.get_response(URI("https://not.catastrophic.io/html"))
raw  = resp.body
puts "Mode: #{resp['X-Chaos-Html-Mode']}"

# Nokogiri::HTML is lenient: in recover mode it repairs malformed markup and
# lists what it found in doc.errors. Chaining `strict` before `recover` has no
# effect, because `recover` re-enables the flag that `strict` cleared, so only
# `recover` is requested here.
# NOTE(review): Nokogiri::XML appears to recover by default as well and only
# raises on malformed markup when `strict` is requested — confirm.
doc = Nokogiri::HTML(raw) { |c| c.recover }
puts "Errors: #{doc.errors}"
# ParsedHtml is only available in Windows PowerShell, not in pwsh, so this
# looks at the response headers and the start of the raw body instead.
$response = Invoke-WebRequest -Uri 'https://not.catastrophic.io/html' -SkipHttpErrorCheck

# Emit the two chaos headers in order.
foreach ($headerName in 'X-Chaos-Html-Mode', 'X-Chaos-Html-Note') {
    $response.Headers[$headerName]
}

# Emit at most the first 400 characters of the body.
$body = $response.Content
$body.Substring(0, [Math]::Min(400, $body.Length))