GET /html
/html — Well-formed HTML control. Tags nest correctly, the declared charset matches the body bytes, the doctype matches the markup version, and no tag is left open. Use this as the parser-passes baseline; switch the hostname to chaos.catastrophic.io to exercise the four failure modes.
expect: 200 OK, Content-Type: text/html; charset=utf-8. Document parses cleanly under any HTML5 or XHTML parser. X-Chaos-Origin: control. X-Chaos-Counterpart points at chaos.catastrophic.io/html.
curl -i 'https://not.catastrophic.io/html'
# html.parser is permissive; html5lib mimics a browser; lxml is strict.
import urllib.request
from html.parser import HTMLParser
# Fetch the control document. The context manager closes the HTTP response
# once the body has been read instead of leaving the connection open.
# Undecodable bytes are replaced rather than raising, so the tag check below
# always has text to work on.
with urllib.request.urlopen("https://not.catastrophic.io/html") as resp:
    raw = resp.read().decode("utf-8", errors="replace")
class TagTracker(HTMLParser):
    """Track open tags while parsing and record mismatched end tags.

    After ``feed()``:
      * ``errors`` holds one message per end tag that did not match the
        innermost open tag.
      * ``stack`` holds the tags that were opened and never closed.
    """

    # Void elements never take an end tag, so they must not be pushed on the
    # open-tag stack; otherwise well-formed documents containing <meta>,
    # <br>, <img>, ... would be reported as having unclosed tags.
    VOID_ELEMENTS = frozenset({
        "area", "base", "br", "col", "embed", "hr", "img",
        "input", "link", "meta", "source", "track", "wbr",
    })

    def __init__(self):
        super().__init__()
        self.stack = []
        self.errors = []

    def handle_starttag(self, tag, attrs):
        if tag not in self.VOID_ELEMENTS:
            self.stack.append(tag)

    def handle_endtag(self, tag):
        # HTMLParser routes self-closing tags (<br/>) through
        # handle_starttag + handle_endtag; void elements were never pushed,
        # so there is nothing to pop for them.
        if tag in self.VOID_ELEMENTS:
            return
        if not self.stack or self.stack[-1] != tag:
            self.errors.append(f"unexpected </{tag}>, stack: {self.stack}")
        else:
            self.stack.pop()
# Run the fetched markup through the tracker, then report what it saw:
# mismatched end tags first, then anything still open at end of input.
tracker = TagTracker()
tracker.feed(raw)
tag_errors, still_open = tracker.errors, tracker.stack
print("Tag errors:", tag_errors)
print("Unclosed at EOF:", still_open)
// Browser: DOMParser is lenient and will silently recover.
// Node: use a strict parser like parse5 with error reporting on.
// NOTE(review): this comment previously named the package "@parse5/parse5";
// the npm package appears to be published as plain "parse5" — confirm.

// Fetch the control document and read the whole body as text.
const res = await fetch("https://not.catastrophic.io/html");
const raw = await res.text();
// Parse as text/html and print what the parser produced: the document
// title and the length of the serialized <body> contents.
const doc = new DOMParser().parseFromString(raw, "text/html");
console.log("Title:", doc.title);
console.log("Body bytes recovered:", doc.body.innerHTML.length);
// headers.get() yields null when the response does not carry the header.
console.log("Mode header:", res.headers.get("X-Chaos-Html-Mode"));
package main
import (
"fmt"
"io"
"net/http"
"strings"
"golang.org/x/net/html"
)
// main fetches the control document, prints the X-Chaos-Html-Mode response
// header, and runs the body through golang.org/x/net/html.
func main() {
	resp, err := http.Get("https://not.catastrophic.io/html")
	if err != nil {
		// resp is nil when the request fails; touching resp.Body would panic.
		fmt.Println("Request error:", err)
		return
	}
	defer resp.Body.Close()

	raw, err := io.ReadAll(resp.Body)
	if err != nil {
		fmt.Println("Read error:", err)
		return
	}
	fmt.Println("Mode:", resp.Header.Get("X-Chaos-Html-Mode"))

	// golang.org/x/net/html follows the HTML5 spec — it accepts
	// mismatched and unclosed tags. To detect them, walk tokens
	// and track the open-tag stack manually.
	_, err = html.Parse(strings.NewReader(string(raw)))
	fmt.Println("Parse error:", err)
}
// Cargo.toml: reqwest = { version = "0.12", features = ["blocking"] }
// html5ever = "0.27"
use html5ever::parse_document;
use html5ever::tendril::TendrilSink;
fn main() -> Result<(), Box> {
let raw = reqwest::blocking::get("https://not.catastrophic.io/html")?.text()?;
let dom = parse_document(
html5ever::rcdom::RcDom::default(),
Default::default(),
).from_utf8().read_from(&mut raw.as_bytes())?;
println!("Parse errors: {:?}", dom.errors);
Ok(())
}
// Requires Jsoup: org.jsoup:jsoup
import org.jsoup.Jsoup;
import org.jsoup.parser.Parser;
import java.net.URI;
import java.net.http.*;
/**
 * Fetches the control HTML document, parses it with Jsoup while tracking up
 * to 50 parse errors, and prints the X-Chaos-Html-Mode header, the recorded
 * parse errors, and the document title.
 */
public class HtmlChaos {
    public static void main(String[] args) throws Exception {
        HttpRequest request =
            HttpRequest.newBuilder(URI.create("https://not.catastrophic.io/html")).build();
        HttpResponse<String> response =
            HttpClient.newHttpClient().send(request, HttpResponse.BodyHandlers.ofString());

        // Error tracking must be switched on before parsing; 50 caps how many
        // errors the parser records.
        Parser htmlParser = Parser.htmlParser();
        htmlParser.setTrackErrors(50);
        var document = htmlParser.parseInput(response.body(), "https://not.catastrophic.io/html");

        // A missing header prints as an empty string.
        String mode = response.headers().firstValue("X-Chaos-Html-Mode").orElse("");
        System.out.println("Mode: " + mode);
        System.out.println("Parse errors: " + htmlParser.getErrors());
        System.out.println("Title: " + document.title());
    }
}
// Requires HtmlAgilityPack.
using HtmlAgilityPack;
// Fetch the control document and read the body as a string.
using var client = new HttpClient();
var resp = await client.GetAsync("https://not.catastrophic.io/html");
var raw = await resp.Content.ReadAsStringAsync();

// Load the markup with syntax checking enabled so ParseErrors is populated.
var doc = new HtmlDocument();
doc.OptionCheckSyntax = true;
doc.LoadHtml(raw);

// Headers.GetValues throws when the header is absent; TryGetValues lets a
// response without X-Chaos-Html-Mode print an empty mode instead of crashing.
var mode = resp.Headers.TryGetValues("X-Chaos-Html-Mode", out var modeValues)
    ? modeValues.FirstOrDefault() ?? ""
    : "";
Console.WriteLine($"Mode: {mode}");

// One line per recorded parse error: code and source position.
foreach (var err in doc.ParseErrors)
    Console.WriteLine($"  {err.Code} at {err.Line}:{err.LinePosition}");
require "net/http"
require "nokogiri"
# Fetch the control document and keep the raw body for parsing.
uri = URI("https://not.catastrophic.io/html")
resp = Net::HTTP.get_response(uri)
raw = resp.body
puts "Mode: #{resp['X-Chaos-Html-Mode']}"

# Nokogiri::HTML is lenient; Nokogiri::XML rejects malformed markup.
# NOTE(review): chaining `strict` and `recover` looks contradictory —
# confirm which parse option is actually intended here.
doc = Nokogiri::HTML(raw) do |config|
  config.strict.recover
end
puts "Errors: #{doc.errors}"
# Invoke-WebRequest exposes ParsedHtml on Windows PowerShell but not pwsh,
# so this sample inspects the raw response instead: the two chaos headers,
# then at most the first 400 characters of the body.
$response = Invoke-WebRequest -Uri 'https://not.catastrophic.io/html' -SkipHttpErrorCheck
foreach ($headerName in 'X-Chaos-Html-Mode', 'X-Chaos-Html-Note') {
    $response.Headers[$headerName]
}
$body = $response.Content
$body.Substring(0, [Math]::Min(400, $body.Length))
headers
body