online / endpoints 18 / categories 4 / rate 60/min/ip /

GET /sitemap.xml

GET /sitemap.xml

A sitemap that crawlers will follow into bad places: 404 URLs, future-dated lastmod values, circular sitemap-index references, or a body that claims gzip encoding but isn't gzipped.

mode: one of dead-urls (default; every <loc> returns 404), future-lastmod (lastmod dates in 2099 or later), circular-index (a sitemap index that references itself), or wrong-encoding (a Content-Encoding: gzip header on a plain-text body).
build a request:

expect: 200 OK with Content-Type: application/xml. Body content depends on mode. wrong-encoding additionally sets Content-Encoding: gzip on a plain-text body, which will fail any client that respects the header.

bash
# --include prints the response headers ahead of the body.
curl --include 'https://bots.catastrophic.io/sitemap.xml?mode=dead-urls'
import urllib.request

# Fetch the sitemap in dead-urls mode and print the status, the
# X-Chaos-Sitemap-Mode header, and the body. The context manager closes the
# underlying connection even if reading or decoding the body raises.
with urllib.request.urlopen("https://bots.catastrophic.io/sitemap.xml?mode=dead-urls") as resp:
    print(resp.status, resp.headers["X-Chaos-Sitemap-Mode"])
    print(resp.read().decode())
// Request the sitemap in dead-urls mode, then print the status alongside the
// x-chaos-sitemap-mode header, followed by the response body.
const sitemapUrl = "https://bots.catastrophic.io/sitemap.xml?mode=dead-urls";
const response = await fetch(sitemapUrl);
const chaosMode = response.headers.get("x-chaos-sitemap-mode");
console.log(response.status, chaosMode);
const bodyText = await response.text();
console.log(bodyText);
package main

import (
    "fmt"
    "io"
    "log"
    "net/http"
)

// main runs the sample request and exits non-zero if it fails.
func main() {
    if err := run(); err != nil {
        log.Fatal(err)
    }
}

// run fetches the sitemap in dead-urls mode and prints the status code, the
// X-Chaos-Sitemap-Mode header, and the body. It returns any transport or read
// error instead of discarding it.
func run() error {
    resp, err := http.Get("https://bots.catastrophic.io/sitemap.xml?mode=dead-urls")
    if err != nil {
        // resp is nil on error, so it must not be touched (or deferred-closed).
        return fmt.Errorf("request failed: %w", err)
    }
    defer resp.Body.Close()

    body, err := io.ReadAll(resp.Body)
    if err != nil {
        return fmt.Errorf("reading body failed: %w", err)
    }

    fmt.Println(resp.StatusCode, resp.Header.Get("X-Chaos-Sitemap-Mode"))
    fmt.Println(string(body))
    return nil
}
// Cargo.toml: reqwest = { version = "0.12", features = ["blocking"] }
fn main() -> Result<(), Box> {
    let resp = reqwest::blocking::get("https://bots.catastrophic.io/sitemap.xml?mode=dead-urls")?;
    println!("{} {:?}", resp.status(), resp.headers().get("x-chaos-sitemap-mode"));
    println!("{}", resp.text()?);
    Ok(())
}
import java.net.URI;
import java.net.http.*;

/**
 * Sample client: requests the sitemap in dead-urls mode and prints the status
 * code with the X-Chaos-Sitemap-Mode header, followed by the response body.
 */
public class BotsSitemap {
    private static final String SITEMAP_URL =
        "https://bots.catastrophic.io/sitemap.xml?mode=dead-urls";

    public static void main(String[] args) throws Exception {
        HttpClient http = HttpClient.newHttpClient();
        HttpRequest request = HttpRequest.newBuilder(URI.create(SITEMAP_URL)).build();
        HttpResponse<String> response = http.send(request, HttpResponse.BodyHandlers.ofString());

        // An absent header prints as an empty string.
        String chaosMode = response.headers().firstValue("X-Chaos-Sitemap-Mode").orElse("");
        System.out.println(response.statusCode() + " " + chaosMode);
        System.out.println(response.body());
    }
}
// Request the sitemap in dead-urls mode and print the numeric status with the
// X-Chaos-Sitemap-Mode header (blank when absent), followed by the body.
using var http = new HttpClient();
var response = await http.GetAsync("https://bots.catastrophic.io/sitemap.xml?mode=dead-urls");
var chaosMode = response.Headers.TryGetValues("X-Chaos-Sitemap-Mode", out var headerValues)
    ? headerValues.FirstOrDefault()
    : null;
Console.WriteLine($"{(int)response.StatusCode} {chaosMode}");
var bodyText = await response.Content.ReadAsStringAsync();
Console.WriteLine(bodyText);
require "net/http"

# Request the sitemap in dead-urls mode and print the status code with the
# X-Chaos-Sitemap-Mode header, followed by the response body.
sitemap_uri = URI("https://bots.catastrophic.io/sitemap.xml?mode=dead-urls")
response = Net::HTTP.get_response(sitemap_uri)
puts "#{response.code} #{response['X-Chaos-Sitemap-Mode']}"
puts response.body
# Request the sitemap in dead-urls mode, then emit the X-Chaos-Sitemap-Mode
# header value followed by the response body.
$requestArgs = @{ Uri = 'https://bots.catastrophic.io/sitemap.xml?mode=dead-urls' }
$response = Invoke-WebRequest @requestArgs
$response.Headers['X-Chaos-Sitemap-Mode']
$response.Content