GET /sitemap.xml
A sitemap that crawlers will follow into bad places: 404 URLs, future-dated lastmod values, circular sitemap-index references, or a body that claims gzip encoding but isn't gzipped.
mode
One of: dead-urls (default; every <loc> URL returns 404), future-lastmod (lastmod dates in 2099 or later), circular-index (a sitemap index that references itself), wrong-encoding (Content-Encoding: gzip header on a plain-text body).
build a request:
expect: 200 OK with Content-Type: application/xml. The body content depends on mode. wrong-encoding additionally sets Content-Encoding: gzip on a plain-text body, which breaks any client that honors the header. The examples below also print the X-Chaos-Sitemap-Mode response header.
# Fetch the dead-urls sitemap and show the response headers along with the body.
curl --include 'https://bots.catastrophic.io/sitemap.xml?mode=dead-urls'
import urllib.request
# Fetch the dead-urls sitemap and print the status, the X-Chaos-Sitemap-Mode
# header, and the body. The context manager closes the connection even if
# reading or decoding the body raises.
with urllib.request.urlopen("https://bots.catastrophic.io/sitemap.xml?mode=dead-urls") as resp:
    print(resp.status, resp.headers["X-Chaos-Sitemap-Mode"])
    print(resp.read().decode())
// Request the dead-urls sitemap, log the status with the chaos-mode header,
// then log the response body.
const sitemapUrl = "https://bots.catastrophic.io/sitemap.xml?mode=dead-urls";
const response = await fetch(sitemapUrl);
const chaosMode = response.headers.get("x-chaos-sitemap-mode");
console.log(response.status, chaosMode);
const xml = await response.text();
console.log(xml);
package main
import (
"fmt"
"io"
"net/http"
)
// main fetches the dead-urls sitemap and prints the status code, the
// X-Chaos-Sitemap-Mode response header, and the response body.
func main() {
	resp, err := http.Get("https://bots.catastrophic.io/sitemap.xml?mode=dead-urls")
	if err != nil {
		// resp is nil when Get fails, so stop before touching resp.Body.
		fmt.Println("request failed:", err)
		return
	}
	defer resp.Body.Close()

	body, err := io.ReadAll(resp.Body)
	if err != nil {
		fmt.Println("reading body failed:", err)
		return
	}
	fmt.Println(resp.StatusCode, resp.Header.Get("X-Chaos-Sitemap-Mode"))
	fmt.Println(string(body))
}
// Cargo.toml: reqwest = { version = "0.12", features = ["blocking"] }
fn main() -> Result<(), Box> {
let resp = reqwest::blocking::get("https://bots.catastrophic.io/sitemap.xml?mode=dead-urls")?;
println!("{} {:?}", resp.status(), resp.headers().get("x-chaos-sitemap-mode"));
println!("{}", resp.text()?);
Ok(())
}
import java.net.URI;
import java.net.http.*;
/** Fetches the dead-urls sitemap and prints its status, chaos-mode header, and body. */
public class BotsSitemap {
    private static final String SITEMAP_URL =
            "https://bots.catastrophic.io/sitemap.xml?mode=dead-urls";

    public static void main(String[] args) throws Exception {
        HttpClient httpClient = HttpClient.newHttpClient();
        HttpRequest request = HttpRequest.newBuilder(URI.create(SITEMAP_URL)).build();
        HttpResponse<String> response =
                httpClient.send(request, HttpResponse.BodyHandlers.ofString());

        // An absent header prints as an empty string.
        String chaosMode = response.headers().firstValue("X-Chaos-Sitemap-Mode").orElse("");
        System.out.println(response.statusCode() + " " + chaosMode);
        System.out.println(response.body());
    }
}
// Fetch the dead-urls sitemap, then print the numeric status with the
// X-Chaos-Sitemap-Mode header (blank when absent), followed by the body.
using var http = new HttpClient();
var response = await http.GetAsync("https://bots.catastrophic.io/sitemap.xml?mode=dead-urls");
response.Headers.TryGetValues("X-Chaos-Sitemap-Mode", out var headerValues);
var statusCode = (int)response.StatusCode;
var chaosMode = headerValues?.FirstOrDefault();
Console.WriteLine($"{statusCode} {chaosMode}");
var xml = await response.Content.ReadAsStringAsync();
Console.WriteLine(xml);
require "net/http"
# Fetch the dead-urls sitemap; print the status code with the chaos-mode
# header, then the body.
sitemap_uri = URI("https://bots.catastrophic.io/sitemap.xml?mode=dead-urls")
response = Net::HTTP.get_response(sitemap_uri)
chaos_mode = response["X-Chaos-Sitemap-Mode"]
puts "#{response.code} #{chaos_mode}"
puts response.body
# Fetch the dead-urls sitemap, then emit the chaos-mode header and the body.
$sitemapUrl = 'https://bots.catastrophic.io/sitemap.xml?mode=dead-urls'
$response = Invoke-WebRequest -Uri $sitemapUrl
$response.Headers['X-Chaos-Sitemap-Mode']
$response.Content
headers
body