diff --git a/AGENTS.md b/AGENTS.md
index dbfb6a00..1738a8eb 100644
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -78,17 +78,17 @@ Two pieces per engine:
 
 ## Build & test
 
-A configured build dir already exists (`cmake-build-debug`, also `…-release`,
-`…-relwithdebinfo`). Typical loop:
+A configured build dir already exists (`cmake-build-relwithdebinfo`, also `…-debug`,
+`…-release`). Typical loop:
 
 ```bash
 # library
-cmake --build cmake-build-debug --target odr
+cmake --build cmake-build-relwithdebinfo --target odr
 # tests (the ODR_TEST option is on in this build dir)
-cmake --build cmake-build-debug --target odr_test
-./cmake-build-debug/test/odr_test --gtest_filter='OldMs.*'
+cmake --build cmake-build-relwithdebinfo --target odr_test
+./cmake-build-relwithdebinfo/test/odr_test --gtest_filter='OldMs.*'
 # CLI (renders a file to a directory of HTML)
-cmake --build cmake-build-debug --target translate
+cmake --build cmake-build-relwithdebinfo --target translate
 ```
 
 Notable CMake options (`CMakeLists.txt`): `ODR_TEST`, `ODR_CLI`,
diff --git a/src/odr/internal/html/pdf_file.cpp b/src/odr/internal/html/pdf_file.cpp
index 5229021d..0ef78d0c 100644
--- a/src/odr/internal/html/pdf_file.cpp
+++ b/src/odr/internal/html/pdf_file.cpp
@@ -5,6 +5,7 @@
 #include <odr/html.hpp>
 
 #include <odr/internal/abstract/file.hpp>
+#include <odr/internal/html/common.hpp>
 #include <odr/internal/html/html_service.hpp>
 #include <odr/internal/html/html_writer.hpp>
 #include <odr/internal/pdf/pdf_document.hpp>
@@ -143,7 +144,7 @@ class HtmlServiceImpl final : public HtmlService {
                 o << "bottom:" << offset[1] / 72.0 << "in;";
                 o << "font-size:" << size << "pt;";
               }));
-          out.write_raw(unicode);
+          out.write_raw(escape_text(unicode));
           out.write_element_end("span");
         } else if (op.type ==
                    pdf::GraphicsOperatorType::show_text_manual_spacing) {
diff --git a/src/odr/internal/ooxml/ooxml_util.cpp b/src/odr/internal/ooxml/ooxml_util.cpp
index 37d365eb..f8c8af7d 100644
--- a/src/odr/internal/ooxml/ooxml_util.cpp
+++ b/src/odr/internal/ooxml/ooxml_util.cpp
@@ -94,7 +94,7 @@ ooxml::read_pct_attribute(const pugi::xml_attribute attribute) {
   // potentially this should be moved to a table parser
 
   std::string val = attribute.value();
-  util::string::trim(val);
+  util::string::trim_inplace(val);
 
   if (val.find('%') != std::string::npos) {
     util::string::replace_all(val, "%", "");
diff --git a/src/odr/internal/pdf/AGENTS.md b/src/odr/internal/pdf/AGENTS.md
index 1cfa284f..ab9d2445 100644
--- a/src/odr/internal/pdf/AGENTS.md
+++ b/src/odr/internal/pdf/AGENTS.md
@@ -9,7 +9,8 @@ module (poppler / pdf2htmlEX, behind `ODR_WITH_PDF2HTMLEX`) is the
 production-quality alternative engine.
 
 **Scope today.** Parse the PDF object/file structure (classic cross-reference
-tables, cross-reference streams, object streams, hybrid files), build the page
+tables, cross-reference streams, object streams, hybrid files, with a
+forward-scan recovery path for broken cross-references), build the page
 tree with fonts and annotations, tokenize page content streams into graphics
 operators, and emit a **proof-of-concept HTML rendering**: absolutely positioned
 text spans per `Tj`, pages sized from `MediaBox`. Encrypted files are decrypted
@@ -42,6 +43,15 @@ not production-quality — the HTML path still contains debug `std::cout` output
   Lenient where the wild demands: `/Type /XRef` only warns, references to free
   or absent objects resolve to null with a `Logger` warning, `n g obj` need not
   end with a newline.
+- **Cross-reference recovery**: when the trailer-chain walk throws (missing or
+  garbage `startxref`, a broken `Prev` chain) or the document fails to build
+  (no `/Root`, offsets pointing at the wrong objects), the whole file is
+  forward-scanned for `n g obj` starts, rebuilding a synthetic xref (last
+  definition of an id wins). `trailer` dictionaries are collected for `/Root`,
+  `/Encrypt`, `/ID`; recovered `/Type /ObjStm` members are indexed as
+  compressed entries; and, when no trailer supplied a `/Root`, the recovered
+  objects are searched for a `/Type /Catalog`. Handles e.g. an HTTP response
+  saved as `.pdf` (every offset shifted by the header).
 - **Page tree**: `Catalog` → `Pages` (recursive) → `Page` with per-page
   `Resources` (fonts only) and `Annots` (raw dictionary only). Objects cached by
   reference (`DocumentParser::m_objects`).
@@ -186,7 +196,9 @@ surprises **throw** `std::runtime_error` (missing `obj`/`endobj`/`stream`/
 operators ignored by `execute`, annotations keep their raw dictionary, CMap
 `codespacerange`/`bfrange` parsed past without effect. References to free/absent
 objects resolve to null with a warning; unknown xref-stream entry types treated
-as absent (7.5.8.3).
+as absent (7.5.8.3). A structural throw in the cross-reference layer is not
+fatal, though: it is caught once and the file is forward-scanned to rebuild the
+table (*Cross-reference recovery* above) before giving up.
 
 **Debug output still in place.** `html/pdf_file.cpp`, `pdf_graphics_state.cpp`,
 `pdf_graphics_operator_parser.cpp` and `pdf_cmap_parser.cpp` print diagnostics
@@ -222,11 +234,16 @@ and routes its warnings through it — new diagnostics should do the same.
   variants), plus inherited-page-attribute coverage (a multi-level `Pages` tree:
   per-page resolved `MediaBox`/`CropBox`/`Rotate`/`Resources`, override vs.
   inheritance, the `CropBox` ← `MediaBox` default, the missing-`MediaBox`
-  US-Letter lenience). End-to-end: the classic fixture
+  US-Letter lenience), plus cross-reference-recovery coverage (inline broken
+  mini-PDFs: garbage prepended, a bad `startxref`, no trailer at all → catalog
+  scan, a duplicate id → last definition wins, a page tree living in an object
+  stream). End-to-end: the classic fixture
   `odr-public/pdf/style-various-1.pdf`, plus decryption of
   `odr-public/pdf/Casio_WVA-M650-7AJF.pdf` (RC4, empty password) and
   `odr-private/pdf/encrypted_fontfile3_opentype.pdf` (AES-256; skipped when the
-  private submodule is absent). The `odr-private` xref-stream/objstm/hybrid
+  private submodule is absent), and recovery of the real
+  `odr-private/pdf/order-EK52VKL0.pdf` (an HTTP response saved as `.pdf`;
+  likewise skipped when absent). The `odr-private` xref-stream/objstm/hybrid
   fixtures (`basic_text.pdf`, `geneve_1564.pdf`, `test_fail.pdf`, `Kayla….pdf`,
   `svg_background…issue402.pdf`, `Core_v5.1.pdf`, `onepage.pdf`) were verified
   manually but are not pinned in unit tests. Also still contains the original
@@ -248,31 +265,16 @@ are ordered by what they unlock; 0–2 are roughly sequential, 3 and 4 are
 independent, 5 builds on whatever pages already render. Each stage gets its own
 detailed design before implementation.
 
-## Stage 0 — file-format compatibility (prerequisite) — **mostly done**
-
-Modern producers write PDF 1.5+ structures the original parser rejected.
-Cross-reference/object streams + hybrid files, the filter framework (incl. PNG
-predictors), inherited page attributes, and encryption (RC4 / AES-128 / AES-256)
-are **all implemented** (see *What works*). The one remaining piece:
-
-**Xref recovery for broken files** (post-stage-0; the WP2 code left room):
-- Trigger: any structural throw during xref-chain walking or a failed object
-  lookup (`startxref` missing/garbage, offsets wrong).
-- Recovery: a single forward scan for `n g obj` line starts (the existing
-  sequential `read_entry` machinery is most of this), building a synthetic
-  `Xref` (last definition of an id wins), collecting `trailer` dicts and
-  `/Type /Catalog` objects as `Root` candidates; objstm members indexed by
-  scanning recovered object streams.
-- Tests fit inline strings well: the scan ignores xref offsets, so a broken
-  mini-PDF needs no offset bookkeeping — write a literal with a garbage
-  `startxref`, duplicate ids, or a missing trailer, and assert what got rebuilt.
-  Real-world fixture: `odr-private/pdf/order-EK52VKL0.pdf` — an HTTP response
-  accidentally saved as `.pdf` (starts with `HTTP/1.0 200 OK`).
-
-Remaining encryption edge cases (deferred until a real file needs them):
-per-stream `/Crypt` filter `Name` overrides, the `EncryptMetadata false`
-metadata-stream `Identity` special case, and `Perms` (Algorithm 13) validation;
-the public-key security handler and R 5 are out of scope.
+## Stage 0 — file-format compatibility (prerequisite) — **done**
+
+The prerequisite for everything below: read the structures modern producers
+write that the original parser rejected, so a real-world `.pdf` reaches the page
+tree at all. All of it has landed (see *What works*): the stream-filter
+framework (incl. PNG predictors), PDF 1.5+ cross-reference/object streams and
+hybrid files, inherited page attributes, encryption (RC4 / AES-128 / AES-256),
+and last-resort cross-reference recovery for broken files. Remaining odds and
+ends are folded into *Other known gaps* below; the staged renderer work now
+builds on a parser that opens the common corpus.
 
 ## Stage 1 — text extraction: the code → Unicode chain
 
@@ -441,6 +443,15 @@ tree, little else.
 
 ## Other known gaps
 
+- **Encryption edge cases** (deferred from stage 0 until a real file needs
+  them): per-stream `/Crypt` filter `Name` overrides, the `EncryptMetadata
+  false` metadata-stream `Identity` special case, and `Perms` (Algorithm 13)
+  validation. The public-key security handler and revision 5 are out of scope.
+- **Recovery limitations** (deferred from stage 0): when several `/Type
+  /Catalog` objects survive, the first in id order is picked rather than the
+  newest; the constructor-triggered recovery path cannot decode object streams
+  in an *encrypted* broken file (no decryptor yet), so such members go
+  unindexed. Both are edge cases beyond the corpus seen so far.
 - **Linearized files** are not handled specially (the tail-first read usually
   still works, but hint streams are ignored).
 - **CMap coverage**: only single-byte `bfchar`; `bfrange`/`codespacerange`
diff --git a/src/odr/internal/pdf/pdf_document_parser.cpp b/src/odr/internal/pdf/pdf_document_parser.cpp
index 1cc217c4..5f934135 100644
--- a/src/odr/internal/pdf/pdf_document_parser.cpp
+++ b/src/odr/internal/pdf/pdf_document_parser.cpp
@@ -9,10 +9,17 @@
 #include <odr/internal/pdf/pdf_file_parser.hpp>
 #include <odr/internal/pdf/pdf_filter.hpp>
 
+#include <odr/internal/util/stream_util.hpp>
+#include <odr/internal/util/string_util.hpp>
+
+#include <cctype>
 #include <optional>
 #include <ranges>
 #include <set>
 #include <sstream>
+#include <string_view>
+#include <utility>
+#include <vector>
 
 namespace odr::internal::pdf {
 
@@ -144,17 +151,19 @@ Resources *parse_resources(DocumentParser &parser, const Object &object,
   return resources;
 }
 
+Annotation *parse_annotation(Document &document, const Dictionary &dictionary) {
+  auto *annotation = document.create_element<Annotation>();
+  annotation->object = Object(dictionary);
+  return annotation;
+}
+
 Annotation *parse_annotation(DocumentParser &parser,
                              const ObjectReference &reference,
                              Document &document) {
-  auto *annotation = document.create_element<Annotation>();
-
   IndirectObject object = parser.read_object(reference);
-  const Dictionary &dictionary = object.object.as_dictionary();
-
+  Annotation *annotation =
+      parse_annotation(document, object.object.as_dictionary());
   annotation->object_reference = reference;
-  annotation->object = Object(dictionary);
-
   return annotation;
 }
 
@@ -175,12 +184,19 @@ Page *parse_page(DocumentParser &parser, const ObjectReference &reference,
   const Object resources = attributes.resolve_into(*page, parser, reference);
   page->resources = parse_resources(parser, resources, document);
 
-  if (dictionary["Contents"].is_reference()) {
-    page->contents_reference = {dictionary["Contents"].as_reference()};
-  } else {
-    for (const Object &e : dictionary["Contents"].as_array()) {
+  // /Contents is a content stream or an array of them, supplied directly or
+  // through an indirect reference (7.7.3.3). Resolve a reference first so that
+  // a reference to an array is expanded into its stream references rather than
+  // mistaken for a single stream.
+  const Object &contents = dictionary["Contents"];
+  const Object resolved_contents =
+      contents.is_reference() ? parser.resolve_object_copy(contents) : contents;
+  if (resolved_contents.is_array()) {
+    for (const Object &e : resolved_contents.as_array()) {
       page->contents_reference.push_back(e.as_reference());
     }
+  } else if (contents.is_reference()) {
+    page->contents_reference = {contents.as_reference()};
   }
 
   if (dictionary.has_key("Annots")) {
@@ -188,8 +204,15 @@ Page *parse_page(DocumentParser &parser, const ObjectReference &reference,
     Array annotations =
         parser.resolve_object_copy(dictionary["Annots"]).as_array();
     for (const Object &annotation : annotations) {
-      page->annotations.push_back(
-          parse_annotation(parser, annotation.as_reference(), document));
+      // entries are usually indirect references, but inline annotation
+      // dictionaries are equally valid (12.5.2)
+      if (annotation.is_reference()) {
+        page->annotations.push_back(
+            parse_annotation(parser, annotation.as_reference(), document));
+      } else if (annotation.is_dictionary()) {
+        page->annotations.push_back(
+            parse_annotation(document, annotation.as_dictionary()));
+      }
     }
   }
 
@@ -258,9 +281,16 @@ DocumentParser::DocumentParser(std::unique_ptr<std::istream> in,
                                std::optional<Decryptor> decryptor,
                                const Logger &logger)
     : m_stream(std::move(in)), m_parser(*m_stream), m_logger{&logger} {
-  auto [xref, trailer] = read_trailer_chain();
-  m_xref = std::move(xref);
-  m_trailer = std::move(trailer);
+  try {
+    auto [xref, trailer] = read_trailer_chain();
+    m_xref = std::move(xref);
+    m_trailer = std::move(trailer);
+  } catch (const std::exception &e) {
+    ODR_WARNING(*m_logger, "pdf: cross-reference parsing failed ("
+                               << e.what()
+                               << "), scanning the file to recover");
+    recover_xref();
+  }
 
   if (m_trailer.has_key("Encrypt")) {
     // Build an `Authenticator` from the trailer `/Encrypt` and `/ID`
@@ -571,6 +601,217 @@ std::pair<Xref, Dictionary> DocumentParser::read_trailer_chain() {
   return {std::move(result_xref), std::move(result_trailer).value()};
 }
 
+namespace {
+
+/// Trim leading and trailing PDF whitespace from `line`. Returns the offset of
+/// the first kept byte (so the caller can map back to a file position) and a
+/// view of the trimmed content; the view borrows from `line`.
+std::pair<std::size_t, std::string_view> trim_line(const std::string &line) {
+  const std::string_view content =
+      util::string::trim_view(line, &ObjectParser::is_whitespace);
+  // `content` is assumed to point into `line` (even when empty; TODO confirm),
+  // so the leading offset is the distance between the two data pointers.
+  return {static_cast<std::size_t>(content.data() - line.data()), content};
+}
+
+/// Parse an `n g obj` header at the start of `content` (already trimmed) and
+/// return its reference, or `nullopt` if there is none. The value may follow on
+/// the same line (`12 0 obj<<`), so only the leading `id gen obj` is required.
+std::optional<ObjectReference> match_object_start(std::string_view content) {
+  util::stream::ViewStreamBuf buffer(content);
+  std::istream stream(&buffer);
+  ObjectParser parser(stream);
+
+  // `peek_unsigned_integer` guards each read so a non-matching line is rejected
+  // without `read_unsigned_integer` throwing (the common case while scanning).
+  if (!parser.peek_unsigned_integer()) {
+    return std::nullopt;
+  }
+  const UnsignedInteger id = parser.read_unsigned_integer();
+  if (!parser.peek_whitespace()) {
+    return std::nullopt;
+  }
+  parser.skip_whitespace();
+  if (!parser.peek_unsigned_integer()) {
+    return std::nullopt;
+  }
+  const UnsignedInteger gen = parser.read_unsigned_integer();
+  if (!parser.peek_whitespace()) {
+    return std::nullopt;
+  }
+  parser.skip_whitespace();
+
+  // the `obj` keyword must come next; reject tokens such as `object` or
+  // `obj.` that merely start with `obj` (alphanumeric or `.` right after it)
+  const std::string rest = parser.read_line();
+  const std::string_view tail(rest);
+  if (!tail.starts_with("obj")) {
+    return std::nullopt;
+  }
+  if (tail.size() > 3 &&
+      (std::isalnum(static_cast<unsigned char>(tail[3])) || tail[3] == '.')) {
+    return std::nullopt;
+  }
+  return ObjectReference(id, gen);
+}
+
+/// True if `content` (already trimmed) ends with the `stream` keyword on a word
+/// boundary. This covers both a bare `stream` line and a compact object that
+/// inlines its dictionary and the `stream` token on one line
+/// (`N G obj<<...>>stream`). The boundary check (preceding byte must not be
+/// alphanumeric) rejects `endstream` and names that merely end in `stream`.
+bool opens_stream_body(std::string_view content) {
+  constexpr std::string_view keyword = "stream";
+  if (!content.ends_with(keyword)) {
+    return false;
+  }
+  const std::size_t begin = content.size() - keyword.size();
+  return begin == 0 ||
+         !std::isalnum(static_cast<unsigned char>(content[begin - 1]));
+}
+
+} // namespace
+
+void DocumentParser::recover_xref() {
+  // Anything cached from the failed attempt is suspect (its offsets may be
+  // wrong). NOTE(review): positions are narrowed to 32 bits below (< 4 GiB).
+  m_objects.clear();
+  m_object_streams.clear();
+  m_recovered = true;
+
+  ObjectParser &p = parser().parser();
+  std::istream &stream = in();
+  stream.clear();
+  stream.seekg(0, std::ios::end);
+  const auto size = static_cast<std::uint32_t>(stream.tellg());
+
+  Xref xref;
+  Dictionary trailer;
+
+  stream.seekg(0);
+  while (true) {
+    stream.clear(); // drop any eofbit set by the previous read_line
+    const std::int64_t tell = stream.tellg();
+    if (tell < 0 || static_cast<std::uint32_t>(tell) >= size) {
+      break;
+    }
+    const auto position = static_cast<std::uint32_t>(tell);
+
+    const std::string line = p.read_line();
+    const auto [lead, content] = trim_line(line);
+
+    if (std::optional<ObjectReference> ref = match_object_start(content)) {
+      // last definition of a reference wins (operator[] overwrites)
+      xref.table[*ref] = Xref::Entry(
+          Xref::UsedEntry{static_cast<std::uint32_t>(position + lead)});
+      // A compact object may inline its dictionary and the `stream` token on
+      // this same line; fall through to skip the body below. Otherwise the
+      // header is fully consumed and we advance to the next line.
+      if (!opens_stream_body(content)) {
+        continue;
+      }
+    }
+
+    if (opens_stream_body(content)) {
+      // Skip the stream body so its (possibly object-shaped) bytes are not
+      // mis-scanned. The length is unknown here, so scan past `endstream`.
+      stream.clear();
+      p.skip_past("endstream");
+      continue;
+    }
+
+    if (content.starts_with("trailer")) {
+      const std::int64_t after = stream.tellg(); // start of the next line
+      try {
+        stream.clear();
+        stream.seekg(static_cast<std::int64_t>(position + lead) +
+                     7); // length of the "trailer" keyword
+        p.skip_whitespace();
+        for (const Dictionary dict = p.read_dictionary();
+             const auto &[key, value] : dict) {
+          trailer[key] = value; // last trailer wins per key
+        }
+      } catch (const std::exception &) {
+        // ignore a malformed trailer and keep scanning
+      }
+      stream.clear();
+      if (after >= 0) {
+        stream.seekg(after);
+      }
+      continue;
+    }
+  }
+
+  m_xref = std::move(xref);
+  m_trailer = std::move(trailer);
+
+  index_object_streams();
+
+  if (!m_trailer.has_key("Root")) {
+    recover_root();
+  }
+}
+
+void DocumentParser::index_object_streams() {
+  // Snapshot the directly recovered objects first: the loop below inserts
+  // compressed entries into the table it would otherwise be iterating over.
+  std::vector<ObjectReference> candidates;
+  for (const auto &[reference, entry] : m_xref.table) {
+    if (entry.is_used()) {
+      candidates.push_back(reference);
+    }
+  }
+
+  for (const ObjectReference &reference : candidates) {
+    try {
+      const IndirectObject &object = read_object(reference);
+      if (!object.has_stream || !object.object.is_dictionary()) {
+        continue;
+      }
+      const Dictionary &dictionary = object.object.as_dictionary();
+      if (!dictionary.has_key("Type") || !dictionary["Type"].is_name() ||
+          dictionary["Type"].as_name() != "ObjStm") {
+        continue;
+      }
+      const ObjectStream &members = load_object_stream(reference.id);
+      for (std::size_t i = 0; i < members.size(); ++i) {
+        // try_emplace keeps any existing entry (direct or earlier stream)
+        m_xref.table.try_emplace(ObjectReference(members[i].id, 0),
+                                 Xref::Entry(Xref::CompressedEntry{
+                                     static_cast<std::uint32_t>(reference.id),
+                                     static_cast<std::uint32_t>(i)}));
+      }
+    } catch (const std::exception &) {
+      // an unreadable (or, when encrypted, undecryptable) object stream is
+      // simply not indexed
+    }
+  }
+}
+
+void DocumentParser::recover_root() {
+  for (const auto &[reference, entry] : m_xref.table) {
+    if (entry.is_free()) {
+      continue;
+    }
+    try {
+      const IndirectObject &object = read_object(reference);
+      if (!object.object.is_dictionary()) {
+        continue;
+      }
+      const Dictionary &dictionary = object.object.as_dictionary();
+      if (dictionary.has_key("Type") && dictionary["Type"].is_name() &&
+          dictionary["Type"].as_name() == "Catalog") {
+        ODR_WARNING(*m_logger, "pdf: recovered document catalog " << reference);
+        m_trailer["Root"] = Object(reference);
+        return; // the first catalog in table order wins
+      }
+    } catch (const std::exception &) {
+      // an object that fails to read is skipped; keep searching
+    }
+  }
+  ODR_WARNING(*m_logger, "pdf: recovery found no document catalog");
+}
+
 void DocumentParser::decrypt_strings(Object &object,
                                      const ObjectReference &reference) {
   if (object.is_standard_string()) {
@@ -599,7 +840,21 @@ DocumentParser::build_document(const Dictionary &trailer) {
 }
 
 std::unique_ptr<Document> DocumentParser::parse_document() {
-  return build_document(m_trailer);
+  try {
+    return build_document(m_trailer);
+  } catch (const std::exception &e) {
+    // The cross-reference table parsed cleanly but does not describe a usable
+    // document (no `/Root`, offsets pointing at the wrong objects, …). Scan the
+    // file once and retry; if recovery already ran, give up.
+    if (m_recovered) {
+      throw;
+    }
+    ODR_WARNING(*m_logger, "pdf: building the document failed ("
+                               << e.what()
+                               << "), scanning the file to recover");
+    recover_xref();
+    return build_document(m_trailer);
+  }
 }
 
 void DocumentParser::resolve_object(Object &object) {
diff --git a/src/odr/internal/pdf/pdf_document_parser.hpp b/src/odr/internal/pdf/pdf_document_parser.hpp
index ffd60556..e22502f2 100644
--- a/src/odr/internal/pdf/pdf_document_parser.hpp
+++ b/src/odr/internal/pdf/pdf_document_parser.hpp
@@ -110,6 +110,21 @@ class DocumentParser {
   /// table together with the newest (first-seen) trailer dictionary.
   [[nodiscard]] std::pair<Xref, Dictionary> read_trailer_chain();
 
+  /// Last-resort cross-reference recovery for broken files (missing/garbage
+  /// `startxref`, wrong offsets, a damaged chain): forward-scan the whole file
+  /// for `n g obj` starts, rebuilding `m_xref` (last definition of an id wins)
+  /// and merging `trailer` dictionaries into `m_trailer`. Object-stream members
+  /// are then indexed (`index_object_streams`) and, if no `trailer` supplied a
+  /// `/Root`, the objects are searched for a `/Type /Catalog` (`recover_root`).
+  /// Sets `m_recovered`; objects cached by a failed attempt are dropped first.
+  void recover_xref();
+  /// Index the members of every recovered `/Type /ObjStm` object as compressed
+  /// cross-reference entries (additive; an entry that already exists wins).
+  void index_object_streams();
+  /// Install the first `/Type /Catalog` among the recovered objects as the
+  /// trailer `/Root`; logs a warning and leaves it unset when none is found.
+  void recover_root();
+
   [[nodiscard]] std::unique_ptr<Document>
   build_document(const Dictionary &trailer);
 
@@ -125,6 +140,7 @@ class DocumentParser {
 
   Xref m_xref;
   Dictionary m_trailer;
+  bool m_recovered{false};
 
   bool m_is_encrypted{false};
   std::optional<Authenticator> m_authenticator;
diff --git a/src/odr/internal/pdf/pdf_file_parser.cpp b/src/odr/internal/pdf/pdf_file_parser.cpp
index 4938f069..4696b730 100644
--- a/src/odr/internal/pdf/pdf_file_parser.cpp
+++ b/src/odr/internal/pdf/pdf_file_parser.cpp
@@ -31,7 +31,10 @@ IndirectObject FileParser::read_indirect_object() {
   result.object = m_parser.read_object();
   m_parser.skip_whitespace();
 
-  const std::string next = m_parser.read_line();
+  // the keyword may carry trailing whitespace (`endobj \n`) or a CR from a
+  // CRLF line ending (`stream\r\n`), so compare against the trimmed token
+  std::string next = m_parser.read_line();
+  util::string::rtrim_inplace(next);
 
   if (next == "endobj") {
     m_parser.skip_whitespace();
@@ -125,7 +128,7 @@ std::string FileParser::read_stream(const std::int32_t size) {
     // TODO improve poor solution
     while (true) {
       std::string line = m_parser.read_line(true);
-      if (line == "endstream\n") {
+      if (util::string::trim(line) == "endstream") {
         result.pop_back();
         break;
       }
@@ -134,10 +137,7 @@ std::string FileParser::read_stream(const std::int32_t size) {
   }
 
   m_parser.skip_whitespace();
-  if (const std::string line = m_parser.read_line(); line != "endobj") {
-    throw std::runtime_error("expected endobj");
-  }
-
+  m_parser.expect_characters("endobj");
   m_parser.skip_whitespace();
 
   return result;
diff --git a/src/odr/internal/pdf/pdf_graphics_operator_parser.cpp b/src/odr/internal/pdf/pdf_graphics_operator_parser.cpp
index 7621f05d..cb9e3d0b 100644
--- a/src/odr/internal/pdf/pdf_graphics_operator_parser.cpp
+++ b/src/odr/internal/pdf/pdf_graphics_operator_parser.cpp
@@ -166,9 +166,33 @@ GraphicsOperator GraphicsOperatorParser::read_operator() {
     std::cerr << "unknown operator: " << operator_name << std::endl;
   }
 
+  // After `ID` the raw image bytes follow inline; consume them up to `EI` so
+  // they are not mis-tokenized as operators (which corrupts the parse state).
+  if (result.type == GraphicsOperatorType::begin_inline_image_data) {
+    skip_inline_image_data();
+  }
+
   m_parser.skip_whitespace();
 
   return result;
 }
 
+void GraphicsOperatorParser::skip_inline_image_data() {
+  // Exactly one white-space character separates `ID` from the data (8.9.7).
+  if (m_parser.geti() != eof) {
+    m_parser.bumpc();
+  }
+
+  // The length is not encoded, so scan for the `EI` terminator. `EI` can also
+  // occur inside the image bytes, so only accept one followed by white-space
+  // or eof; otherwise keep scanning. Without a terminator, input runs to eof.
+  while (m_parser.skip_past("EI")) {
+    const int_type after = m_parser.geti();
+    if (after == eof ||
+        ObjectParser::is_whitespace(static_cast<char_type>(after))) {
+      return;
+    }
+  }
+}
+
 } // namespace odr::internal::pdf
diff --git a/src/odr/internal/pdf/pdf_graphics_operator_parser.hpp b/src/odr/internal/pdf/pdf_graphics_operator_parser.hpp
index 3adf02a1..5a34c957 100644
--- a/src/odr/internal/pdf/pdf_graphics_operator_parser.hpp
+++ b/src/odr/internal/pdf/pdf_graphics_operator_parser.hpp
@@ -18,6 +18,10 @@ class GraphicsOperatorParser {
   [[nodiscard]] GraphicsOperator read_operator();
 
 private:
+  // Consume the binary image data of an inline image, from just after the `ID`
+  // keyword up to and including its `EI` terminator (8.9.7).
+  void skip_inline_image_data();
+
   ObjectParser m_parser;
 };
 
diff --git a/src/odr/internal/pdf/pdf_graphics_state.cpp b/src/odr/internal/pdf/pdf_graphics_state.cpp
index b242e2b3..c7a790d9 100644
--- a/src/odr/internal/pdf/pdf_graphics_state.cpp
+++ b/src/odr/internal/pdf/pdf_graphics_state.cpp
@@ -63,7 +63,7 @@ void GraphicsState::execute(const GraphicsOperator &op) {
     std::cout << "dash pattern not implemented" << std::endl;
     break;
   case GraphicsOperatorType::set_color_rendering_intent:
-    current().general.color_rendering_intent = op.arguments.at(0).as_real();
+    current().general.color_rendering_intent = op.arguments.at(0).as_name();
     break;
   case GraphicsOperatorType::set_flatness_tolerance:
     current().general.flatness_tolerance = op.arguments.at(0).as_real();
diff --git a/src/odr/internal/pdf/pdf_graphics_state.hpp b/src/odr/internal/pdf/pdf_graphics_state.hpp
index 6e4ea9e0..b65079f2 100644
--- a/src/odr/internal/pdf/pdf_graphics_state.hpp
+++ b/src/odr/internal/pdf/pdf_graphics_state.hpp
@@ -22,7 +22,7 @@ struct GraphicsState {
     int join_style{};
     double miter_limit{};
     int dash_pattern{};
-    double color_rendering_intent{};
+    std::string color_rendering_intent;
     double flatness_tolerance{};
     std::string graphics_state_parameters;
     std::array<double, 6> transform_matrix{1, 0, 0, 1, 0, 0};
diff --git a/src/odr/internal/pdf/pdf_object_parser.cpp b/src/odr/internal/pdf/pdf_object_parser.cpp
index 84a6540d..357cf5ea 100644
--- a/src/odr/internal/pdf/pdf_object_parser.cpp
+++ b/src/odr/internal/pdf/pdf_object_parser.cpp
@@ -7,6 +7,7 @@
 #include <sstream>
 #include <stdexcept>
 #include <utility>
+#include <vector>
 
 namespace odr::internal::pdf {
 
@@ -121,6 +122,45 @@ std::string ObjectParser::read_line(const bool inclusive) {
   return util::stream::read_line(in(), inclusive);
 }
 
+bool ObjectParser::skip_past(const std::string_view marker) {
+  if (marker.empty()) {
+    return true; // an empty marker matches without consuming anything
+  }
+
+  // KMP failure function over the (typically tiny) marker, so the streaming
+  // scan stays correct even when the marker has internal repetition (e.g. the
+  // two `e`s in `endstream`).
+  std::vector<std::size_t> fail(marker.size(), 0);
+  for (std::size_t i = 1, k = 0; i < marker.size(); ++i) {
+    while (k > 0 && marker[i] != marker[k]) {
+      k = fail[k - 1];
+    }
+    if (marker[i] == marker[k]) {
+      ++k;
+    }
+    fail[i] = k;
+  }
+
+  std::size_t matched = 0;
+  while (true) {
+    const int_type c = sb().sbumpc();
+    if (c == eof) {
+      in().setstate(std::ios::eofbit); // sbumpc() does not set stream state
+      return false;
+    }
+    const auto ch = static_cast<char_type>(c);
+    while (matched > 0 && ch != marker[matched]) {
+      matched = fail[matched - 1]; // fall back to the longest viable prefix
+    }
+    if (ch == marker[matched]) {
+      ++matched;
+      if (matched == marker.size()) {
+        return true;
+      }
+    }
+  }
+}
+
 void ObjectParser::expect_characters(const std::string &string) {
   const std::string observed = bumpnc(string.size());
   if (observed != string) {
@@ -135,20 +175,38 @@ bool ObjectParser::peek_number() {
   return c != eof && (c == '+' || c == '-' || c == '.' || std::isdigit(c));
 }
 
-UnsignedInteger ObjectParser::read_unsigned_integer() {
+bool ObjectParser::peek_unsigned_integer() {
+  const int_type c = geti();
+  return c != eof && std::isdigit(c);
+}
+
+std::pair<UnsignedInteger, std::uint32_t>
+ObjectParser::read_unsigned_integer_and_count() {
   UnsignedInteger result = 0;
+  std::uint32_t count = 0;
 
   while (true) {
     const int_type c = geti();
     if (c == eof) {
-      return result;
+      break;
     }
     if (!std::isdigit(c)) {
-      return result;
+      break;
     }
     result = result * 10 + (c - '0');
+    ++count;
     bumpc();
   }
+
+  if (count == 0) {
+    throw std::runtime_error("expected unsigned integer, but got none");
+  }
+
+  return {result, count};
+}
+
+UnsignedInteger ObjectParser::read_unsigned_integer() {
+  return read_unsigned_integer_and_count().first;
 }
 
 Integer ObjectParser::read_integer() {
@@ -172,23 +230,34 @@ Real ObjectParser::read_number() {
 }
 
 std::variant<Integer, Real> ObjectParser::read_integer_or_real() {
-  Integer i = 0;
+  Integer sign = 1;
+  if (geti() == '-') {
+    sign = -1;
+    bumpc();
+  } else if (geti() == '+') {
+    bumpc();
+  }
 
-  if (char_type c = getc(); c != '.') {
-    i = read_integer();
-    c = getc();
-    if (c != '.') {
-      return i;
-    }
+  UnsignedInteger i = 0;
+
+  if (geti() != '.') {
+    i = read_unsigned_integer();
+  }
+  if (geti() != '.') {
+    return static_cast<Integer>(sign * i);
   }
   bumpc();
 
-  const pos_type begin = in().tellg();
-  const UnsignedInteger i2 = read_unsigned_integer();
-  const pos_type end = in().tellg();
+  Real r = static_cast<Real>(i);
+
+  if (peek_unsigned_integer()) {
+    const auto [fraction, decimals] = read_unsigned_integer_and_count();
+    // `decimals` is unsigned; negate as floating point to avoid wrap-around.
+    r += static_cast<Real>(fraction) *
+         std::pow(10.0, -static_cast<Real>(decimals));
+  }
 
-  return static_cast<Real>(i) +
-         static_cast<Real>(i2) * std::pow(10.0, begin - end);
+  return static_cast<Real>(sign) * r;
 }
 
 bool ObjectParser::peek_name() {
diff --git a/src/odr/internal/pdf/pdf_object_parser.hpp b/src/odr/internal/pdf/pdf_object_parser.hpp
index 4558bc4b..8ee00ab4 100644
--- a/src/odr/internal/pdf/pdf_object_parser.hpp
+++ b/src/odr/internal/pdf/pdf_object_parser.hpp
@@ -5,6 +5,7 @@
 #include <array>
 #include <istream>
 #include <stdexcept>
+#include <string_view>
 #include <variant>
 
 namespace odr::internal::pdf {
@@ -45,9 +46,16 @@ class ObjectParser {
   void skip_whitespace();
   void skip_line();
   std::string read_line(bool inclusive = false);
+  /// Advance the cursor just past the next occurrence of `marker`. Returns true
+  /// if it was found; on false the stream has been consumed to eof. Operates on
+  /// raw bytes, so the marker may straddle line breaks.
+  bool skip_past(std::string_view marker);
   void expect_characters(const std::string &string);
 
   [[nodiscard]] bool peek_number();
+  [[nodiscard]] bool peek_unsigned_integer();
+  [[nodiscard]] std::pair<UnsignedInteger, std::uint32_t>
+  read_unsigned_integer_and_count();
   [[nodiscard]] UnsignedInteger read_unsigned_integer();
   [[nodiscard]] Integer read_integer();
   [[nodiscard]] Real read_number();
diff --git a/src/odr/internal/util/stream_util.hpp b/src/odr/internal/util/stream_util.hpp
index 04ebbcc5..01cd065f 100644
--- a/src/odr/internal/util/stream_util.hpp
+++ b/src/odr/internal/util/stream_util.hpp
@@ -1,10 +1,26 @@
 #pragma once
 
 #include <iosfwd>
+#include <streambuf>
 #include <string>
+#include <string_view>
 
 namespace odr::internal::util::stream {
 
+/// Read-only stream buffer over an existing `string_view`, so a `std::istream`
+/// can scan it without copying into a `std::string`/`std::istringstream`. The
+/// view must outlive the buffer. Only the get area is exposed; seeking is not
+/// supported.
+class ViewStreamBuf : public std::streambuf {
+public:
+  explicit ViewStreamBuf(std::string_view view) {
+    // `setg` wants mutable pointers; the get area is never written through.
+    char *const first = const_cast<char *>(view.data());
+    char *const last = first + view.size();
+    setg(first, first, last);
+  }
+};
+
 std::string read(std::istream &in);
 std::string read(std::istream &in, std::size_t size);
 
diff --git a/src/odr/internal/util/string_util.cpp b/src/odr/internal/util/string_util.cpp
index 9a926a6a..661e2fa5 100644
--- a/src/odr/internal/util/string_util.cpp
+++ b/src/odr/internal/util/string_util.cpp
@@ -1,6 +1,7 @@
 #include <odr/internal/util/string_util.hpp>
 
 #include <algorithm>
+#include <cctype>
 #include <cstdint>
 #include <iomanip>
 #include <sstream>
@@ -19,22 +20,61 @@ bool string::ends_with(const std::string &string, const std::string &with) {
              0;
 }
 
-void string::ltrim(std::string &s) {
-  s.erase(s.begin(), std::ranges::find_if(s, [](const std::uint8_t ch) {
-            return !std::isspace(ch);
+bool string::is_ascii_space(const char c) {
+  return c == ' ' || (c >= '\t' && c <= '\r'); // fixed set, ignores setlocale
+}
+
+void string::ltrim_inplace(std::string &s, const CharPredicate is_space) {
+  // Erase everything in front of the first byte the predicate rejects.
+  const auto first_kept = std::ranges::find_if_not(s, is_space);
+  s.erase(s.begin(), first_kept);
-          }));
 }
 
-void string::rtrim(std::string &s) {
+void string::rtrim_inplace(std::string &s, const CharPredicate is_space) {
-  s.erase(std::find_if(s.rbegin(), s.rend(),
-                       [](const std::uint8_t ch) { return !std::isspace(ch); })
-              .base(),
-          s.end());
+  // Scan from the back for the last byte the predicate rejects; `base()` of
+  // that reverse iterator is where the trailing run to erase begins.
+  const auto last_kept = std::find_if_not(s.rbegin(), s.rend(), is_space);
+  s.erase(last_kept.base(), s.end());
 }
 
-void string::trim(std::string &s) {
-  rtrim(s);
-  ltrim(s);
+void string::trim_inplace(std::string &s, const CharPredicate is_space) {
+  rtrim_inplace(s, is_space); // drop the trailing run first...
+  ltrim_inplace(s, is_space); // ...then the leading one
+}
+
+std::string string::ltrim(const std::string &s, const CharPredicate is_space) {
+  return std::string{ltrim_view(s, is_space)}; // owning copy of the view
+}
+
+std::string string::rtrim(const std::string &s, const CharPredicate is_space) {
+  return std::string{rtrim_view(s, is_space)}; // owning copy of the view
+}
+
+std::string string::trim(const std::string &s, const CharPredicate is_space) {
+  return std::string{trim_view(s, is_space)}; // owning copy of the view
+}
+
+std::string_view string::ltrim_view(std::string_view s,
+                                    const CharPredicate is_space) {
+  // Peel off leading bytes for as long as the predicate accepts them.
+  while (!s.empty() && is_space(s.front())) {
+    s.remove_prefix(1);
+  }
+  return s;
+}
+
+std::string_view string::rtrim_view(std::string_view s,
+                                    const CharPredicate is_space) {
+  // Peel off trailing bytes for as long as the predicate accepts them.
+  while (!s.empty() && is_space(s.back())) {
+    s.remove_suffix(1);
+  }
+  return s;
+}
+
+std::string_view string::trim_view(std::string_view s,
+                                   const CharPredicate is_space) {
+  return ltrim_view(rtrim_view(s, is_space), is_space); // tail, then head
 }
 
 void string::replace_all(std::string &string, const std::string &search,
diff --git a/src/odr/internal/util/string_util.hpp b/src/odr/internal/util/string_util.hpp
index d91e2525..42f8fb83 100644
--- a/src/odr/internal/util/string_util.hpp
+++ b/src/odr/internal/util/string_util.hpp
@@ -2,6 +2,7 @@
 
 #include <functional>
 #include <string>
+#include <string_view>
 #include <vector>
 
 namespace odr::internal::util::string {
@@ -9,9 +10,33 @@ namespace odr::internal::util::string {
 bool starts_with(const std::string &string, const std::string &with);
 bool ends_with(const std::string &string, const std::string &with);
 
-void ltrim(std::string &s);
-void rtrim(std::string &s);
-void trim(std::string &s);
+/// Predicate deciding whether a byte counts as whitespace for the trim helpers
+/// below. Takes a single `char`; implementations must handle the full byte
+/// range without relying on the sign of `char`.
+using CharPredicate = bool (*)(char);
+
+/// `std::isspace` for the default C locale, made safe for any `char` value.
+bool is_ascii_space(char c);
+
+void ltrim_inplace(std::string &s, CharPredicate is_space = is_ascii_space);
+void rtrim_inplace(std::string &s, CharPredicate is_space = is_ascii_space);
+void trim_inplace(std::string &s, CharPredicate is_space = is_ascii_space);
+
+std::string ltrim(const std::string &s,
+                  CharPredicate is_space = is_ascii_space);
+std::string rtrim(const std::string &s,
+                  CharPredicate is_space = is_ascii_space);
+std::string trim(const std::string &s, CharPredicate is_space = is_ascii_space);
+
+/// Trim leading/trailing whitespace and return a view into `s`. The result is a
+/// subrange of `s`, so the leading offset is recoverable as
+/// `result.data() - s.data()`.
+std::string_view ltrim_view(std::string_view s,
+                            CharPredicate is_space = is_ascii_space);
+std::string_view rtrim_view(std::string_view s,
+                            CharPredicate is_space = is_ascii_space);
+std::string_view trim_view(std::string_view s,
+                           CharPredicate is_space = is_ascii_space);
 
 void replace_all(std::string &string, const std::string &search,
                  const std::string &replace);
diff --git a/test/data/reference-output/odr-private b/test/data/reference-output/odr-private
index c6cad8af..f8f00288 160000
--- a/test/data/reference-output/odr-private
+++ b/test/data/reference-output/odr-private
@@ -1 +1 @@
-Subproject commit c6cad8afe5795d343d3d8cfc634368694d40fc3b
+Subproject commit f8f00288248d2d7aef0e113289feaf5fdc69510b
diff --git a/test/data/reference-output/odr-public b/test/data/reference-output/odr-public
index f62e13cd..9b3c8a3c 160000
--- a/test/data/reference-output/odr-public
+++ b/test/data/reference-output/odr-public
@@ -1 +1 @@
-Subproject commit f62e13cdba20b099622b0091e6abbcc0675f378b
+Subproject commit 9b3c8a3c5c7d97afd206ba29f5682c37faa527b4
diff --git a/test/src/html_output_test.cpp b/test/src/html_output_test.cpp
index 2c55789b..b15cab82 100644
--- a/test/src/html_output_test.cpp
+++ b/test/src/html_output_test.cpp
@@ -70,14 +70,6 @@ TEST_P(HtmlOutputTests, html_meta) {
     GTEST_SKIP();
   }
 
-  // TODO fix pdf implementation
-  if (engine == DecoderEngine::odr &&
-      test_file.type == FileType::portable_document_format &&
-      (test_file.short_path.starts_with("odr-private") ||
-       test_file.short_path == "odr-public/pdf/Casio_WVA-M650-7AJF.pdf")) {
-    GTEST_SKIP();
-  }
-
   DecodePreference decode_preference;
   decode_preference.as_file_type = test_file.type;
   decode_preference.with_engine = engine;
diff --git a/test/src/internal/pdf/pdf_document_parser.cpp b/test/src/internal/pdf/pdf_document_parser.cpp
index 32798714..5c9ce769 100644
--- a/test/src/internal/pdf/pdf_document_parser.cpp
+++ b/test/src/internal/pdf/pdf_document_parser.cpp
@@ -10,6 +10,7 @@
 
 #include <internal/pdf/pdf_test_file_builder.hpp>
 
+#include <cstring>
 #include <filesystem>
 #include <memory>
 #include <optional>
@@ -208,6 +209,135 @@ TEST(DocumentParser, inherited_page_attributes) {
   EXPECT_EQ(page6->rotate, 90);
 }
 
+// Recovery: a valid file with garbage prepended (the real fixture
+// `order-EK52VKL0.pdf` is an HTTP response saved as `.pdf`) has every xref
+// offset and the `startxref` shifted, so the chain walk fails. A forward scan
+// rebuilds the table from the actual object positions.
+TEST(DocumentParser, recovers_from_prepended_garbage) {
+  const std::string pdf =
+      "HTTP/1.0 200 OK\r\nContent-Type: application/pdf\r\n\r\n" +
+      two_object_mini_pdf(true);
+  check_mini_pdf(pdf);
+}
+
+// Recovery: the `startxref` points nowhere, so locating the table fails.
+TEST(DocumentParser, recovers_from_garbage_startxref) {
+  std::string pdf = two_object_mini_pdf(true);
+  const std::size_t pos = pdf.find("startxref\n") + std::strlen("startxref\n");
+  pdf.replace(pos, pdf.find('\n', pos) - pos, "999999");
+  check_mini_pdf(pdf);
+}
+
+// Recovery: no `xref`/`trailer`/`startxref` at all — the catalog is found by
+// scanning the recovered objects for `/Type /Catalog`.
+TEST(DocumentParser, recovers_root_from_catalog_scan) {
+  const std::string pdf =
+      "%PDF-1.7\n"
+      "1 0 obj\n<< /Type /Catalog /Pages 2 0 R >>\nendobj\n"
+      "2 0 obj\n<< /Type /Pages /Kids [3 0 R] /Count 1 >>\nendobj\n"
+      "3 0 obj\n<< /Type /Page /Parent 2 0 R /MediaBox [0 0 612 792] "
+      "/Resources << >> /Contents 4 0 R >>\nendobj\n"
+      "4 0 obj\n<< /Length 5 >>\nstream\nBT ET\nendstream\nendobj\n"
+      "%%EOF\n";
+  check_mini_pdf(pdf);
+}
+
+// Recovery: an id defined more than once (e.g. a botched incremental update)
+// resolves to the last definition in the file.
+TEST(DocumentParser, recovery_last_definition_wins) {
+  const std::string pdf =
+      "%PDF-1.7\n"
+      "1 0 obj\n<< /Type /Catalog /Pages 2 0 R >>\nendobj\n"
+      "2 0 obj\n<< /Type /Pages /Kids [3 0 R] /Count 1 >>\nendobj\n"
+      "3 0 obj\n<< /Type /Page /Parent 2 0 R /MediaBox [0 0 100 100] "
+      "/Resources << >> /Contents 4 0 R >>\nendobj\n"
+      "4 0 obj\n<< /Length 5 >>\nstream\nBT ET\nendstream\nendobj\n"
+      "3 0 obj\n<< /Type /Page /Parent 2 0 R /MediaBox [0 0 200 200] "
+      "/Resources << >> /Contents 4 0 R >>\nendobj\n"
+      "trailer\n<< /Root 1 0 R >>\n%%EOF\n";
+
+  DocumentParser parser(std::make_unique<std::istringstream>(pdf));
+  const std::unique_ptr<Document> document = parser.parse_document();
+  const std::vector<Page *> pages = document->collect_pages();
+
+  ASSERT_EQ(pages.size(), 1);
+  EXPECT_EQ(pages[0]->media_box.as_array()[2].as_real(), 200.0);
+}
+
+// Recovery: an object that inlines its dictionary and the `stream` token on a
+// single line (`N G obj<<...>>stream`) must still have its body skipped, so
+// object-shaped bytes inside the stream (here a fake `1 0 obj`) do not
+// overwrite the real recovered entry.
+TEST(DocumentParser, recovery_skips_same_line_stream_body) {
+  const std::string pdf =
+      "%PDF-1.7\n"
+      "1 0 obj\n<< /Type /Catalog /Pages 2 0 R >>\nendobj\n"
+      "2 0 obj\n<< /Type /Pages /Kids [3 0 R] /Count 1 >>\nendobj\n"
+      "3 0 obj\n<< /Type /Page /Parent 2 0 R /MediaBox [0 0 612 792] "
+      "/Resources << >> /Contents 4 0 R >>\nendobj\n"
+      // dictionary and `stream` keyword share the object's first line; the body
+      // contains a decoy `1 0 obj` that must not be recorded
+      "4 0 obj<< /Length 20 >>stream\n1 0 obj garbage BT "
+      "ET\nendstream\nendobj\n"
+      "trailer\n<< /Root 1 0 R >>\n%%EOF\n";
+
+  DocumentParser parser(std::make_unique<std::istringstream>(pdf));
+  const std::unique_ptr<Document> document = parser.parse_document();
+  const std::vector<Page *> pages = document->collect_pages();
+
+  ASSERT_EQ(pages.size(), 1);
+  // the real catalog (object 1) survived; the decoy inside the stream did not
+  // clobber it
+  EXPECT_EQ(pages[0]->media_box.as_array()[2].as_real(), 612.0);
+}
+
+// Recovery: the page tree lives in an (uncompressed) object stream. After the
+// forward scan finds the stream, its members are indexed as compressed entries
+// so the catalog and pages resolve.
+TEST(DocumentParser, recovers_object_stream_members) {
+  const std::vector<std::pair<int, std::string>> members = {
+      {2, "<< /Type /Catalog /Pages 3 0 R >>"},
+      {3, "<< /Type /Pages /Kids [4 0 R] /Count 1 >>"},
+      {4, "<< /Type /Page /Parent 3 0 R /MediaBox [0 0 612 792] "
+          "/Resources << >> /Contents 6 0 R >>"}};
+
+  std::string header;
+  std::string payload;
+  for (const auto &[id, body] : members) {
+    header += std::to_string(id) + " " + std::to_string(payload.size()) + " ";
+    payload += body + " ";
+  }
+  const std::string objstm = header + payload;
+
+  std::string pdf = "%PDF-1.7\n";
+  pdf += "5 0 obj\n<< /Type /ObjStm /N " + std::to_string(members.size()) +
+         " /First " + std::to_string(header.size()) + " /Length " +
+         std::to_string(objstm.size()) + " >>\nstream\n" + objstm +
+         "\nendstream\nendobj\n";
+  pdf += "6 0 obj\n<< /Length 5 >>\nstream\nBT ET\nendstream\nendobj\n";
+  pdf += "%%EOF\n";
+
+  DocumentParser parser(std::make_unique<std::istringstream>(pdf));
+  const std::unique_ptr<Document> document = parser.parse_document();
+  const std::vector<Page *> pages = document->collect_pages();
+
+  ASSERT_EQ(pages.size(), 1);
+  EXPECT_EQ(pages[0]->media_box.as_array()[2].as_real(), 612.0);
+  EXPECT_EQ(parser.read_decoded_stream(pages[0]->contents_reference.front()),
+            "BT ET");
+}
+
+// Real-world recovery: an HTTP response accidentally saved as `.pdf` — the body
+// is a valid PDF but the leading `HTTP/1.0 200 OK …` header shifts every
+// offset. Skipped when the private submodule is absent.
+TEST(DocumentParser, recovers_http_response_fixture) {
+  const std::string path = "odr-private/pdf/order-EK52VKL0.pdf";
+  if (!std::filesystem::exists(TestData::test_file_path(path))) {
+    GTEST_SKIP() << "private fixture not available";
+  }
+  check_fixture_parses(path);
+}
+
 TEST(DocumentParser, missing_media_box_defaults_to_us_letter) {
   PdfFileBuilder builder;
   builder.object("<< /Type /Catalog /Pages 2 0 R >>")
diff --git a/test/src/internal/pdf/pdf_object_parser.cpp b/test/src/internal/pdf/pdf_object_parser.cpp
index a9cd15f9..8d56fc85 100644
--- a/test/src/internal/pdf/pdf_object_parser.cpp
+++ b/test/src/internal/pdf/pdf_object_parser.cpp
@@ -2,6 +2,8 @@
 
 #include <sstream>
 #include <string>
+#include <string_view>
+#include <utility>
 
 #include <gtest/gtest.h>
 
@@ -13,8 +15,119 @@ std::string read_hex_string(const std::string &input) {
   ObjectParser parser(in);
   return std::get<HexString>(parser.read_string()).string;
 }
+
+Real read_number(const std::string &input) {
+  std::istringstream in(input);
+  ObjectParser parser(in);
+  return parser.read_number();
+}
+
+UnsignedInteger read_unsigned_integer(const std::string &input) {
+  std::istringstream in(input);
+  ObjectParser parser(in);
+  return parser.read_unsigned_integer();
+}
+
+bool peek_unsigned_integer(const std::string &input) {
+  std::istringstream in(input);
+  ObjectParser parser(in);
+  return parser.peek_unsigned_integer();
+}
+
+Integer read_integer(const std::string &input) {
+  std::istringstream in(input);
+  ObjectParser parser(in);
+  return parser.read_integer();
+}
+
+// Runs skip_past(marker) on `input` and reports whether the marker was found
+// together with the bytes left after the cursor, so a test can pin both the
+// result and the resulting position.
+std::pair<bool, std::string> skip_past(const std::string &input,
+                                       const std::string_view marker) {
+  std::istringstream in(input);
+  ObjectParser parser(in);
+  const bool found = parser.skip_past(marker);
+  std::string rest;
+  while (parser.geti() != ObjectParser::eof) {
+    rest.push_back(parser.bumpc());
+  }
+  return {found, rest};
+}
 } // namespace
 
+// 7.3.3: a real is an optional integer part, a `.`, and an optional fractional
+// part; either part may be absent (but not both).
+TEST(PdfObjectParser, read_number) {
+  EXPECT_DOUBLE_EQ(read_number("3.14"), 3.14);
+  EXPECT_DOUBLE_EQ(read_number("0.5"), 0.5);
+  EXPECT_DOUBLE_EQ(read_number("42."), 42.0);
+  EXPECT_DOUBLE_EQ(read_number(".25"), 0.25);
+  EXPECT_DOUBLE_EQ(read_number("10"), 10.0);
+  // sign applies to the whole magnitude, and either part may be absent
+  EXPECT_DOUBLE_EQ(read_number("-1.5"), -1.5);
+  EXPECT_DOUBLE_EQ(read_number("-.5"), -0.5);
+  EXPECT_DOUBLE_EQ(read_number("+.5"), 0.5);
+  EXPECT_DOUBLE_EQ(read_number("-7"), -7.0);
+}
+
+// 7.3.3: an unsigned integer is one or more digits. A missing number is an
+// error rather than a silent 0.
+TEST(PdfObjectParser, read_unsigned_integer) {
+  EXPECT_EQ(read_unsigned_integer("123"), 123u);
+  EXPECT_EQ(read_unsigned_integer("0"), 0u);
+  EXPECT_EQ(read_unsigned_integer("007"), 7u);
+  EXPECT_EQ(read_unsigned_integer("42 0 obj"), 42u);
+  EXPECT_ANY_THROW(read_unsigned_integer("abc"));
+  EXPECT_ANY_THROW(read_unsigned_integer("-5"));
+  EXPECT_ANY_THROW(read_unsigned_integer(""));
+}
+
+TEST(PdfObjectParser, peek_unsigned_integer) {
+  EXPECT_TRUE(peek_unsigned_integer("5"));
+  EXPECT_TRUE(peek_unsigned_integer("0xyz"));
+  EXPECT_FALSE(peek_unsigned_integer("-5"));
+  EXPECT_FALSE(peek_unsigned_integer("+5"));
+  EXPECT_FALSE(peek_unsigned_integer(".5"));
+  EXPECT_FALSE(peek_unsigned_integer("x"));
+  EXPECT_FALSE(peek_unsigned_integer(""));
+}
+
+// 7.3.3: a signed integer is an optional `+`/`-` followed by digits.
+TEST(PdfObjectParser, read_integer) {
+  EXPECT_EQ(read_integer("123"), 123);
+  EXPECT_EQ(read_integer("-5"), -5);
+  EXPECT_EQ(read_integer("+7"), 7);
+  EXPECT_EQ(read_integer("0"), 0);
+}
+
+// skip_past advances just past the first occurrence of the marker and reports
+// whether it was found; on a miss it consumes the whole stream.
+TEST(PdfObjectParser, skip_past) {
+  using Result = std::pair<bool, std::string>;
+
+  // cursor lands immediately after the marker
+  EXPECT_EQ(skip_past("hello world", "world"), Result(true, ""));
+  EXPECT_EQ(skip_past("abcXYdef", "XY"), Result(true, "def"));
+
+  // the first occurrence wins
+  EXPECT_EQ(skip_past("aXYbXYc", "XY"), Result(true, "bXYc"));
+
+  // not found: the stream is consumed to eof
+  EXPECT_EQ(skip_past("abcdef", "XY"), Result(false, ""));
+
+  // an empty marker matches immediately and consumes nothing
+  EXPECT_EQ(skip_past("abc", ""), Result(true, "abc"));
+
+  // markers with internal repetition must still match across an overlapping
+  // partial match (KMP correctness): "aab" in "aaab", and the doubled-`e` /
+  // doubled prefix cases for "endstream"
+  EXPECT_EQ(skip_past("aaab rest", "aab"), Result(true, " rest"));
+  EXPECT_EQ(skip_past("eendstream!", "endstream"), Result(true, "!"));
+  EXPECT_EQ(skip_past("<<...>>stream\nbytes\nendstreamX", "endstream"),
+            Result(true, "X"));
+}
+
 // 7.3.4.3: a hex string is the bytes of its hex digits, with whitespace ignored
 // and an odd final digit assumed to be followed by a 0.
 TEST(PdfObjectParser, hex_string_basic) {
diff --git a/test/src/internal/util/string_util_test.cpp b/test/src/internal/util/string_util_test.cpp
index d6864589..9d6ca1c5 100644
--- a/test/src/internal/util/string_util_test.cpp
+++ b/test/src/internal/util/string_util_test.cpp
@@ -69,3 +69,55 @@ TEST(string_util, split) {
     EXPECT_EQ(strings[2], "y");
   }
 }
+
+TEST(string_util, trim_view) {
+  EXPECT_EQ(trim_view("  abc  "), "abc");
+  EXPECT_EQ(ltrim_view("  abc  "), "abc  ");
+  EXPECT_EQ(rtrim_view("  abc  "), "  abc");
+
+  // No surrounding whitespace leaves the content untouched.
+  EXPECT_EQ(trim_view("abc"), "abc");
+
+  // Interior whitespace is preserved.
+  EXPECT_EQ(trim_view("\t a b c \n"), "a b c");
+
+  // Empty and all-whitespace inputs collapse to an empty view.
+  EXPECT_EQ(trim_view(""), "");
+  EXPECT_EQ(trim_view(" \t\r\n "), "");
+
+  // The result is a subrange of the input, so the leading offset is the
+  // distance between the data pointers.
+  {
+    const std::string_view input = "   abc ";
+    const std::string_view trimmed = trim_view(input);
+    EXPECT_EQ(trimmed, "abc");
+    EXPECT_EQ(trimmed.data() - input.data(), 3);
+  }
+}
+
+TEST(string_util, trim_view_custom_predicate) {
+  const auto is_dot = [](const char c) { return c == '.'; };
+
+  EXPECT_EQ(trim_view("..abc..", is_dot), "abc");
+  // The default ASCII-space predicate does not treat '.' as whitespace.
+  EXPECT_EQ(trim_view("..abc.."), "..abc..");
+  // Conversely, the dot predicate does not strip spaces.
+  EXPECT_EQ(trim_view("  abc  ", is_dot), "  abc  ");
+}
+
+TEST(string_util, trim_owning_delegates_to_view) {
+  EXPECT_EQ(trim("  abc  "), "abc");
+  EXPECT_EQ(ltrim("  abc  "), "abc  ");
+  EXPECT_EQ(rtrim("  abc  "), "  abc");
+
+  const auto is_dot = [](const char c) { return c == '.'; };
+  EXPECT_EQ(trim("..abc..", is_dot), "abc");
+
+  std::string s = "  abc  ";
+  trim_inplace(s);
+  EXPECT_EQ(s, "abc");
+
+  std::string d = "..abc..";
+  trim_inplace(d, is_dot);
+  EXPECT_EQ(d, "abc");
+}