From 888a7680ce5032b30b95051307b08daec192eea9 Mon Sep 17 00:00:00 2001
From: Sai Asish Y
Date: Mon, 18 May 2026 23:39:00 -0700
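Subject: [PATCH] fix(Readability): treat S and BDI as phrasing content

Add "BDI" and "S" to the phrasing-content tag list on
Readability.prototype (next to the existing "BDO" and "SAMP" entries)
and add a regression test asserting that both elements stay inline
within paragraphs.
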
---
Readability.js | 2 ++
test/test-readability.js | 13 +++++++++++++
2 files changed, 15 insertions(+)
diff --git a/Readability.js b/Readability.js
index 5cff4540..b6de5ca0 100644
--- a/Readability.js
+++ b/Readability.js
@@ -223,6 +223,7 @@ Readability.prototype = {
"ABBR",
"AUDIO",
"B",
+ "BDI",
"BDO",
"BR",
"BUTTON",
@@ -247,6 +248,7 @@ Readability.prototype = {
"PROGRESS",
"Q",
"RUBY",
+ "S",
"SAMP",
"SCRIPT",
"SELECT",
diff --git a/test/test-readability.js b/test/test-readability.js
index ebd4e618..10fc0eda 100644
--- a/test/test-readability.js
+++ b/test/test-readability.js
@@ -340,6 +340,19 @@ describe("Readability API", function () {
expect(content).eql(expected_xhtml);
});
+ it("should keep S and BDI inline within paragraphs", function () {
+ var dom = new JSDOM(
+ "Lorem ipsum dolor sit amet, consectetur adipiscing elit. " +
+ "Nunc mollis leo lacus, vitae semper nisl " +
+ "ullamcorper ut praesent in lectus eu nibh dapibus tincidunt.
"
+ );
+ var content = new Readability(dom.window.document, {
+ charThreshold: 20,
+ }).parse().content;
+ expect(content).not.to.contain("
");
+ expect(content).not.to.contain("");
+ });
+
it("should use custom video regex sent as option", function () {
var dom = new JSDOM(
"Lorem ipsum dolor sit amet, consectetur adipiscing elit. Nunc mollis leo lacus, vitae semper nisl ullamcorper ut.
" +