From 888a7680ce5032b30b95051307b08daec192eea9 Mon Sep 17 00:00:00 2001 From: Sai Asish Y Date: Mon, 18 May 2026 23:39:00 -0700 Subject: [PATCH] fix(Readability): treat S and BDI as phrasing content --- Readability.js | 2 ++ test/test-readability.js | 13 +++++++++++++ 2 files changed, 15 insertions(+) diff --git a/Readability.js b/Readability.js index 5cff4540..b6de5ca0 100644 --- a/Readability.js +++ b/Readability.js @@ -223,6 +223,7 @@ Readability.prototype = { "ABBR", "AUDIO", "B", + "BDI", "BDO", "BR", "BUTTON", @@ -247,6 +248,7 @@ Readability.prototype = { "PROGRESS", "Q", "RUBY", + "S", "SAMP", "SCRIPT", "SELECT", diff --git a/test/test-readability.js b/test/test-readability.js index ebd4e618..10fc0eda 100644 --- a/test/test-readability.js +++ b/test/test-readability.js @@ -340,6 +340,19 @@ describe("Readability API", function () { expect(content).eql(expected_xhtml); }); + it("should keep S and BDI inline within paragraphs", function () { + var dom = new JSDOM( + "
Lorem ipsum dolor sit amet, consectetur adipiscing elit. " + + "Nunc mollis leo lacus, vitae semper nisl " + + "ullamcorper ut praesent in lectus eu nibh dapibus tincidunt.
" + ); + var content = new Readability(dom.window.document, { + charThreshold: 20, + }).parse().content; + expect(content).not.to.contain("

"); + expect(content).not.to.contain("

"); + }); + it("should use custom video regex sent as option", function () { var dom = new JSDOM( "

Lorem ipsum dolor sit amet, consectetur adipiscing elit. Nunc mollis leo lacus, vitae semper nisl ullamcorper ut.

" +