diff --git a/commonmark/src/main/java/org/commonmark/internal/DocumentParser.java b/commonmark/src/main/java/org/commonmark/internal/DocumentParser.java index d935f8d2..07d97296 100644 --- a/commonmark/src/main/java/org/commonmark/internal/DocumentParser.java +++ b/commonmark/src/main/java/org/commonmark/internal/DocumentParser.java @@ -76,6 +76,7 @@ public class DocumentParser implements ParserState { private final List linkProcessors; private final Set linkMarkers; private final IncludeSourceSpans includeSourceSpans; + private final int maxOpenBlockParsers; private final DocumentBlockParser documentBlockParser; private final Definitions definitions = new Definitions(); @@ -84,7 +85,8 @@ public class DocumentParser implements ParserState { public DocumentParser(List blockParserFactories, InlineParserFactory inlineParserFactory, List inlineContentParserFactories, List delimiterProcessors, - List linkProcessors, Set linkMarkers, IncludeSourceSpans includeSourceSpans) { + List linkProcessors, Set linkMarkers, + IncludeSourceSpans includeSourceSpans, int maxOpenBlockParsers) { this.blockParserFactories = blockParserFactories; this.inlineParserFactory = inlineParserFactory; this.inlineContentParserFactories = inlineContentParserFactories; @@ -92,6 +94,7 @@ public DocumentParser(List blockParserFactories, InlineParse this.linkProcessors = linkProcessors; this.linkMarkers = linkMarkers; this.includeSourceSpans = includeSourceSpans; + this.maxOpenBlockParsers = maxOpenBlockParsers; this.documentBlockParser = new DocumentBlockParser(); activateBlockParser(new OpenBlockParser(documentBlockParser, 0)); @@ -461,6 +464,9 @@ private void addSourceSpans() { } private BlockStartImpl findBlockStart(BlockParser blockParser) { + if (openBlockParsers.size() > maxOpenBlockParsers) { + return null; + } MatchedBlockParser matchedBlockParser = new MatchedBlockParserImpl(blockParser); for (BlockParserFactory blockParserFactory : blockParserFactories) { BlockStart result = blockParserFactory.tryStart(this, matchedBlockParser); diff --git a/commonmark/src/main/java/org/commonmark/parser/Parser.java b/commonmark/src/main/java/org/commonmark/parser/Parser.java index b98d0581..8faac789 100644 --- a/commonmark/src/main/java/org/commonmark/parser/Parser.java +++ b/commonmark/src/main/java/org/commonmark/parser/Parser.java @@ -37,6 +37,7 @@ public class Parser { private final InlineParserFactory inlineParserFactory; private final List postProcessors; private final IncludeSourceSpans includeSourceSpans; + private final int maxOpenBlockParsers; private Parser(Builder builder) { this.blockParserFactories = DocumentParser.calculateBlockParserFactories(builder.blockParserFactories, builder.enabledBlockTypes); @@ -47,6 +48,7 @@ private Parser(Builder builder) { this.linkProcessors = builder.linkProcessors; this.linkMarkers = builder.linkMarkers; this.includeSourceSpans = builder.includeSourceSpans; + this.maxOpenBlockParsers = builder.maxOpenBlockParsers; // Try to construct an inline parser. Invalid configuration might result in an exception, which we want to // detect as soon as possible. @@ -106,7 +108,7 @@ public Node parseReader(Reader input) throws IOException { private DocumentParser createDocumentParser() { return new DocumentParser(blockParserFactories, inlineParserFactory, inlineContentParserFactories, - delimiterProcessors, linkProcessors, linkMarkers, includeSourceSpans); + delimiterProcessors, linkProcessors, linkMarkers, includeSourceSpans, maxOpenBlockParsers); } private Node postProcess(Node document) { @@ -129,6 +131,7 @@ public static class Builder { private Set> enabledBlockTypes = DocumentParser.getDefaultBlockParserTypes(); private InlineParserFactory inlineParserFactory; private IncludeSourceSpans includeSourceSpans = IncludeSourceSpans.NONE; + private int maxOpenBlockParsers = Integer.MAX_VALUE; /** * @return the configured {@link Parser} @@ -200,6 +203,27 @@ public Builder includeSourceSpans(IncludeSourceSpans includeSourceSpans) { return this; } + /** + * Limit how many block parsers may be open at once while parsing. + *

+ * Once the limit is reached, additional block starts are treated as plain text instead of + * creating deeper nested block structure. + *

+ * The document root parser is not counted. The default is unlimited, so callers that keep + * using {@code Parser.builder().build()} preserve behavior. + * + * @param maxOpenBlockParsers maximum number of open non-document block parsers, must be + * zero or greater + * @return {@code this} + */ + public Builder maxOpenBlockParsers(int maxOpenBlockParsers) { + if (maxOpenBlockParsers < 0) { + throw new IllegalArgumentException("maxOpenBlockParsers must be >= 0"); + } + this.maxOpenBlockParsers = maxOpenBlockParsers; + return this; + } + /** * Add a custom block parser factory. *

diff --git a/commonmark/src/test/java/org/commonmark/test/ParserTest.java b/commonmark/src/test/java/org/commonmark/test/ParserTest.java index c119b5e2..337196c5 100644 --- a/commonmark/src/test/java/org/commonmark/test/ParserTest.java +++ b/commonmark/src/test/java/org/commonmark/test/ParserTest.java @@ -2,8 +2,8 @@ import org.commonmark.node.*; import org.commonmark.parser.*; -import org.commonmark.parser.block.*; import org.commonmark.renderer.html.HtmlRenderer; +import org.commonmark.renderer.markdown.MarkdownRenderer; import org.commonmark.testutil.TestResources; import org.junit.jupiter.api.Test; @@ -15,8 +15,6 @@ import java.util.HashSet; import java.util.List; import java.util.Set; -import java.util.concurrent.Callable; -import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import java.util.concurrent.Future; @@ -135,6 +133,76 @@ public void threading() throws Exception { } } + @Test + public void maxOpenBlockParsersMustBeZeroOrGreater() { + assertThatThrownBy(() -> + Parser.builder().maxOpenBlockParsers(-1)).isInstanceOf(IllegalArgumentException.class); + } + + @Test + public void maxOpenBlockParsersIsOptIn() { + var parser = Parser.builder().build(); + + var document = parser.parse(alternatingNestedList(9)); + + assertThat(renderText(deepestStructuredParagraph(document, 9))).isEqualTo("level9"); + } + + @Test + public void maxOpenBlockParsersPreservesSevenLogicalListLevelsAtSeventeenBlocks() { + var parser = Parser.builder().maxOpenBlockParsers(17).build(); + + var document = parser.parse(alternatingNestedList(7)); + + assertThat(renderText(deepestStructuredParagraph(document, 7))).isEqualTo("level7"); + } + + @Test + public void maxOpenBlockParsersPreservesEightLogicalListLevelsAtSeventeenBlocks() { + var parser = Parser.builder().maxOpenBlockParsers(17).build(); + + var document = parser.parse(alternatingNestedList(8)); + + assertThat(renderText(deepestStructuredParagraph(document, 8))).isEqualTo("level8"); + } + + @Test + public void maxOpenBlockParsersDegradesTheNinthLogicalListLevelToPlainText() { + var parser = Parser.builder().maxOpenBlockParsers(17).build(); + + var document = parser.parse(alternatingNestedList(9)); + var deepestParagraph = deepestStructuredParagraph(document, 8); + + assertThat(renderText(deepestParagraph)).isEqualTo("level8\n\\- level9"); + assertThat(deepestParagraph.getNext()).isNull(); + } + + @Test + public void maxOpenBlockParsersAlsoLimitsMixedListAndBlockQuoteNesting() { + var parser = Parser.builder().maxOpenBlockParsers(5).build(); + + var document = parser.parse(String.join("\n", + "- level1", + " > level2", + " > > level3", + " > > > level4")); + + var listBlock = document.getFirstChild(); + assertThat(listBlock).isInstanceOf(BulletList.class); + + var listItem = listBlock.getFirstChild(); + var blockQuote1 = listItem.getLastChild(); + assertThat(blockQuote1).isInstanceOf(BlockQuote.class); + + var blockQuote2 = blockQuote1.getLastChild(); + assertThat(blockQuote2).isInstanceOf(BlockQuote.class); + + var deepestParagraph = blockQuote2.getLastChild(); + assertThat(deepestParagraph).isInstanceOf(Paragraph.class); + assertThat(renderText(deepestParagraph)).isEqualTo("level3\n\\> level4"); + assertThat(deepestParagraph.getNext()).isNull(); + } + private String firstText(Node n) { while (!(n instanceof Text)) { assertThat(n).isNotNull(); @@ -142,4 +210,44 @@ private String firstText(Node n) { } return ((Text) n).getLiteral(); } + + private Paragraph deepestStructuredParagraph(Node document, int levels) { + Node node = document.getFirstChild(); + for (int level = 1; level <= levels; level++) { + assertThat(node).isInstanceOf(ListBlock.class); + var listItem = node.getFirstChild(); + assertThat(listItem).isNotNull(); + if (level == levels) { + assertThat(listItem.getFirstChild()).isInstanceOf(Paragraph.class); + return (Paragraph) listItem.getFirstChild(); + } + node = listItem.getLastChild(); + } + throw new AssertionError("unreachable"); + } + + private String renderText(Node node) { + return MarkdownRenderer.builder().build().render(node).trim(); + } + + private String alternatingNestedList(int levels) { + int indent = 0; + var lines = new ArrayList(); + for (int level = 1; level <= levels; level++) { + var ordered = level % 2 == 0; + var marker = ordered ? "1. " : "- "; + lines.add(" ".repeat(indent) + marker + "level" + level); + indent += marker.length(); + } + return String.join("\n", lines); + } + + private int depth(Node node) { + int depth = 0; + while (node.getParent() != null) { + node = node.getParent(); + depth++; + } + return depth; + } }