From 339900083e62a7e8b375d8c0c1e176686996219f Mon Sep 17 00:00:00 2001 From: tompng Date: Tue, 26 May 2026 04:09:46 +0900 Subject: [PATCH 1/2] XPath nodeset always in document-order In XPath 1.0 spec, nodeset are unordered set, but many operations are required to use document ordered. Functions such as local-name should use first node in document order. Filter expressions such as `(preceding::foo)[1]` need to use document-order. JavaScript's `XPathResult.ORDERED_NODE_ITERATOR_TYPE` `XPathResult.FIRST_ORDERED_NODE_TYPE` always orders in document order. This commit will make REXML match to this ordering, and also fixes inconsistent ordering bug: same xpath may return document-ordered nodes or reverse-document-ordered nodes depending on `nodesets.size`. --- lib/rexml/xpath_parser.rb | 40 +++++++------- test/xpath/test_axis_preceding_sibling.rb | 2 +- test/xpath/test_base.rb | 65 +++++++++++++++++------ 3 files changed, 67 insertions(+), 40 deletions(-) diff --git a/lib/rexml/xpath_parser.rb b/lib/rexml/xpath_parser.rb index c5d420ce..75394706 100644 --- a/lib/rexml/xpath_parser.rb +++ b/lib/rexml/xpath_parser.rb @@ -325,7 +325,7 @@ def expr( path_stack, nodeset, context=nil ) nodesets end when :preceding_sibling - nodeset = step(path_stack, order: :reverse) do + nodeset = step(path_stack) do nodesets = [] nodeset.each do |node| raw_node = node.raw_node @@ -342,7 +342,7 @@ def expr( path_stack, nodeset, context=nil ) nodesets end when :preceding - nodeset = step(path_stack, order: :reverse) do + nodeset = step(path_stack) do unnode(nodeset) do |node| preceding(node) end @@ -449,7 +449,7 @@ def expr( path_stack, nodeset, context=nil ) leave(:expr, path_stack, nodeset) if @debug end - def step(path_stack, any_type: :element, order: :forward) + def step(path_stack, any_type: :element) nodesets = yield begin enter(:step, path_stack, nodesets) if @debug @@ -459,21 +459,19 @@ def step(path_stack, any_type: :element, order: :forward) predicate_expression = path_stack.shift.dclone nodesets = evaluate_predicate(predicate_expression, nodesets) end - if nodesets.size == 1 - ordered_nodeset = nodesets[0] - else - seen = {}.compare_by_identity - raw_nodes = [] - nodesets.each do |nodeset| - nodeset.each do |node| - raw_node = node.respond_to?(:raw_node) ? node.raw_node : node - next if seen.key?(raw_node) - seen[raw_node] = true - raw_nodes << raw_node - end + + seen = {}.compare_by_identity + raw_nodes = [] + nodesets.each do |nodeset| + nodeset.each do |node| + raw_node = node.respond_to?(:raw_node) ? node.raw_node : node + next if seen.key?(raw_node) + seen[raw_node] = true + raw_nodes << raw_node end - ordered_nodeset = sort(raw_nodes, order) end + ordered_nodeset = sort(raw_nodes) + new_nodeset = [] ordered_nodeset.each do |node| new_nodeset << XPathNode.new(node, position: new_nodeset.size + 1) @@ -660,7 +658,9 @@ def leave(tag, *args) # in and out of function calls. If I knew what the index of the nodes was, # I wouldn't have to do this. Maybe add a document IDX for each node? # Problems with mutable documents. Or, rewrite everything. - def sort(array_of_nodes, order) + def sort(array_of_nodes) + return array_of_nodes if array_of_nodes.size <= 1 + new_arry = [] array_of_nodes.each { |node| node_idx = [] @@ -672,11 +672,7 @@ def sort(array_of_nodes, order) new_arry << [ node_idx.reverse, node ] } ordered = new_arry.sort_by do |index, node| - if order == :forward - index - else - index.map(&:-@) - end + index end ordered.collect do |_index, node| node diff --git a/test/xpath/test_axis_preceding_sibling.rb b/test/xpath/test_axis_preceding_sibling.rb index 9c44ad63..2f0ba5d1 100644 --- a/test/xpath/test_axis_preceding_sibling.rb +++ b/test/xpath/test_axis_preceding_sibling.rb @@ -23,7 +23,7 @@ def test_preceding_sibling_axis assert_equal "6", context.attributes["id"] prev = XPath.first(context, "preceding-sibling::f") - assert_equal "5", prev.attributes["id"] + assert_equal "3", prev.attributes["id"] prev = XPath.first(context, "preceding-sibling::f[1]") assert_equal "5", prev.attributes["id"] diff --git a/test/xpath/test_base.rb b/test/xpath/test_base.rb index 1c6eb624..b9e56c5e 100644 --- a/test/xpath/test_base.rb +++ b/test/xpath/test_base.rb @@ -382,32 +382,39 @@ def test_preceding start = XPath.first( d, "/a/b[@id='1']" ) assert_equal 'b', start.name c = XPath.first( start, "preceding::c" ) - assert_equal '2', c.attributes['id'] + assert_equal '0', c.attributes['id'] - c1, c0 = XPath.match( d, "/a/b/c[@id='2']/preceding::node()" ) - assert_equal '1', c1.attributes['id'] + b0, b2, c0, c1 = XPath.match( d, "/a/b/c[@id='2']/preceding::node()" ) + assert_equal 'b', b0.name + assert_equal 'b', b2.name + assert_equal 'c', c0.name + assert_equal 'c', c1.name + + assert_equal '0', b0.attributes['id'] + assert_equal '2', b2.attributes['id'] assert_equal '0', c0.attributes['id'] + assert_equal '1', c1.attributes['id'] - c2, c1, c0, b, b2, b0 = XPath.match( start, "preceding::node()" ) + b0, b2, b, c0, c1, c2 = XPath.match( start, "preceding::node()" ) - assert_equal 'c', c2.name - assert_equal 'c', c1.name assert_equal 'c', c0.name - assert_equal 'b', b.name - assert_equal 'b', b2.name + assert_equal 'c', c1.name + assert_equal 'c', c2.name assert_equal 'b', b0.name + assert_equal 'b', b2.name + assert_equal 'b', b.name - assert_equal '2', c2.attributes['id'] - assert_equal '1', c1.attributes['id'] assert_equal '0', c0.attributes['id'] - assert b.attributes.empty? - assert_equal '2', b2.attributes['id'] + assert_equal '1', c1.attributes['id'] + assert_equal '2', c2.attributes['id'] assert_equal '0', b0.attributes['id'] + assert_equal '2', b2.attributes['id'] + assert b.attributes.empty? d = REXML::Document.new("") matches = REXML::XPath.match(d, "/a/d/preceding::node()") - assert_equal("c", matches[0].name) - assert_equal("b", matches[1].name) + assert_equal("b", matches[0].name) + assert_equal("c", matches[1].name) s = "" d = REXML::Document.new(s) @@ -425,7 +432,7 @@ def test_preceding_multiple XML doc = REXML::Document.new(source) matches = REXML::XPath.match(doc, "a/d/preceding::*") - assert_equal(["d", "c", "b"], matches.map(&:name)) + assert_equal(["b", "c", "d"], matches.map(&:name)) end def test_following_multiple @@ -498,7 +505,7 @@ def test_preceding_sibling_across_multiple_nodes XML doc = REXML::Document.new(source) matches = REXML::XPath.match(doc, "a/b/x/preceding-sibling::*") - assert_equal(["e", "d", "c"], matches.map(&:name)) + assert_equal(["c", "d", "e"], matches.map(&:name)) end def test_preceding_sibling_within_single_node @@ -511,7 +518,7 @@ def test_preceding_sibling_within_single_node XML doc = REXML::Document.new(source) matches = REXML::XPath.match(doc, "a/b/x/preceding-sibling::*") - assert_equal(["e", "x", "d", "c"], matches.map(&:name)) + assert_equal(["c", "d", "x", "e"], matches.map(&:name)) end def test_following @@ -856,6 +863,30 @@ def test_ordering r.collect {|element| element.attribute("id").value}) end + def test_order_consisntency + doc = REXML::Document.new('') + assert_equal('a', REXML::XPath.first(doc, '//d/ancestor::*').name) + assert_equal('a', REXML::XPath.first(doc, '//d[@id="2"]/ancestor::*').name) + assert_equal(%w[a b c d], REXML::XPath.match(doc, '//d/ancestor::*').map(&:name)) + assert_equal(%w[a b c d], REXML::XPath.match(doc, '//d[@id="2"]/ancestor::*').map(&:name)) + + assert_equal('a', REXML::XPath.first(doc, '//d/ancestor-or-self::*').name) + assert_equal('a', REXML::XPath.first(doc, '//d[@id="2"]/ancestor-or-self::*').name) + assert_equal(%w[a b c d d], REXML::XPath.match(doc, '//d/ancestor-or-self::*').map(&:name)) + assert_equal(%w[a b c d d], REXML::XPath.match(doc, '//d[@id="2"]/ancestor-or-self::*').map(&:name)) + + doc = REXML::Document.new('') + assert_equal('b', REXML::XPath.first(doc, '//d/preceding::*').name) + assert_equal('b', REXML::XPath.first(doc, '//d[@id="2"]/preceding::*').name) + assert_equal(%w[b c d], REXML::XPath.match(doc, '//d/preceding::*').map(&:name)) + assert_equal(%w[b c d], REXML::XPath.match(doc, '//d[@id="2"]/preceding::*').map(&:name)) + + assert_equal('b', REXML::XPath.first(doc, '//d/preceding-sibling::*').name) + assert_equal('b', REXML::XPath.first(doc, '//d[@id="2"]/preceding-sibling::*').name) + assert_equal(%w[b c d], REXML::XPath.match(doc, '//d/preceding-sibling::*').map(&:name)) + assert_equal(%w[b c d], REXML::XPath.match(doc, '//d[@id="2"]/preceding-sibling::*').map(&:name)) + end + def test_descendant_or_self_ordering source = " From bdcf5012f1f28a2aa483734faceb0c36abad602a Mon Sep 17 00:00:00 2001 From: tomoya ishida Date: Thu, 28 May 2026 12:46:30 +0900 Subject: [PATCH 2/2] Potential fix for pull request finding Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com> --- test/xpath/test_base.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/xpath/test_base.rb b/test/xpath/test_base.rb index b9e56c5e..2f0a9f09 100644 --- a/test/xpath/test_base.rb +++ b/test/xpath/test_base.rb @@ -863,7 +863,7 @@ def test_ordering r.collect {|element| element.attribute("id").value}) end - def test_order_consisntency + def test_order_consistency doc = REXML::Document.new('') assert_equal('a', REXML::XPath.first(doc, '//d/ancestor::*').name) assert_equal('a', REXML::XPath.first(doc, '//d[@id="2"]/ancestor::*').name)