From b50424558700da0f91336f51f5754ccd616c5491 Mon Sep 17 00:00:00 2001 From: tompng Date: Tue, 26 May 2026 04:09:46 +0900 Subject: [PATCH 1/3] pull320 --- lib/rexml/xpath_parser.rb | 40 +++++++------- test/xpath/test_axis_preceding_sibling.rb | 2 +- test/xpath/test_base.rb | 65 +++++++++++++++++------ 3 files changed, 67 insertions(+), 40 deletions(-) diff --git a/lib/rexml/xpath_parser.rb b/lib/rexml/xpath_parser.rb index c5d420ce..75394706 100644 --- a/lib/rexml/xpath_parser.rb +++ b/lib/rexml/xpath_parser.rb @@ -325,7 +325,7 @@ def expr( path_stack, nodeset, context=nil ) nodesets end when :preceding_sibling - nodeset = step(path_stack, order: :reverse) do + nodeset = step(path_stack) do nodesets = [] nodeset.each do |node| raw_node = node.raw_node @@ -342,7 +342,7 @@ def expr( path_stack, nodeset, context=nil ) nodesets end when :preceding - nodeset = step(path_stack, order: :reverse) do + nodeset = step(path_stack) do unnode(nodeset) do |node| preceding(node) end @@ -449,7 +449,7 @@ def expr( path_stack, nodeset, context=nil ) leave(:expr, path_stack, nodeset) if @debug end - def step(path_stack, any_type: :element, order: :forward) + def step(path_stack, any_type: :element) nodesets = yield begin enter(:step, path_stack, nodesets) if @debug @@ -459,21 +459,19 @@ def step(path_stack, any_type: :element, order: :forward) predicate_expression = path_stack.shift.dclone nodesets = evaluate_predicate(predicate_expression, nodesets) end - if nodesets.size == 1 - ordered_nodeset = nodesets[0] - else - seen = {}.compare_by_identity - raw_nodes = [] - nodesets.each do |nodeset| - nodeset.each do |node| - raw_node = node.respond_to?(:raw_node) ? node.raw_node : node - next if seen.key?(raw_node) - seen[raw_node] = true - raw_nodes << raw_node - end + + seen = {}.compare_by_identity + raw_nodes = [] + nodesets.each do |nodeset| + nodeset.each do |node| + raw_node = node.respond_to?(:raw_node) ? node.raw_node : node + next if seen.key?(raw_node) + seen[raw_node] = true + raw_nodes << raw_node end - ordered_nodeset = sort(raw_nodes, order) end + ordered_nodeset = sort(raw_nodes) + new_nodeset = [] ordered_nodeset.each do |node| new_nodeset << XPathNode.new(node, position: new_nodeset.size + 1) @@ -660,7 +658,9 @@ def leave(tag, *args) # in and out of function calls. If I knew what the index of the nodes was, # I wouldn't have to do this. Maybe add a document IDX for each node? # Problems with mutable documents. Or, rewrite everything. - def sort(array_of_nodes, order) + def sort(array_of_nodes) + return array_of_nodes if array_of_nodes.size <= 1 + new_arry = [] array_of_nodes.each { |node| node_idx = [] @@ -672,11 +672,7 @@ def sort(array_of_nodes, order) new_arry << [ node_idx.reverse, node ] } ordered = new_arry.sort_by do |index, node| - if order == :forward - index - else - index.map(&:-@) - end + index end ordered.collect do |_index, node| node diff --git a/test/xpath/test_axis_preceding_sibling.rb b/test/xpath/test_axis_preceding_sibling.rb index 9c44ad63..2f0ba5d1 100644 --- a/test/xpath/test_axis_preceding_sibling.rb +++ b/test/xpath/test_axis_preceding_sibling.rb @@ -23,7 +23,7 @@ def test_preceding_sibling_axis assert_equal "6", context.attributes["id"] prev = XPath.first(context, "preceding-sibling::f") - assert_equal "5", prev.attributes["id"] + assert_equal "3", prev.attributes["id"] prev = XPath.first(context, "preceding-sibling::f[1]") assert_equal "5", prev.attributes["id"] diff --git a/test/xpath/test_base.rb b/test/xpath/test_base.rb index 1c6eb624..b9e56c5e 100644 --- a/test/xpath/test_base.rb +++ b/test/xpath/test_base.rb @@ -382,32 +382,39 @@ def test_preceding start = XPath.first( d, "/a/b[@id='1']" ) assert_equal 'b', start.name c = XPath.first( start, "preceding::c" ) - assert_equal '2', c.attributes['id'] + assert_equal '0', c.attributes['id'] - c1, c0 = XPath.match( d, "/a/b/c[@id='2']/preceding::node()" ) - assert_equal '1', c1.attributes['id'] + b0, b2, c0, c1 = XPath.match( d, "/a/b/c[@id='2']/preceding::node()" ) + assert_equal 'b', b0.name + assert_equal 'b', b2.name + assert_equal 'c', c0.name + assert_equal 'c', c1.name + + assert_equal '0', b0.attributes['id'] + assert_equal '2', b2.attributes['id'] assert_equal '0', c0.attributes['id'] + assert_equal '1', c1.attributes['id'] - c2, c1, c0, b, b2, b0 = XPath.match( start, "preceding::node()" ) + b0, b2, b, c0, c1, c2 = XPath.match( start, "preceding::node()" ) - assert_equal 'c', c2.name - assert_equal 'c', c1.name assert_equal 'c', c0.name - assert_equal 'b', b.name - assert_equal 'b', b2.name + assert_equal 'c', c1.name + assert_equal 'c', c2.name assert_equal 'b', b0.name + assert_equal 'b', b2.name + assert_equal 'b', b.name - assert_equal '2', c2.attributes['id'] - assert_equal '1', c1.attributes['id'] assert_equal '0', c0.attributes['id'] - assert b.attributes.empty? - assert_equal '2', b2.attributes['id'] + assert_equal '1', c1.attributes['id'] + assert_equal '2', c2.attributes['id'] assert_equal '0', b0.attributes['id'] + assert_equal '2', b2.attributes['id'] + assert b.attributes.empty? d = REXML::Document.new("") matches = REXML::XPath.match(d, "/a/d/preceding::node()") - assert_equal("c", matches[0].name) - assert_equal("b", matches[1].name) + assert_equal("b", matches[0].name) + assert_equal("c", matches[1].name) s = "" d = REXML::Document.new(s) @@ -425,7 +432,7 @@ def test_preceding_multiple XML doc = REXML::Document.new(source) matches = REXML::XPath.match(doc, "a/d/preceding::*") - assert_equal(["d", "c", "b"], matches.map(&:name)) + assert_equal(["b", "c", "d"], matches.map(&:name)) end def test_following_multiple @@ -498,7 +505,7 @@ def test_preceding_sibling_across_multiple_nodes XML doc = REXML::Document.new(source) matches = REXML::XPath.match(doc, "a/b/x/preceding-sibling::*") - assert_equal(["e", "d", "c"], matches.map(&:name)) + assert_equal(["c", "d", "e"], matches.map(&:name)) end def test_preceding_sibling_within_single_node @@ -511,7 +518,7 @@ def test_preceding_sibling_within_single_node XML doc = REXML::Document.new(source) matches = REXML::XPath.match(doc, "a/b/x/preceding-sibling::*") - assert_equal(["e", "x", "d", "c"], matches.map(&:name)) + assert_equal(["c", "d", "x", "e"], matches.map(&:name)) end def test_following @@ -856,6 +863,30 @@ def test_ordering r.collect {|element| element.attribute("id").value}) end + def test_order_consisntency + doc = REXML::Document.new('') + assert_equal('a', REXML::XPath.first(doc, '//d/ancestor::*').name) + assert_equal('a', REXML::XPath.first(doc, '//d[@id="2"]/ancestor::*').name) + assert_equal(%w[a b c d], REXML::XPath.match(doc, '//d/ancestor::*').map(&:name)) + assert_equal(%w[a b c d], REXML::XPath.match(doc, '//d[@id="2"]/ancestor::*').map(&:name)) + + assert_equal('a', REXML::XPath.first(doc, '//d/ancestor-or-self::*').name) + assert_equal('a', REXML::XPath.first(doc, '//d[@id="2"]/ancestor-or-self::*').name) + assert_equal(%w[a b c d d], REXML::XPath.match(doc, '//d/ancestor-or-self::*').map(&:name)) + assert_equal(%w[a b c d d], REXML::XPath.match(doc, '//d[@id="2"]/ancestor-or-self::*').map(&:name)) + + doc = REXML::Document.new('') + assert_equal('b', REXML::XPath.first(doc, '//d/preceding::*').name) + assert_equal('b', REXML::XPath.first(doc, '//d[@id="2"]/preceding::*').name) + assert_equal(%w[b c d], REXML::XPath.match(doc, '//d/preceding::*').map(&:name)) + assert_equal(%w[b c d], REXML::XPath.match(doc, '//d[@id="2"]/preceding::*').map(&:name)) + + assert_equal('b', REXML::XPath.first(doc, '//d/preceding-sibling::*').name) + assert_equal('b', REXML::XPath.first(doc, '//d[@id="2"]/preceding-sibling::*').name) + assert_equal(%w[b c d], REXML::XPath.match(doc, '//d/preceding-sibling::*').map(&:name)) + assert_equal(%w[b c d], REXML::XPath.match(doc, '//d[@id="2"]/preceding-sibling::*').map(&:name)) + end + def test_descendant_or_self_ordering source = " From 9ea2ef4be15180009569b8cdd279c770d5cc7eb2 Mon Sep 17 00:00:00 2001 From: tompng Date: Fri, 22 May 2026 23:56:51 +0900 Subject: [PATCH 2/3] pull317 --- lib/rexml/parsers/xpathparser.rb | 2 +- lib/rexml/xpath_parser.rb | 23 +++++++++++++++-------- test/xpath/test_base.rb | 27 +++++++++++++++++++++++++++ 3 files changed, 43 insertions(+), 9 deletions(-) diff --git a/lib/rexml/parsers/xpathparser.rb b/lib/rexml/parsers/xpathparser.rb index a6d76fdc..c4b8531f 100644 --- a/lib/rexml/parsers/xpathparser.rb +++ b/lib/rexml/parsers/xpathparser.rb @@ -654,7 +654,7 @@ def PrimaryExpr path, parsed contents = contents[1..-2] n = [] OrExpr( contents, n ) - parsed.concat(n) + parsed.push(:group, n) end path end diff --git a/lib/rexml/xpath_parser.rb b/lib/rexml/xpath_parser.rb index 75394706..c839d265 100644 --- a/lib/rexml/xpath_parser.rb +++ b/lib/rexml/xpath_parser.rb @@ -439,7 +439,18 @@ def expr( path_stack, nodeset, context=nil ) end Functions.context = target_context return Functions.send(func_name, *args) - + when :group + sub_expression = path_stack.shift + result = expr(sub_expression, nodeset, context) + if result.is_a?(Array) + # If result is a nodeset, apply following predicates + path_stack.unshift(:node) + nodeset = step(path_stack) do + [result] + end + else + return result + end else raise "[BUG] Unexpected path: <#{op.inspect}>: <#{path_stack.inspect}>" end @@ -592,7 +603,6 @@ def filter_nodeset(nodeset) def evaluate_predicate(expression, nodesets) enter(:predicate, expression, nodesets) if @debug - new_nodeset_count = 0 new_nodesets = nodesets.collect do |nodeset| new_nodeset = [] subcontext = { :size => nodeset.size } @@ -609,20 +619,17 @@ def evaluate_predicate(expression, nodesets) result = result[0] if result.kind_of? Array and result.length == 1 if result.kind_of? Numeric if result == node.position - new_nodeset_count += 1 - new_nodeset << XPathNode.new(node, position: new_nodeset_count) + new_nodeset << XPathNode.new(node, position: new_nodeset.size + 1) end elsif result.instance_of? Array if result.size > 0 and result.inject(false) {|k,s| s or k} if result.size > 0 - new_nodeset_count += 1 - new_nodeset << XPathNode.new(node, position: new_nodeset_count) + new_nodeset << XPathNode.new(node, position: new_nodeset.size + 1) end end else if result - new_nodeset_count += 1 - new_nodeset << XPathNode.new(node, position: new_nodeset_count) + new_nodeset << XPathNode.new(node, position: new_nodeset.size + 1) end end end diff --git a/test/xpath/test_base.rb b/test/xpath/test_base.rb index b9e56c5e..205711ee 100644 --- a/test/xpath/test_base.rb +++ b/test/xpath/test_base.rb @@ -582,18 +582,45 @@ def test_nested_predicates matches = XPath.match(doc, '(/div/div/test[3])').map(&:text) assert_equal [], matches + matches = XPath.match(doc, '/div/div/test[1][1]').map(&:text) + assert_equal ["ab", "ef", "hi"], matches matches = XPath.match(doc, '(/div/div/test[1])[1]').map(&:text) assert_equal ["ab"], matches + matches = XPath.match(doc, '/div/div/test[1][2]').map(&:text) + assert_equal [], matches matches = XPath.match(doc, '(/div/div/test[1])[2]').map(&:text) assert_equal ["ef"], matches matches = XPath.match(doc, '(/div/div/test[1])[3]').map(&:text) assert_equal ["hi"], matches + matches = XPath.match(doc, '/div/div/test[2][1]').map(&:text) + assert_equal ["cd", "gh"], matches matches = XPath.match(doc, '(/div/div/test[2])[1]').map(&:text) assert_equal ["cd"], matches + matches = XPath.match(doc, '/div/div/test[2][2]').map(&:text) + assert_equal [], matches matches = XPath.match(doc, '(/div/div/test[2])[2]').map(&:text) assert_equal ["gh"], matches matches = XPath.match(doc, '(/div/div/test[2])[3]').map(&:text) assert_equal [], matches + matches = XPath.match(doc, '//div[1]/test|//div[2]/test[2]').map(&:text) + assert_equal ["ab", "cd", "gh"], matches + matches = XPath.match(doc, '(//div[1]/test|//div[2]/test)[2]').map(&:text) + assert_equal ["cd"], matches + + xpath = '/div/div/test/preceding::*' + without_parentheses = XPath.match(doc, xpath).map(&:text) + with_parentheses = XPath.match(doc, "(#{xpath})").map(&:text) + assert_equal without_parentheses, with_parentheses + + xpath = '/div/div/test/preceding-sibling::*' + without_parentheses = XPath.match(doc, xpath).map(&:text) + with_parentheses = XPath.match(doc, "(#{xpath})").map(&:text) + assert_equal without_parentheses, with_parentheses + + xpath = '/div/div/test/ancestor::*' + without_parentheses = XPath.match(doc, xpath).map(&:text) + with_parentheses = XPath.match(doc, "(#{xpath})").map(&:text) + assert_equal without_parentheses, with_parentheses end # Contributed by Mike Stok From 73dd6703fc47e51aeb733230a30b7ceb56d3543f Mon Sep 17 00:00:00 2001 From: tompng Date: Thu, 21 May 2026 02:12:55 +0900 Subject: [PATCH 3/3] Optimize XPath step If a predicate of xpath is position-independent, we don't need to create nodesets that has many duplicated nodes with different positions. --- lib/rexml/xpath_parser.rb | 602 ++++++++++++++-------- test/xpath/test_attribute.rb | 13 + test/xpath/test_axis_preceding_sibling.rb | 68 +++ test/xpath/test_base.rb | 10 +- test/xpath/test_predicate.rb | 38 ++ 5 files changed, 512 insertions(+), 219 deletions(-) diff --git a/lib/rexml/xpath_parser.rb b/lib/rexml/xpath_parser.rb index c839d265..45ed6707 100644 --- a/lib/rexml/xpath_parser.rb +++ b/lib/rexml/xpath_parser.rb @@ -199,28 +199,24 @@ def expr( path_stack, nodeset, context=nil ) nodeset = [XPathNode.new(first_raw_node.root_node, position: 1)] when :self nodeset = step(path_stack) do - [nodeset] + [:iterate_raw_nodesets, [nodeset.map(&:raw_node)]] end when :child nodeset = step(path_stack) do - child(nodeset) + [:iterate_raw_nodesets, child(nodeset)] end when :literal trace(:literal, path_stack, nodeset) if @debug return path_stack.shift when :attribute nodeset = step(path_stack, any_type: :attribute) do - nodesets = [] - nodeset.each do |node| + raw_nodesets = nodeset.map do |node| raw_node = node.raw_node next unless raw_node.node_type == :element attributes = raw_node.attributes - next if attributes.empty? - nodesets << attributes.each_attribute.collect.with_index do |attribute, i| - XPathNode.new(attribute, position: i + 1) - end - end - nodesets + attributes.each_attribute.to_a unless attributes.empty? + end.compact + [:iterate_raw_nodesets, raw_nodesets] end when :namespace pre_defined_namespaces = { @@ -245,11 +241,13 @@ def expr( path_stack, nodeset, context=nil ) end end end - nodesets + # Not working at all, so just return an empty nodesets for now. + # Needs Namespace-node class + [:iterate_raw_nodesets, []] end when :parent nodeset = step(path_stack) do - nodesets = [] + parents = {}.compare_by_identity nodeset.each do |node| raw_node = node.raw_node if raw_node.node_type == :attribute @@ -257,101 +255,16 @@ def expr( path_stack, nodeset, context=nil ) else parent = raw_node.parent end - nodesets << [XPathNode.new(parent, position: 1)] if parent - end - nodesets - end - when :ancestor - nodeset = step(path_stack) do - nodesets = [] - # new_nodes = {} - nodeset.each do |node| - raw_node = node.raw_node - new_nodeset = [] - while raw_node.parent - raw_node = raw_node.parent - # next if new_nodes.key?(node) - new_nodeset << XPathNode.new(raw_node, - position: new_nodeset.size + 1) - # new_nodes[node] = true - end - nodesets << new_nodeset unless new_nodeset.empty? - end - nodesets - end - when :ancestor_or_self - nodeset = step(path_stack) do - nodesets = [] - # new_nodes = {} - nodeset.each do |node| - raw_node = node.raw_node - next unless raw_node.node_type == :element - new_nodeset = [XPathNode.new(raw_node, position: 1)] - # new_nodes[node] = true - while raw_node.parent - raw_node = raw_node.parent - # next if new_nodes.key?(node) - new_nodeset << XPathNode.new(raw_node, - position: new_nodeset.size + 1) - # new_nodes[node] = true - end - nodesets << new_nodeset unless new_nodeset.empty? - end - nodesets - end - when :descendant_or_self - nodeset = step(path_stack) do - descendant(nodeset, true) - end - when :descendant - nodeset = step(path_stack) do - descendant(nodeset, false) - end - when :following_sibling - nodeset = step(path_stack) do - nodesets = [] - nodeset.each do |node| - raw_node = node.raw_node - next unless raw_node.respond_to?(:parent) - next if raw_node.parent.nil? - all_siblings = raw_node.parent.children - current_index = all_siblings.index(raw_node) - following_siblings = all_siblings[(current_index + 1)..-1] - next if following_siblings.empty? - nodesets << following_siblings.collect.with_index do |sibling, i| - XPathNode.new(sibling, position: i + 1) - end - end - nodesets - end - when :preceding_sibling - nodeset = step(path_stack) do - nodesets = [] - nodeset.each do |node| - raw_node = node.raw_node - next unless raw_node.respond_to?(:parent) - next if raw_node.parent.nil? - all_siblings = raw_node.parent.children - current_index = all_siblings.index(raw_node) - preceding_siblings = all_siblings[0, current_index].reverse - next if preceding_siblings.empty? - nodesets << preceding_siblings.collect.with_index do |sibling, i| - XPathNode.new(sibling, position: i + 1) - end - end - nodesets - end - when :preceding - nodeset = step(path_stack) do - unnode(nodeset) do |node| - preceding(node) + parents[parent] = true if parent end + [:iterate_raw_nodesets, parents.keys.map {|parent| [parent] }] end - when :following + when :ancestor, :ancestor_or_self, + :descendant, :descendant_or_self, + :preceding, :preceding_sibling, + :following, :following_sibling nodeset = step(path_stack) do - unnode(nodeset) do |node| - following(node) - end + [op, nodeset.map(&:raw_node)] end when :variable var_name = path_stack.shift @@ -446,7 +359,7 @@ def expr( path_stack, nodeset, context=nil ) # If result is a nodeset, apply following predicates path_stack.unshift(:node) nodeset = step(path_stack) do - [result] + [:iterate_raw_nodesets, [unnode(result)]] end else return result @@ -460,136 +373,360 @@ def expr( path_stack, nodeset, context=nil ) leave(:expr, path_stack, nodeset) if @debug end + # Determines if a predicate expression is dependent on the position of nodes. + # nil if the predicate is position-independent, + # :simple if the predicate is a simple position query that can be optimized in axis scanning + # :complex if the predicate is a complex query that might be dependent on the position of nodes + def position_dependency(predicate_expr) + # [number], [position()=number], [position() < number], [position() > number] + return :simple if position_operation(predicate_expr) + + # expressions that return number. + return :complex if %i[div mod mult plus minus neg].include?(predicate_expr[0]) + return :complex if predicate_expr[0] == :function && %w[number ceiling round floor string-length sum count].include?(predicate_expr[1]) + # Numeric literal including Integer and Float: [2] means [position() = 2] + return :complex if predicate_expr[0] == :literal && Numeric === predicate_expr[1] + # A variable could resolve to a number at runtime: [$n] means [position() = $n]. + return :complex if predicate_expr[0] == :variable + + # expressions that contain position-dependent functions + return :complex if calls_position_dependent_function?(predicate_expr) + end + + # Recursively checks if the expression contains position-dependent functions such as position() or last() + def calls_position_dependent_function?(expr) + return false unless Array === expr + return true if expr[0] == :function && (expr[1] == 'position' || expr[1] == 'last') + expr.any? {|part| calls_position_dependent_function?(part) } + end + + # Detects simple position-based predicates that can be optimized in axis scanning, such as [1], [position()=1], [position() < 2], [position() > 3] + # Returns operators and values such as [:==, 1], [:<, 2], [:>, 3] + # Returns nil if the predicate is not a simple position-based predicate + def position_operation(predicate_expr) + return [:==, predicate_expr[1]] if predicate_expr[0] == :literal && predicate_expr[1].is_a?(Integer) + + op, left, right = predicate_expr + return unless op == :eq || op == :lt || op == :lteq || op == :gt || op == :gteq + return unless [left, right].include?([:function, 'position', []]) + + literal = [left, right].find {|part| part[0] == :literal && part[1].is_a?(Integer) } + return unless literal + + value = literal[1] + case op + when :eq + [:==, value] + when :lt + literal == right ? [:<, value] : [:>, value] + when :lteq + literal == right ? [:<, value + 1] : [:>, value - 1] + when :gt + literal == right ? [:>, value]: [:<, value] + when :gteq + literal == right ? [:>, value - 1] : [:<, value + 1] + end + end + + # Pseudo scanner for axis scanning step that nodesets are already collected + def iterate_raw_nodesets(raw_nodesets, tester, selector) + non_optimized_raw_nodesets_select(raw_nodesets, tester, selector) + end + + # Scanner for ancestor-or-self axis + def ancestor_or_self(raw_nodes, tester, selector) + ancestor(raw_nodes, tester, selector, include_self: true) + end + + # Scanner for preceding-sibling axis + def preceding_sibling(raw_nodes, tester, selector) + preceding_following_sibling(raw_nodes, tester, selector, reverse: true) + end + + # Scanner for following-sibling axis + def following_sibling(raw_nodes, tester, selector) + preceding_following_sibling(raw_nodes, tester, selector, reverse: false) + end + + def preceding_following_sibling(raw_nodes, tester, selector, reverse:) + raw_nodes = raw_nodes.select {|node| node.respond_to?(:parent) && node.parent } + case selector + when :uniq + raw_nodes.group_by(&:parent).flat_map do |parent, sibling_nodes| + sets = {}.compare_by_identity + sibling_nodes.each {|sibling| sets[sibling] = true } + children = parent.children + children = children.reverse if reverse + children.drop_while {|child| !sets.key?(child) }.drop(1) + end.select(&tester) + when :nodesets + raw_nodesets = raw_nodes.map do |raw_node| + parent = raw_node.parent + index = parent.children.index(raw_node) + reverse ? parent.children[0...index].reverse : parent.children[index + 1..-1] + end + non_optimized_raw_nodesets_select(raw_nodesets, tester, selector) + else + operator, value = selector + raw_nodes.group_by(&:parent).flat_map do |parent, sibling_nodes| + anchors = {}.compare_by_identity + sibling_nodes.each {|sibling| anchors[sibling] = true } + children = parent.children + children = children.reverse if reverse + followings = children.drop_while {|child| !anchors.key?(child) }.drop(1) + anchor_indexes = { 0 => true } + last_anchor = 0 + index = 0 + matched = [] + followings.each do |node| + if tester.call(node) + case operator + when :== + matched << node if anchor_indexes.include?(index - value + 1) + when :< + # Position from the last anchor will be the minimum possible position for the node + matched << node if index - last_anchor + 1 < value + when :> + # Position from the first anchor(==0) will be the maximum possible position for the node + matched << node if index + 1 > value + end + index += 1 + end + if anchors.key?(node) + anchor_indexes[index] = true + last_anchor = index + end + end + matched + end + end + end + + # Scanner for ancestor axis + def ancestor(raw_nodes, tester, selector, include_self: false) + raw_nodes = raw_nodes.select {|node| node.respond_to?(:parent) && node.parent } + case selector + when :uniq + ancestors = {}.compare_by_identity + raw_nodes.each do |raw_node| + ancestors[raw_node] = true if include_self + parent = raw_node.parent + while parent + break if ancestors.key?(parent) + ancestors[parent] = true + parent = parent.parent + end + end + ancestors.keys.select(&tester) + else + # Slow pass + raw_nodesets = raw_nodes.map do |raw_node| + ancestors = [] + ancestors << raw_node if include_self + parent = raw_node.parent + while parent + ancestors << parent + parent = parent.parent + end + ancestors + end + non_optimized_raw_nodesets_select(raw_nodesets, tester, selector) + end + end + + # Scanner fallback step for axis that is not optimized for position-based predicates. + def non_optimized_raw_nodesets_select(raw_nodesets, tester, selector) + nodesets = raw_nodesets.map do |nodeset| + nodeset.select(&tester).map.with_index(1) do |node, position| + XPathNode.new(node, position: position) + end + end.reject(&:empty?) + case selector + when :nodesets + nodesets + when :uniq + seen = {}.compare_by_identity + nodesets.flatten.each {|node| seen[node.raw_node] = true } + seen.keys + else + operator, value = selector + nodes = nodesets.flatten + nodes = + case operator + when :== + nodes.select {|node| node.position == value } + when :< + nodes.select {|node| node.position < value } + when :> + nodes.select {|node| node.position > value } + end + seen = {}.compare_by_identity + nodes.each {|node| seen[node.raw_node] = true } + seen.keys + end + end + + # Split predicates into several groups based on their dependency on the position of nodes + # If there are no position-based predicates, + # return [position_independent_predicates, nil, [], nil] + # If there are only one simple position-based predicate, + # return [position_independent_predicates, position_operator, post_position_independent_predicates, nil] + # If there are multiple position-based predicates or complex position-based predicates, + # return [position_independent_predicates, nil, nil, complex_predicates] + def split_positional_predicates(predicates) + pre_independent = predicates.take_while {|predicate| position_dependency(predicate).nil? } + predicates = predicates.drop(pre_independent.size) + return [pre_independent, nil, [], nil] if predicates.empty? + + op = position_operation(predicates.first) + if op && predicates[1..-1].all? {|predicate| position_dependency(predicate).nil? } + [pre_independent, op, predicates[1..-1], nil] + else + [pre_independent, nil, nil, predicates] + end + end + + # Performs an axis scanning step. + # The caller provides a scanner method and its argument, which determines the axis to scan and the nodes to scan from: + # step(path_stack) { [scanner_method, scanner_argument] } + # Scanner method are called with `(scanner_argument, tester_block, selector)` + # selector is a flag for the scanner to determine how to return the scan result. + # It can be: `:uniq`, `:nodesets` or `[position_comparator, value]`. + # `:uniq` means the scanner should return unique nodes. Predicates are position-independent. + # `:nodesets` means the scanner should return nodesets. Predicates are complex position queries that can't be optimized in axis scanning. + # `[position_comparator, value]` means the scanner should return nodes matching the position comparator and value. + # Each scanner method can implement optimized scanning strategy for each selector. + def step(path_stack, any_type: :element) - nodesets = yield + scanner, scanner_argument = yield begin - enter(:step, path_stack, nodesets) if @debug - nodesets = node_test(path_stack, nodesets, any_type: any_type) - while path_stack[0] == :predicate - path_stack.shift # :predicate - predicate_expression = path_stack.shift.dclone - nodesets = evaluate_predicate(predicate_expression, nodesets) + enter(:step, path_stack, scanner, scanner_argument) if @debug + tester = node_test(path_stack, any_type: any_type) + predicates = [] + while path_stack.first == :predicate + path_stack.shift + predicates << path_stack.shift + end + pre_predicates, position_operator, post_predicates, complex_predicates = split_positional_predicates(predicates) + + if pre_predicates.any? + original_tester = tester + tester = -> (raw_node) do + original_tester.call(raw_node) && + pre_predicates.all? do |predicate_expr| + evaluate_predicate(predicate_expr.dclone, [[XPathNode.new(raw_node, position: 1)]]).flatten.size == 1 + end + end + end + if complex_predicates + nodesets = send(scanner, scanner_argument, tester, :nodesets) + elsif position_operator + nodeset = send(scanner, scanner_argument, tester, position_operator).map.with_index(1) do |raw_node, position| + XPathNode.new(raw_node, position: position) + end + nodesets = [nodeset] + else + nodeset = send(scanner, scanner_argument, tester, :uniq).map.with_index(1) do |raw_node, position| + XPathNode.new(raw_node, position: position) + end + nodesets = [nodeset] end + (complex_predicates || post_predicates).each do |predicate_expr| + nodesets = evaluate_predicate(predicate_expr.dclone, nodesets) + end seen = {}.compare_by_identity - raw_nodes = [] nodesets.each do |nodeset| nodeset.each do |node| raw_node = node.respond_to?(:raw_node) ? node.raw_node : node - next if seen.key?(raw_node) seen[raw_node] = true - raw_nodes << raw_node end end - ordered_nodeset = sort(raw_nodes) - - new_nodeset = [] - ordered_nodeset.each do |node| - new_nodeset << XPathNode.new(node, position: new_nodeset.size + 1) + ordered = sort(seen.keys) + ordered.map.with_index(1) do |raw_node, position| + XPathNode.new(raw_node, position: position) end - new_nodeset ensure - leave(:step, path_stack, new_nodeset) if @debug + leave(:step, path_stack, ordered) if @debug end end - def node_test(path_stack, nodesets, any_type: :element) - enter(:node_test, path_stack, nodesets) if @debug + def node_test(path_stack, any_type: :element) + enter(:node_test, path_stack) if @debug operator = path_stack.shift case operator when :qname prefix = path_stack.shift name = path_stack.shift - new_nodesets = nodesets.collect do |nodeset| - filter_nodeset(nodeset) do |node| - raw_node = node.raw_node - case raw_node.node_type - when :element - if prefix.nil? - raw_node.name == name - elsif prefix.empty? - if strict? - raw_node.name == name and raw_node.namespace == "" - else - raw_node.name == name and raw_node.namespace == get_namespace(raw_node, prefix) - end - else - raw_node.name == name and raw_node.namespace == get_namespace(raw_node, prefix) - end - when :attribute - if prefix.nil? - raw_node.name == name - elsif prefix.empty? + ->(raw_node) do + case raw_node.node_type + when :element + if prefix.nil? + raw_node.name == name + elsif prefix.empty? + if strict? raw_node.name == name and raw_node.namespace == "" else - raw_node.name == name and raw_node.namespace == get_namespace(raw_node.element, prefix) + raw_node.name == name and raw_node.namespace == get_namespace(raw_node, prefix) end else - false + raw_node.name == name and raw_node.namespace == get_namespace(raw_node, prefix) end + when :attribute + if prefix.nil? + raw_node.name == name + elsif prefix.empty? + raw_node.name == name and raw_node.namespace == "" + else + raw_node.name == name and raw_node.namespace == get_namespace(raw_node.element, prefix) + end + else + false end end when :namespace prefix = path_stack.shift - new_nodesets = nodesets.collect do |nodeset| - filter_nodeset(nodeset) do |node| - raw_node = node.raw_node - case raw_node.node_type - when :element - namespaces = @namespaces || raw_node.namespaces - raw_node.namespace == namespaces[prefix] - when :attribute - namespaces = @namespaces || raw_node.element.namespaces - raw_node.namespace == namespaces[prefix] - else - false - end + ->(raw_node) do + case raw_node.node_type + when :element + namespaces = @namespaces || raw_node.namespaces + raw_node.namespace == namespaces[prefix] + when :attribute + namespaces = @namespaces || raw_node.element.namespaces + raw_node.namespace == namespaces[prefix] + else + false end end when :any - new_nodesets = nodesets.collect do |nodeset| - filter_nodeset(nodeset) do |node| - raw_node = node.raw_node - raw_node.node_type == any_type - end + ->(raw_node) do + raw_node.node_type == any_type end when :comment - new_nodesets = nodesets.collect do |nodeset| - filter_nodeset(nodeset) do |node| - raw_node = node.raw_node - raw_node.node_type == :comment - end + ->(raw_node) do + raw_node.node_type == :comment end when :text - new_nodesets = nodesets.collect do |nodeset| - filter_nodeset(nodeset) do |node| - raw_node = node.raw_node - raw_node.node_type == :text - end + ->(raw_node) do + raw_node.node_type == :text end when :processing_instruction target = path_stack.shift - new_nodesets = nodesets.collect do |nodeset| - filter_nodeset(nodeset) do |node| - raw_node = node.raw_node - (raw_node.node_type == :processing_instruction) and - (target.empty? or (raw_node.target == target)) - end + ->(raw_node) do + (raw_node.node_type == :processing_instruction) and + (target.empty? or (raw_node.target == target)) end when :node - new_nodesets = nodesets.collect do |nodeset| - filter_nodeset(nodeset) do |node| - true - end + ->(raw_node) do + true end else message = "[BUG] Unexpected node test: " + "<#{operator.inspect}>: <#{path_stack.inspect}>" raise message end - new_nodesets ensure - leave(:node_test, path_stack, new_nodesets) if @debug + leave(:node_test, path_stack) if @debug end def filter_nodeset(nodeset) @@ -673,7 +810,7 @@ def sort(array_of_nodes) node_idx = [] np = node.node_type == :attribute ? node.element : node while np.parent and np.parent.node_type == :element - node_idx << np.parent.index( np ) + node_idx << np.parent.index(np) np = np.parent end new_arry << [ node_idx.reverse, node ] @@ -686,21 +823,49 @@ def sort(array_of_nodes) end end - def descendant(nodeset, include_self) - nodesets = [] - nodeset.each do |node| - new_nodeset = [] - new_nodes = {} - descendant_recursive(node.raw_node, new_nodeset, new_nodes, include_self) - nodesets << new_nodeset unless new_nodeset.empty? + # Scanner for descendant-or-self axis + def descendant_or_self(raw_nodes, tester, selector) + descendant(raw_nodes, tester, selector, include_self: true) + end + + # Scanner for descendant axis + def descendant(raw_nodes, tester, selector, include_self: false) + raw_nodes = raw_nodes.select {|node| node.respond_to?(:children) } + case selector + when :uniq + seen = {}.compare_by_identity + recursive = ->(raw_node) do + node_type = raw_node.node_type + return if seen[raw_node] + seen[raw_node] = true + return unless node_type == :element || node_type == :document + raw_node.children.each do |child| + recursive.call(child) + end + end + raw_nodes.each do |raw_node| + if include_self + recursive.call(raw_node) + else + raw_node.children.each(&recursive) + end + end + seen.keys.select(&tester) + else + raw_nodesets = raw_nodes.map do |raw_node| + new_nodeset = [] + new_nodes = {} + descendant_recursive(raw_node, new_nodeset, new_nodes, include_self) + new_nodeset + end + non_optimized_raw_nodesets_select(raw_nodesets, tester, selector) end - nodesets end def descendant_recursive(raw_node, new_nodeset, new_nodes, include_self) if include_self return if new_nodes.key?(raw_node) - new_nodeset << XPathNode.new(raw_node, position: new_nodeset.size + 1) + new_nodeset << raw_node new_nodes[raw_node] = true end @@ -712,11 +877,17 @@ def descendant_recursive(raw_node, new_nodeset, new_nodes, include_self) end end + # Scanner for preceding axis + def preceding(raw_nodes, tester, selector) + raw_nodesets = raw_nodes.select {|node| node.respond_to?(:parent) }.map {|raw_node| preceding_nodes(raw_node) } + non_optimized_raw_nodesets_select(raw_nodesets, tester, selector) + end + # Builds a nodeset of all of the preceding nodes of the supplied node, # in reverse document order # preceding:: includes every element in the document that precedes this node, # except for ancestors - def preceding(node) + def preceding_nodes(node) ancestors = [] parent = node.parent while parent @@ -730,8 +901,7 @@ def preceding(node) if ancestors.include?(preceding_node) ancestors.delete(preceding_node) else - precedings << XPathNode.new(preceding_node, - position: precedings.size + 1) + precedings << preceding_node end preceding_node = preceding_node_of(preceding_node) end @@ -753,12 +923,19 @@ def preceding_node_of( node ) psn end - def following(node) + # Scanner for following axis + def following(raw_nodes, tester, selector) + raw_nodesets = raw_nodes.select {|node| node.respond_to?(:parent) }.map do |raw_node| + following_nodes(raw_node) + end + non_optimized_raw_nodesets_select(raw_nodesets, tester, selector) + end + + def following_nodes(node) followings = [] following_node = next_sibling_node(node) while following_node - followings << XPathNode.new(following_node, - position: followings.size + 1) + followings << following_node following_node = following_node_of(following_node) end followings @@ -781,30 +958,19 @@ def next_sibling_node(node) end def child(nodeset) - nodesets = [] - nodeset.each do |node| + nodeset.map do |node| raw_node = node.raw_node node_type = raw_node.node_type # trace(:child, node_type, node) case node_type when :element - nodesets << raw_node.children.collect.with_index do |child_node, i| - XPathNode.new(child_node, position: i + 1) - end + raw_node.children when :document - new_nodeset = [] - raw_node.children.each do |child| - case child - when XMLDecl, Text - # Ignore - else - new_nodeset << XPathNode.new(child, position: new_nodeset.size + 1) - end + raw_node.children.reject do |child| + XMLDecl === child || Text === child end - nodesets << new_nodeset unless new_nodeset.empty? end - end - nodesets + end.compact end def norm b diff --git a/test/xpath/test_attribute.rb b/test/xpath/test_attribute.rb index b778ff81..458fda9f 100644 --- a/test/xpath/test_attribute.rb +++ b/test/xpath/test_attribute.rb @@ -32,5 +32,18 @@ def test_no_namespace "nothing" => "") assert_equal(["child2"], children.collect(&:text)) end + + def test_no_error + REXML::XPath.match(@document, '//attribute::*/parent::*') + # Some of those are not yet supported in REXML, but at least it shouldn't raise an error + REXML::XPath.match(@document, '//attribute::*/preceding::*') + REXML::XPath.match(@document, '//attribute::*/following::*') + REXML::XPath.match(@document, '//attribute::*/preceding-sibling::*') + REXML::XPath.match(@document, '//attribute::*/following-sibling::*') + REXML::XPath.match(@document, '//attribute::*/ancestor::*') + REXML::XPath.match(@document, '//attribute::*/descendant::*') + REXML::XPath.match(@document, '//attribute::*/ancestor-or-self::*') + REXML::XPath.match(@document, '//attribute::*/descendant-or-self::*') + end end end diff --git a/test/xpath/test_axis_preceding_sibling.rb b/test/xpath/test_axis_preceding_sibling.rb index 2f0ba5d1..f4b78ebc 100644 --- a/test/xpath/test_axis_preceding_sibling.rb +++ b/test/xpath/test_axis_preceding_sibling.rb @@ -34,5 +34,73 @@ def test_preceding_sibling_axis prev = XPath.first(context, "preceding-sibling::f[3]") assert_equal "3", prev.attributes["id"] end + + def test_preceding_sibling_position_less_than + context = XPath.first(@@doc, "/a/e/f[last()]") + assert_equal([], XPath.match(context, "preceding-sibling::f[position() < 1]")) + assert_equal(["5"], + XPath.match(context, "preceding-sibling::f[position() < 2]").map {|n| n.attributes["id"] }) + assert_equal(["4", "5"], + XPath.match(context, "preceding-sibling::f[position() < 3]").map {|n| n.attributes["id"] }) + end + + def test_preceding_sibling_position_less_than_or_equal + context = XPath.first(@@doc, "/a/e/f[last()]") + assert_equal([], XPath.match(context, "preceding-sibling::f[position() <= 0]")) + assert_equal(["4", "5"], + XPath.match(context, "preceding-sibling::f[position() <= 2]").map {|n| n.attributes["id"] }) + end + + def test_preceding_sibling_position_greater_than + context = XPath.first(@@doc, "/a/e/f[last()]") + assert_equal(["3", "4", "5"], + XPath.match(context, "preceding-sibling::f[position() > 0]").map {|n| n.attributes["id"] }) + assert_equal(["3", "4"], + XPath.match(context, "preceding-sibling::f[position() > 1]").map {|n| n.attributes["id"] }) + assert_equal(["3"], + XPath.match(context, "preceding-sibling::f[position() > 2]").map {|n| n.attributes["id"] }) + end + + def test_preceding_sibling_position_greater_than_or_equal + context = XPath.first(@@doc, "/a/e/f[last()]") + assert_equal(["3", "4", "5"], + XPath.match(context, "preceding-sibling::f[position() >= 0]").map {|n| n.attributes["id"] }) + assert_equal(["3", "4", "5"], + XPath.match(context, "preceding-sibling::f[position() >= 1]").map {|n| n.attributes["id"] }) + assert_equal(["3", "4"], + XPath.match(context, "preceding-sibling::f[position() >= 2]").map {|n| n.attributes["id"] }) + end + + def test_preceding_sibling_multiple_anchors + doc = Document.new(<<~XML) + + + + + + + + + + + + + + + + + + + + + + + + XML + + assert_equal(%w[2 7 9], XPath.match(doc, "/a/anchor/preceding-sibling::b[position() = 3]").map {|n| n.attributes["id"] }) + assert_equal(%w[2 3 4 7 8 9 10 11], XPath.match(doc, "/a/anchor/preceding-sibling::b[position() <= 3]").map {|n| n.attributes["id"] }) + assert_equal(%w[1 2 3 4 5 6 7 8], XPath.match(doc, "/a/anchor/preceding-sibling::b[position() >= 4]").map {|n| n.attributes["id"] }) + end end end diff --git a/test/xpath/test_base.rb b/test/xpath/test_base.rb index 205711ee..02a4303a 100644 --- a/test/xpath/test_base.rb +++ b/test/xpath/test_base.rb @@ -492,6 +492,14 @@ def test_following_sibling_predicates assert_equal(["w", "x", "y", "z"], matches.map(&:name)) end + def test_following_sibling_position_less_than + source = "" + doc = REXML::Document.new(source) + assert_equal([], REXML::XPath.match(doc, "/r/a/following-sibling::*[position() < 1]")) + assert_equal(["b"], REXML::XPath.match(doc, "/r/a/following-sibling::*[position() < 2]").map(&:name)) + assert_equal(["b", "c"], REXML::XPath.match(doc, "/r/a/following-sibling::*[position() < 3]").map(&:name)) + end + def test_preceding_sibling_across_multiple_nodes source = <<-XML @@ -890,7 +898,7 @@ def test_ordering r.collect {|element| element.attribute("id").value}) end - def test_order_consisntency + def test_order_consistency doc = REXML::Document.new('') assert_equal('a', REXML::XPath.first(doc, '//d/ancestor::*').name) assert_equal('a', REXML::XPath.first(doc, '//d[@id="2"]/ancestor::*').name) diff --git a/test/xpath/test_predicate.rb b/test/xpath/test_predicate.rb index 278e3765..f25bd758 100644 --- a/test/xpath/test_predicate.rb +++ b/test/xpath/test_predicate.rb @@ -59,12 +59,50 @@ def test_predicates_multi assert_equal( "1", m[0].attributes["id"] ) end + def test_predicate_multi_position + xml = <<~XML + + + + + XML + doc = REXML::Document.new(xml) + + result = REXML::XPath.match(doc, "/a/b/c[position()>1]") + assert_equal(%w[2 3 4 5 7 8 9 10], result.map { |node| node.attributes["id"] }) + + result = REXML::XPath.match(doc, "/a/b/c[position()>1][position()>1]") + assert_equal(%w[3 4 5 8 9 10], result.map { |node| node.attributes["id"] }) + + result = REXML::XPath.match(doc, "/a/b/c[position()>1][position()>1][@id!='3']") + assert_equal(%w[4 5 8 9 10], result.map { |node| node.attributes["id"] }) + + result = REXML::XPath.match(doc, "/a/b/c[position()>1][position()>1][@id!='3'][position()!=2]") + assert_equal(%w[4 8 10], result.map { |node| node.attributes["id"] }) + end + def do_path( path ) m = REXML::XPath.match( @doc, path ) #puts path, @parser.parse( path ).inspect return m end + def test_predicate_float_literal + doc = REXML::Document.new("") + # [N.0] is equivalent to [position() = N.0] = [position() = N] + assert_equal(["a"], REXML::XPath.match(doc, "/r/*[1.0]").map(&:name)) + assert_equal(["b"], REXML::XPath.match(doc, "/r/*[2.0]").map(&:name)) + # Non-integer numeric literals match no node. + assert_equal([], REXML::XPath.match(doc, "/r/*[1.5]")) + end + + def test_predicate_variable_as_position + doc = REXML::Document.new("") + parser = REXML::XPathParser.new + parser["x"] = 2 + assert_equal(["b"], parser.parse("/r/*[$x]", doc).map(&:name)) + end + def test_get_no_siblings_terminal_nodes source = <<-XML