diff --git a/lib/rexml/parsers/xpathparser.rb b/lib/rexml/parsers/xpathparser.rb
index a6d76fdc..c4b8531f 100644
--- a/lib/rexml/parsers/xpathparser.rb
+++ b/lib/rexml/parsers/xpathparser.rb
@@ -654,7 +654,7 @@ def PrimaryExpr path, parsed
contents = contents[1..-2]
n = []
OrExpr( contents, n )
- parsed.concat(n)
+ parsed.push(:group, n)
end
path
end
diff --git a/lib/rexml/xpath_parser.rb b/lib/rexml/xpath_parser.rb
index c5d420ce..45ed6707 100644
--- a/lib/rexml/xpath_parser.rb
+++ b/lib/rexml/xpath_parser.rb
@@ -199,28 +199,24 @@ def expr( path_stack, nodeset, context=nil )
nodeset = [XPathNode.new(first_raw_node.root_node, position: 1)]
when :self
nodeset = step(path_stack) do
- [nodeset]
+ [:iterate_raw_nodesets, [nodeset.map(&:raw_node)]]
end
when :child
nodeset = step(path_stack) do
- child(nodeset)
+ [:iterate_raw_nodesets, child(nodeset)]
end
when :literal
trace(:literal, path_stack, nodeset) if @debug
return path_stack.shift
when :attribute
nodeset = step(path_stack, any_type: :attribute) do
- nodesets = []
- nodeset.each do |node|
+ raw_nodesets = nodeset.map do |node|
raw_node = node.raw_node
next unless raw_node.node_type == :element
attributes = raw_node.attributes
- next if attributes.empty?
- nodesets << attributes.each_attribute.collect.with_index do |attribute, i|
- XPathNode.new(attribute, position: i + 1)
- end
- end
- nodesets
+ attributes.each_attribute.to_a unless attributes.empty?
+ end.compact
+ [:iterate_raw_nodesets, raw_nodesets]
end
when :namespace
pre_defined_namespaces = {
@@ -245,11 +241,13 @@ def expr( path_stack, nodeset, context=nil )
end
end
end
- nodesets
+ # Not working at all, so just return an empty nodesets for now.
+ # Needs Namespace-node class
+ [:iterate_raw_nodesets, []]
end
when :parent
nodeset = step(path_stack) do
- nodesets = []
+ parents = {}.compare_by_identity
nodeset.each do |node|
raw_node = node.raw_node
if raw_node.node_type == :attribute
@@ -257,101 +255,16 @@ def expr( path_stack, nodeset, context=nil )
else
parent = raw_node.parent
end
- nodesets << [XPathNode.new(parent, position: 1)] if parent
- end
- nodesets
- end
- when :ancestor
- nodeset = step(path_stack) do
- nodesets = []
- # new_nodes = {}
- nodeset.each do |node|
- raw_node = node.raw_node
- new_nodeset = []
- while raw_node.parent
- raw_node = raw_node.parent
- # next if new_nodes.key?(node)
- new_nodeset << XPathNode.new(raw_node,
- position: new_nodeset.size + 1)
- # new_nodes[node] = true
- end
- nodesets << new_nodeset unless new_nodeset.empty?
- end
- nodesets
- end
- when :ancestor_or_self
- nodeset = step(path_stack) do
- nodesets = []
- # new_nodes = {}
- nodeset.each do |node|
- raw_node = node.raw_node
- next unless raw_node.node_type == :element
- new_nodeset = [XPathNode.new(raw_node, position: 1)]
- # new_nodes[node] = true
- while raw_node.parent
- raw_node = raw_node.parent
- # next if new_nodes.key?(node)
- new_nodeset << XPathNode.new(raw_node,
- position: new_nodeset.size + 1)
- # new_nodes[node] = true
- end
- nodesets << new_nodeset unless new_nodeset.empty?
+ parents[parent] = true if parent
end
- nodesets
+ [:iterate_raw_nodesets, parents.keys.map {|parent| [parent] }]
end
- when :descendant_or_self
+ when :ancestor, :ancestor_or_self,
+ :descendant, :descendant_or_self,
+ :preceding, :preceding_sibling,
+ :following, :following_sibling
nodeset = step(path_stack) do
- descendant(nodeset, true)
- end
- when :descendant
- nodeset = step(path_stack) do
- descendant(nodeset, false)
- end
- when :following_sibling
- nodeset = step(path_stack) do
- nodesets = []
- nodeset.each do |node|
- raw_node = node.raw_node
- next unless raw_node.respond_to?(:parent)
- next if raw_node.parent.nil?
- all_siblings = raw_node.parent.children
- current_index = all_siblings.index(raw_node)
- following_siblings = all_siblings[(current_index + 1)..-1]
- next if following_siblings.empty?
- nodesets << following_siblings.collect.with_index do |sibling, i|
- XPathNode.new(sibling, position: i + 1)
- end
- end
- nodesets
- end
- when :preceding_sibling
- nodeset = step(path_stack, order: :reverse) do
- nodesets = []
- nodeset.each do |node|
- raw_node = node.raw_node
- next unless raw_node.respond_to?(:parent)
- next if raw_node.parent.nil?
- all_siblings = raw_node.parent.children
- current_index = all_siblings.index(raw_node)
- preceding_siblings = all_siblings[0, current_index].reverse
- next if preceding_siblings.empty?
- nodesets << preceding_siblings.collect.with_index do |sibling, i|
- XPathNode.new(sibling, position: i + 1)
- end
- end
- nodesets
- end
- when :preceding
- nodeset = step(path_stack, order: :reverse) do
- unnode(nodeset) do |node|
- preceding(node)
- end
- end
- when :following
- nodeset = step(path_stack) do
- unnode(nodeset) do |node|
- following(node)
- end
+ [op, nodeset.map(&:raw_node)]
end
when :variable
var_name = path_stack.shift
@@ -439,7 +352,18 @@ def expr( path_stack, nodeset, context=nil )
end
Functions.context = target_context
return Functions.send(func_name, *args)
-
+ when :group
+ sub_expression = path_stack.shift
+ result = expr(sub_expression, nodeset, context)
+ if result.is_a?(Array)
+ # If result is a nodeset, apply following predicates
+ path_stack.unshift(:node)
+ nodeset = step(path_stack) do
+ [:iterate_raw_nodesets, [unnode(result)]]
+ end
+ else
+ return result
+ end
else
raise "[BUG] Unexpected path: <#{op.inspect}>: <#{path_stack.inspect}>"
end
@@ -449,138 +373,360 @@ def expr( path_stack, nodeset, context=nil )
leave(:expr, path_stack, nodeset) if @debug
end
- def step(path_stack, any_type: :element, order: :forward)
- nodesets = yield
+ # Determines if a predicate expression is dependent on the position of nodes.
+ # nil if the predicate is position-independent,
+ # :simple if the predicate is a simple position query that can be optimized in axis scanning
+ # :complex if the predicate is a complex query that might be dependent on the position of nodes
+ def position_dependency(predicate_expr)
+ # [number], [position()=number], [position() < number], [position() > number]
+ return :simple if position_operation(predicate_expr)
+
+ # expressions that return number.
+ return :complex if %i[div mod mult plus minus neg].include?(predicate_expr[0])
+ return :complex if predicate_expr[0] == :function && %w[number ceiling round floor string-length sum count].include?(predicate_expr[1])
+ # Numeric literal including Integer and Float: [2] means [position() = 2]
+ return :complex if predicate_expr[0] == :literal && Numeric === predicate_expr[1]
+ # A variable could resolve to a number at runtime: [$n] means [position() = $n].
+ return :complex if predicate_expr[0] == :variable
+
+ # expressions that contain position-dependent functions
+ return :complex if calls_position_dependent_function?(predicate_expr)
+ end
+
+ # Recursively checks if the expression contains position-dependent functions such as position() or last()
+ def calls_position_dependent_function?(expr)
+ return false unless Array === expr
+ return true if expr[0] == :function && (expr[1] == 'position' || expr[1] == 'last')
+ expr.any? {|part| calls_position_dependent_function?(part) }
+ end
+
+ # Detects simple position-based predicates that can be optimized in axis scanning, such as [1], [position()=1], [position() < 2], [position() > 3]
+ # Returns operators and values such as [:==, 1], [:<, 2], [:>, 3]
+ # Returns nil if the predicate is not a simple position-based predicate
+ def position_operation(predicate_expr)
+ return [:==, predicate_expr[1]] if predicate_expr[0] == :literal && predicate_expr[1].is_a?(Integer)
+
+ op, left, right = predicate_expr
+ return unless op == :eq || op == :lt || op == :lteq || op == :gt || op == :gteq
+ return unless [left, right].include?([:function, 'position', []])
+
+ literal = [left, right].find {|part| part[0] == :literal && part[1].is_a?(Integer) }
+ return unless literal
+
+ value = literal[1]
+ case op
+ when :eq
+ [:==, value]
+ when :lt
+ literal == right ? [:<, value] : [:>, value]
+ when :lteq
+ literal == right ? [:<, value + 1] : [:>, value - 1]
+ when :gt
+ literal == right ? [:>, value]: [:<, value]
+ when :gteq
+ literal == right ? [:>, value - 1] : [:<, value + 1]
+ end
+ end
+
+ # Pseudo scanner for axis scanning step that nodesets are already collected
+ def iterate_raw_nodesets(raw_nodesets, tester, selector)
+ non_optimized_raw_nodesets_select(raw_nodesets, tester, selector)
+ end
+
+ # Scanner for ancestor-or-self axis
+ def ancestor_or_self(raw_nodes, tester, selector)
+ ancestor(raw_nodes, tester, selector, include_self: true)
+ end
+
+ # Scanner for preceding-sibling axis
+ def preceding_sibling(raw_nodes, tester, selector)
+ preceding_following_sibling(raw_nodes, tester, selector, reverse: true)
+ end
+
+ # Scanner for following-sibling axis
+ def following_sibling(raw_nodes, tester, selector)
+ preceding_following_sibling(raw_nodes, tester, selector, reverse: false)
+ end
+
+ def preceding_following_sibling(raw_nodes, tester, selector, reverse:)
+ raw_nodes = raw_nodes.select {|node| node.respond_to?(:parent) && node.parent }
+ case selector
+ when :uniq
+ raw_nodes.group_by(&:parent).flat_map do |parent, sibling_nodes|
+ sets = {}.compare_by_identity
+ sibling_nodes.each {|sibling| sets[sibling] = true }
+ children = parent.children
+ children = children.reverse if reverse
+ children.drop_while {|child| !sets.key?(child) }.drop(1)
+ end.select(&tester)
+ when :nodesets
+ raw_nodesets = raw_nodes.map do |raw_node|
+ parent = raw_node.parent
+ index = parent.children.index(raw_node)
+ reverse ? parent.children[0...index].reverse : parent.children[index + 1..-1]
+ end
+ non_optimized_raw_nodesets_select(raw_nodesets, tester, selector)
+ else
+ operator, value = selector
+ raw_nodes.group_by(&:parent).flat_map do |parent, sibling_nodes|
+ anchors = {}.compare_by_identity
+ sibling_nodes.each {|sibling| anchors[sibling] = true }
+ children = parent.children
+ children = children.reverse if reverse
+ followings = children.drop_while {|child| !anchors.key?(child) }.drop(1)
+ anchor_indexes = { 0 => true }
+ last_anchor = 0
+ index = 0
+ matched = []
+ followings.each do |node|
+ if tester.call(node)
+ case operator
+ when :==
+ matched << node if anchor_indexes.include?(index - value + 1)
+ when :<
+ # Position from the last anchor will be the minimum possible position for the node
+ matched << node if index - last_anchor + 1 < value
+ when :>
+ # Position from the first anchor(==0) will be the maximum possible position for the node
+ matched << node if index + 1 > value
+ end
+ index += 1
+ end
+ if anchors.key?(node)
+ anchor_indexes[index] = true
+ last_anchor = index
+ end
+ end
+ matched
+ end
+ end
+ end
+
+ # Scanner for ancestor axis
+ def ancestor(raw_nodes, tester, selector, include_self: false)
+ raw_nodes = raw_nodes.select {|node| node.respond_to?(:parent) && node.parent }
+ case selector
+ when :uniq
+ ancestors = {}.compare_by_identity
+ raw_nodes.each do |raw_node|
+ ancestors[raw_node] = true if include_self
+ parent = raw_node.parent
+ while parent
+ break if ancestors.key?(parent)
+ ancestors[parent] = true
+ parent = parent.parent
+ end
+ end
+ ancestors.keys.select(&tester)
+ else
+ # Slow pass
+ raw_nodesets = raw_nodes.map do |raw_node|
+ ancestors = []
+ ancestors << raw_node if include_self
+ parent = raw_node.parent
+ while parent
+ ancestors << parent
+ parent = parent.parent
+ end
+ ancestors
+ end
+ non_optimized_raw_nodesets_select(raw_nodesets, tester, selector)
+ end
+ end
+
+ # Scanner fallback step for axis that is not optimized for position-based predicates.
+ def non_optimized_raw_nodesets_select(raw_nodesets, tester, selector)
+ nodesets = raw_nodesets.map do |nodeset|
+ nodeset.select(&tester).map.with_index(1) do |node, position|
+ XPathNode.new(node, position: position)
+ end
+ end.reject(&:empty?)
+ case selector
+ when :nodesets
+ nodesets
+ when :uniq
+ seen = {}.compare_by_identity
+ nodesets.flatten.each {|node| seen[node.raw_node] = true }
+ seen.keys
+ else
+ operator, value = selector
+ nodes = nodesets.flatten
+ nodes =
+ case operator
+ when :==
+ nodes.select {|node| node.position == value }
+ when :<
+ nodes.select {|node| node.position < value }
+ when :>
+ nodes.select {|node| node.position > value }
+ end
+ seen = {}.compare_by_identity
+ nodes.each {|node| seen[node.raw_node] = true }
+ seen.keys
+ end
+ end
+
+ # Split predicates into several groups based on their dependency on the position of nodes
+ # If there are no position-based predicates,
+ # return [position_independent_predicates, nil, [], nil]
+ # If there are only one simple position-based predicate,
+ # return [position_independent_predicates, position_operator, post_position_independent_predicates, nil]
+ # If there are multiple position-based predicates or complex position-based predicates,
+ # return [position_independent_predicates, nil, nil, complex_predicates]
+ def split_positional_predicates(predicates)
+ pre_independent = predicates.take_while {|predicate| position_dependency(predicate).nil? }
+ predicates = predicates.drop(pre_independent.size)
+ return [pre_independent, nil, [], nil] if predicates.empty?
+
+ op = position_operation(predicates.first)
+ if op && predicates[1..-1].all? {|predicate| position_dependency(predicate).nil? }
+ [pre_independent, op, predicates[1..-1], nil]
+ else
+ [pre_independent, nil, nil, predicates]
+ end
+ end
+
+ # Performs an axis scanning step.
+ # The caller provides a scanner method and its argument, which determines the axis to scan and the nodes to scan from:
+ # step(path_stack) { [scanner_method, scanner_argument] }
+ # Scanner method are called with `(scanner_argument, tester_block, selector)`
+ # selector is a flag for the scanner to determine how to return the scan result.
+ # It can be: `:uniq`, `:nodesets` or `[position_comparator, value]`.
+ # `:uniq` means the scanner should return unique nodes. Predicates are position-independent.
+ # `:nodesets` means the scanner should return nodesets. Predicates are complex position queries that can't be optimized in axis scanning.
+ # `[position_comparator, value]` means the scanner should return nodes matching the position comparator and value.
+ # Each scanner method can implement optimized scanning strategy for each selector.
+
+ def step(path_stack, any_type: :element)
+ scanner, scanner_argument = yield
begin
- enter(:step, path_stack, nodesets) if @debug
- nodesets = node_test(path_stack, nodesets, any_type: any_type)
- while path_stack[0] == :predicate
- path_stack.shift # :predicate
- predicate_expression = path_stack.shift.dclone
- nodesets = evaluate_predicate(predicate_expression, nodesets)
- end
- if nodesets.size == 1
- ordered_nodeset = nodesets[0]
- else
- seen = {}.compare_by_identity
- raw_nodes = []
- nodesets.each do |nodeset|
- nodeset.each do |node|
- raw_node = node.respond_to?(:raw_node) ? node.raw_node : node
- next if seen.key?(raw_node)
- seen[raw_node] = true
- raw_nodes << raw_node
+ enter(:step, path_stack, scanner, scanner_argument) if @debug
+ tester = node_test(path_stack, any_type: any_type)
+ predicates = []
+ while path_stack.first == :predicate
+ path_stack.shift
+ predicates << path_stack.shift
+ end
+ pre_predicates, position_operator, post_predicates, complex_predicates = split_positional_predicates(predicates)
+
+ if pre_predicates.any?
+ original_tester = tester
+ tester = -> (raw_node) do
+ original_tester.call(raw_node) &&
+ pre_predicates.all? do |predicate_expr|
+ evaluate_predicate(predicate_expr.dclone, [[XPathNode.new(raw_node, position: 1)]]).flatten.size == 1
end
end
- ordered_nodeset = sort(raw_nodes, order)
end
- new_nodeset = []
- ordered_nodeset.each do |node|
- new_nodeset << XPathNode.new(node, position: new_nodeset.size + 1)
+ if complex_predicates
+ nodesets = send(scanner, scanner_argument, tester, :nodesets)
+ elsif position_operator
+ nodeset = send(scanner, scanner_argument, tester, position_operator).map.with_index(1) do |raw_node, position|
+ XPathNode.new(raw_node, position: position)
+ end
+ nodesets = [nodeset]
+ else
+ nodeset = send(scanner, scanner_argument, tester, :uniq).map.with_index(1) do |raw_node, position|
+ XPathNode.new(raw_node, position: position)
+ end
+ nodesets = [nodeset]
+ end
+
+ (complex_predicates || post_predicates).each do |predicate_expr|
+ nodesets = evaluate_predicate(predicate_expr.dclone, nodesets)
+ end
+ seen = {}.compare_by_identity
+ nodesets.each do |nodeset|
+ nodeset.each do |node|
+ raw_node = node.respond_to?(:raw_node) ? node.raw_node : node
+ seen[raw_node] = true
+ end
+ end
+ ordered = sort(seen.keys)
+ ordered.map.with_index(1) do |raw_node, position|
+ XPathNode.new(raw_node, position: position)
end
- new_nodeset
ensure
- leave(:step, path_stack, new_nodeset) if @debug
+ leave(:step, path_stack, ordered) if @debug
end
end
- def node_test(path_stack, nodesets, any_type: :element)
- enter(:node_test, path_stack, nodesets) if @debug
+ def node_test(path_stack, any_type: :element)
+ enter(:node_test, path_stack) if @debug
operator = path_stack.shift
case operator
when :qname
prefix = path_stack.shift
name = path_stack.shift
- new_nodesets = nodesets.collect do |nodeset|
- filter_nodeset(nodeset) do |node|
- raw_node = node.raw_node
- case raw_node.node_type
- when :element
- if prefix.nil?
- raw_node.name == name
- elsif prefix.empty?
- if strict?
- raw_node.name == name and raw_node.namespace == ""
- else
- raw_node.name == name and raw_node.namespace == get_namespace(raw_node, prefix)
- end
- else
- raw_node.name == name and raw_node.namespace == get_namespace(raw_node, prefix)
- end
- when :attribute
- if prefix.nil?
- raw_node.name == name
- elsif prefix.empty?
+ ->(raw_node) do
+ case raw_node.node_type
+ when :element
+ if prefix.nil?
+ raw_node.name == name
+ elsif prefix.empty?
+ if strict?
raw_node.name == name and raw_node.namespace == ""
else
- raw_node.name == name and raw_node.namespace == get_namespace(raw_node.element, prefix)
+ raw_node.name == name and raw_node.namespace == get_namespace(raw_node, prefix)
end
else
- false
+ raw_node.name == name and raw_node.namespace == get_namespace(raw_node, prefix)
end
+ when :attribute
+ if prefix.nil?
+ raw_node.name == name
+ elsif prefix.empty?
+ raw_node.name == name and raw_node.namespace == ""
+ else
+ raw_node.name == name and raw_node.namespace == get_namespace(raw_node.element, prefix)
+ end
+ else
+ false
end
end
when :namespace
prefix = path_stack.shift
- new_nodesets = nodesets.collect do |nodeset|
- filter_nodeset(nodeset) do |node|
- raw_node = node.raw_node
- case raw_node.node_type
- when :element
- namespaces = @namespaces || raw_node.namespaces
- raw_node.namespace == namespaces[prefix]
- when :attribute
- namespaces = @namespaces || raw_node.element.namespaces
- raw_node.namespace == namespaces[prefix]
- else
- false
- end
+ ->(raw_node) do
+ case raw_node.node_type
+ when :element
+ namespaces = @namespaces || raw_node.namespaces
+ raw_node.namespace == namespaces[prefix]
+ when :attribute
+ namespaces = @namespaces || raw_node.element.namespaces
+ raw_node.namespace == namespaces[prefix]
+ else
+ false
end
end
when :any
- new_nodesets = nodesets.collect do |nodeset|
- filter_nodeset(nodeset) do |node|
- raw_node = node.raw_node
- raw_node.node_type == any_type
- end
+ ->(raw_node) do
+ raw_node.node_type == any_type
end
when :comment
- new_nodesets = nodesets.collect do |nodeset|
- filter_nodeset(nodeset) do |node|
- raw_node = node.raw_node
- raw_node.node_type == :comment
- end
+ ->(raw_node) do
+ raw_node.node_type == :comment
end
when :text
- new_nodesets = nodesets.collect do |nodeset|
- filter_nodeset(nodeset) do |node|
- raw_node = node.raw_node
- raw_node.node_type == :text
- end
+ ->(raw_node) do
+ raw_node.node_type == :text
end
when :processing_instruction
target = path_stack.shift
- new_nodesets = nodesets.collect do |nodeset|
- filter_nodeset(nodeset) do |node|
- raw_node = node.raw_node
- (raw_node.node_type == :processing_instruction) and
- (target.empty? or (raw_node.target == target))
- end
+ ->(raw_node) do
+ (raw_node.node_type == :processing_instruction) and
+ (target.empty? or (raw_node.target == target))
end
when :node
- new_nodesets = nodesets.collect do |nodeset|
- filter_nodeset(nodeset) do |node|
- true
- end
+ ->(raw_node) do
+ true
end
else
message = "[BUG] Unexpected node test: " +
"<#{operator.inspect}>: <#{path_stack.inspect}>"
raise message
end
- new_nodesets
ensure
- leave(:node_test, path_stack, new_nodesets) if @debug
+ leave(:node_test, path_stack) if @debug
end
def filter_nodeset(nodeset)
@@ -594,7 +740,6 @@ def filter_nodeset(nodeset)
def evaluate_predicate(expression, nodesets)
enter(:predicate, expression, nodesets) if @debug
- new_nodeset_count = 0
new_nodesets = nodesets.collect do |nodeset|
new_nodeset = []
subcontext = { :size => nodeset.size }
@@ -611,20 +756,17 @@ def evaluate_predicate(expression, nodesets)
result = result[0] if result.kind_of? Array and result.length == 1
if result.kind_of? Numeric
if result == node.position
- new_nodeset_count += 1
- new_nodeset << XPathNode.new(node, position: new_nodeset_count)
+ new_nodeset << XPathNode.new(node, position: new_nodeset.size + 1)
end
elsif result.instance_of? Array
if result.size > 0 and result.inject(false) {|k,s| s or k}
if result.size > 0
- new_nodeset_count += 1
- new_nodeset << XPathNode.new(node, position: new_nodeset_count)
+ new_nodeset << XPathNode.new(node, position: new_nodeset.size + 1)
end
end
else
if result
- new_nodeset_count += 1
- new_nodeset << XPathNode.new(node, position: new_nodeset_count)
+ new_nodeset << XPathNode.new(node, position: new_nodeset.size + 1)
end
end
end
@@ -660,44 +802,70 @@ def leave(tag, *args)
# in and out of function calls. If I knew what the index of the nodes was,
# I wouldn't have to do this. Maybe add a document IDX for each node?
# Problems with mutable documents. Or, rewrite everything.
- def sort(array_of_nodes, order)
+ def sort(array_of_nodes)
+ return array_of_nodes if array_of_nodes.size <= 1
+
new_arry = []
array_of_nodes.each { |node|
node_idx = []
np = node.node_type == :attribute ? node.element : node
while np.parent and np.parent.node_type == :element
- node_idx << np.parent.index( np )
+ node_idx << np.parent.index(np)
np = np.parent
end
new_arry << [ node_idx.reverse, node ]
}
ordered = new_arry.sort_by do |index, node|
- if order == :forward
- index
- else
- index.map(&:-@)
- end
+ index
end
ordered.collect do |_index, node|
node
end
end
- def descendant(nodeset, include_self)
- nodesets = []
- nodeset.each do |node|
- new_nodeset = []
- new_nodes = {}
- descendant_recursive(node.raw_node, new_nodeset, new_nodes, include_self)
- nodesets << new_nodeset unless new_nodeset.empty?
+ # Scanner for descendant-or-self axis
+ def descendant_or_self(raw_nodes, tester, selector)
+ descendant(raw_nodes, tester, selector, include_self: true)
+ end
+
+ # Scanner for descendant axis
+ def descendant(raw_nodes, tester, selector, include_self: false)
+ raw_nodes = raw_nodes.select {|node| node.respond_to?(:children) }
+ case selector
+ when :uniq
+ seen = {}.compare_by_identity
+ recursive = ->(raw_node) do
+ node_type = raw_node.node_type
+ return if seen[raw_node]
+ seen[raw_node] = true
+ return unless node_type == :element || node_type == :document
+ raw_node.children.each do |child|
+ recursive.call(child)
+ end
+ end
+ raw_nodes.each do |raw_node|
+ if include_self
+ recursive.call(raw_node)
+ else
+ raw_node.children.each(&recursive)
+ end
+ end
+ seen.keys.select(&tester)
+ else
+ raw_nodesets = raw_nodes.map do |raw_node|
+ new_nodeset = []
+ new_nodes = {}
+ descendant_recursive(raw_node, new_nodeset, new_nodes, include_self)
+ new_nodeset
+ end
+ non_optimized_raw_nodesets_select(raw_nodesets, tester, selector)
end
- nodesets
end
def descendant_recursive(raw_node, new_nodeset, new_nodes, include_self)
if include_self
return if new_nodes.key?(raw_node)
- new_nodeset << XPathNode.new(raw_node, position: new_nodeset.size + 1)
+ new_nodeset << raw_node
new_nodes[raw_node] = true
end
@@ -709,11 +877,17 @@ def descendant_recursive(raw_node, new_nodeset, new_nodes, include_self)
end
end
+ # Scanner for preceding axis
+ def preceding(raw_nodes, tester, selector)
+ raw_nodesets = raw_nodes.select {|node| node.respond_to?(:parent) }.map {|raw_node| preceding_nodes(raw_node) }
+ non_optimized_raw_nodesets_select(raw_nodesets, tester, selector)
+ end
+
# Builds a nodeset of all of the preceding nodes of the supplied node,
# in reverse document order
# preceding:: includes every element in the document that precedes this node,
# except for ancestors
- def preceding(node)
+ def preceding_nodes(node)
ancestors = []
parent = node.parent
while parent
@@ -727,8 +901,7 @@ def preceding(node)
if ancestors.include?(preceding_node)
ancestors.delete(preceding_node)
else
- precedings << XPathNode.new(preceding_node,
- position: precedings.size + 1)
+ precedings << preceding_node
end
preceding_node = preceding_node_of(preceding_node)
end
@@ -750,12 +923,19 @@ def preceding_node_of( node )
psn
end
- def following(node)
+ # Scanner for following axis
+ def following(raw_nodes, tester, selector)
+ raw_nodesets = raw_nodes.select {|node| node.respond_to?(:parent) }.map do |raw_node|
+ following_nodes(raw_node)
+ end
+ non_optimized_raw_nodesets_select(raw_nodesets, tester, selector)
+ end
+
+ def following_nodes(node)
followings = []
following_node = next_sibling_node(node)
while following_node
- followings << XPathNode.new(following_node,
- position: followings.size + 1)
+ followings << following_node
following_node = following_node_of(following_node)
end
followings
@@ -778,30 +958,19 @@ def next_sibling_node(node)
end
def child(nodeset)
- nodesets = []
- nodeset.each do |node|
+ nodeset.map do |node|
raw_node = node.raw_node
node_type = raw_node.node_type
# trace(:child, node_type, node)
case node_type
when :element
- nodesets << raw_node.children.collect.with_index do |child_node, i|
- XPathNode.new(child_node, position: i + 1)
- end
+ raw_node.children
when :document
- new_nodeset = []
- raw_node.children.each do |child|
- case child
- when XMLDecl, Text
- # Ignore
- else
- new_nodeset << XPathNode.new(child, position: new_nodeset.size + 1)
- end
+ raw_node.children.reject do |child|
+ XMLDecl === child || Text === child
end
- nodesets << new_nodeset unless new_nodeset.empty?
end
- end
- nodesets
+ end.compact
end
def norm b
diff --git a/test/xpath/test_attribute.rb b/test/xpath/test_attribute.rb
index b778ff81..458fda9f 100644
--- a/test/xpath/test_attribute.rb
+++ b/test/xpath/test_attribute.rb
@@ -32,5 +32,18 @@ def test_no_namespace
"nothing" => "")
assert_equal(["child2"], children.collect(&:text))
end
+
+ def test_no_error
+ REXML::XPath.match(@document, '//attribute::*/parent::*')
+ # Some of those are not yet supported in REXML, but at least it shouldn't raise an error
+ REXML::XPath.match(@document, '//attribute::*/preceding::*')
+ REXML::XPath.match(@document, '//attribute::*/following::*')
+ REXML::XPath.match(@document, '//attribute::*/preceding-sibling::*')
+ REXML::XPath.match(@document, '//attribute::*/following-sibling::*')
+ REXML::XPath.match(@document, '//attribute::*/ancestor::*')
+ REXML::XPath.match(@document, '//attribute::*/descendant::*')
+ REXML::XPath.match(@document, '//attribute::*/ancestor-or-self::*')
+ REXML::XPath.match(@document, '//attribute::*/descendant-or-self::*')
+ end
end
end
diff --git a/test/xpath/test_axis_preceding_sibling.rb b/test/xpath/test_axis_preceding_sibling.rb
index 9c44ad63..f4b78ebc 100644
--- a/test/xpath/test_axis_preceding_sibling.rb
+++ b/test/xpath/test_axis_preceding_sibling.rb
@@ -23,7 +23,7 @@ def test_preceding_sibling_axis
assert_equal "6", context.attributes["id"]
prev = XPath.first(context, "preceding-sibling::f")
- assert_equal "5", prev.attributes["id"]
+ assert_equal "3", prev.attributes["id"]
prev = XPath.first(context, "preceding-sibling::f[1]")
assert_equal "5", prev.attributes["id"]
@@ -34,5 +34,73 @@ def test_preceding_sibling_axis
prev = XPath.first(context, "preceding-sibling::f[3]")
assert_equal "3", prev.attributes["id"]
end
+
+ def test_preceding_sibling_position_less_than
+ context = XPath.first(@@doc, "/a/e/f[last()]")
+ assert_equal([], XPath.match(context, "preceding-sibling::f[position() < 1]"))
+ assert_equal(["5"],
+ XPath.match(context, "preceding-sibling::f[position() < 2]").map {|n| n.attributes["id"] })
+ assert_equal(["4", "5"],
+ XPath.match(context, "preceding-sibling::f[position() < 3]").map {|n| n.attributes["id"] })
+ end
+
+ def test_preceding_sibling_position_less_than_or_equal
+ context = XPath.first(@@doc, "/a/e/f[last()]")
+ assert_equal([], XPath.match(context, "preceding-sibling::f[position() <= 0]"))
+ assert_equal(["4", "5"],
+ XPath.match(context, "preceding-sibling::f[position() <= 2]").map {|n| n.attributes["id"] })
+ end
+
+ def test_preceding_sibling_position_greater_than
+ context = XPath.first(@@doc, "/a/e/f[last()]")
+ assert_equal(["3", "4", "5"],
+ XPath.match(context, "preceding-sibling::f[position() > 0]").map {|n| n.attributes["id"] })
+ assert_equal(["3", "4"],
+ XPath.match(context, "preceding-sibling::f[position() > 1]").map {|n| n.attributes["id"] })
+ assert_equal(["3"],
+ XPath.match(context, "preceding-sibling::f[position() > 2]").map {|n| n.attributes["id"] })
+ end
+
+ def test_preceding_sibling_position_greater_than_or_equal
+ context = XPath.first(@@doc, "/a/e/f[last()]")
+ assert_equal(["3", "4", "5"],
+ XPath.match(context, "preceding-sibling::f[position() >= 0]").map {|n| n.attributes["id"] })
+ assert_equal(["3", "4", "5"],
+ XPath.match(context, "preceding-sibling::f[position() >= 1]").map {|n| n.attributes["id"] })
+ assert_equal(["3", "4"],
+ XPath.match(context, "preceding-sibling::f[position() >= 2]").map {|n| n.attributes["id"] })
+ end
+
+ def test_preceding_sibling_multiple_anchors
+ doc = Document.new(<<~XML)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ XML
+
+ assert_equal(%w[2 7 9], XPath.match(doc, "/a/anchor/preceding-sibling::b[position() = 3]").map {|n| n.attributes["id"] })
+ assert_equal(%w[2 3 4 7 8 9 10 11], XPath.match(doc, "/a/anchor/preceding-sibling::b[position() <= 3]").map {|n| n.attributes["id"] })
+ assert_equal(%w[1 2 3 4 5 6 7 8], XPath.match(doc, "/a/anchor/preceding-sibling::b[position() >= 4]").map {|n| n.attributes["id"] })
+ end
end
end
diff --git a/test/xpath/test_base.rb b/test/xpath/test_base.rb
index 1c6eb624..02a4303a 100644
--- a/test/xpath/test_base.rb
+++ b/test/xpath/test_base.rb
@@ -382,32 +382,39 @@ def test_preceding
start = XPath.first( d, "/a/b[@id='1']" )
assert_equal 'b', start.name
c = XPath.first( start, "preceding::c" )
- assert_equal '2', c.attributes['id']
+ assert_equal '0', c.attributes['id']
- c1, c0 = XPath.match( d, "/a/b/c[@id='2']/preceding::node()" )
- assert_equal '1', c1.attributes['id']
+ b0, b2, c0, c1 = XPath.match( d, "/a/b/c[@id='2']/preceding::node()" )
+ assert_equal 'b', b0.name
+ assert_equal 'b', b2.name
+ assert_equal 'c', c0.name
+ assert_equal 'c', c1.name
+
+ assert_equal '0', b0.attributes['id']
+ assert_equal '2', b2.attributes['id']
assert_equal '0', c0.attributes['id']
+ assert_equal '1', c1.attributes['id']
- c2, c1, c0, b, b2, b0 = XPath.match( start, "preceding::node()" )
+ b0, b2, b, c0, c1, c2 = XPath.match( start, "preceding::node()" )
- assert_equal 'c', c2.name
- assert_equal 'c', c1.name
assert_equal 'c', c0.name
- assert_equal 'b', b.name
- assert_equal 'b', b2.name
+ assert_equal 'c', c1.name
+ assert_equal 'c', c2.name
assert_equal 'b', b0.name
+ assert_equal 'b', b2.name
+ assert_equal 'b', b.name
- assert_equal '2', c2.attributes['id']
- assert_equal '1', c1.attributes['id']
assert_equal '0', c0.attributes['id']
- assert b.attributes.empty?
- assert_equal '2', b2.attributes['id']
+ assert_equal '1', c1.attributes['id']
+ assert_equal '2', c2.attributes['id']
assert_equal '0', b0.attributes['id']
+ assert_equal '2', b2.attributes['id']
+ assert b.attributes.empty?
d = REXML::Document.new("")
matches = REXML::XPath.match(d, "/a/d/preceding::node()")
- assert_equal("c", matches[0].name)
- assert_equal("b", matches[1].name)
+ assert_equal("b", matches[0].name)
+ assert_equal("c", matches[1].name)
s = ""
d = REXML::Document.new(s)
@@ -425,7 +432,7 @@ def test_preceding_multiple
XML
doc = REXML::Document.new(source)
matches = REXML::XPath.match(doc, "a/d/preceding::*")
- assert_equal(["d", "c", "b"], matches.map(&:name))
+ assert_equal(["b", "c", "d"], matches.map(&:name))
end
def test_following_multiple
@@ -485,6 +492,14 @@ def test_following_sibling_predicates
assert_equal(["w", "x", "y", "z"], matches.map(&:name))
end
+ def test_following_sibling_position_less_than
+ source = ""
+ doc = REXML::Document.new(source)
+ assert_equal([], REXML::XPath.match(doc, "/r/a/following-sibling::*[position() < 1]"))
+ assert_equal(["b"], REXML::XPath.match(doc, "/r/a/following-sibling::*[position() < 2]").map(&:name))
+ assert_equal(["b", "c"], REXML::XPath.match(doc, "/r/a/following-sibling::*[position() < 3]").map(&:name))
+ end
+
def test_preceding_sibling_across_multiple_nodes
source = <<-XML
@@ -498,7 +513,7 @@ def test_preceding_sibling_across_multiple_nodes
XML
doc = REXML::Document.new(source)
matches = REXML::XPath.match(doc, "a/b/x/preceding-sibling::*")
- assert_equal(["e", "d", "c"], matches.map(&:name))
+ assert_equal(["c", "d", "e"], matches.map(&:name))
end
def test_preceding_sibling_within_single_node
@@ -511,7 +526,7 @@ def test_preceding_sibling_within_single_node
XML
doc = REXML::Document.new(source)
matches = REXML::XPath.match(doc, "a/b/x/preceding-sibling::*")
- assert_equal(["e", "x", "d", "c"], matches.map(&:name))
+ assert_equal(["c", "d", "x", "e"], matches.map(&:name))
end
def test_following
@@ -575,18 +590,45 @@ def test_nested_predicates
matches = XPath.match(doc, '(/div/div/test[3])').map(&:text)
assert_equal [], matches
+ matches = XPath.match(doc, '/div/div/test[1][1]').map(&:text)
+ assert_equal ["ab", "ef", "hi"], matches
matches = XPath.match(doc, '(/div/div/test[1])[1]').map(&:text)
assert_equal ["ab"], matches
+ matches = XPath.match(doc, '/div/div/test[1][2]').map(&:text)
+ assert_equal [], matches
matches = XPath.match(doc, '(/div/div/test[1])[2]').map(&:text)
assert_equal ["ef"], matches
matches = XPath.match(doc, '(/div/div/test[1])[3]').map(&:text)
assert_equal ["hi"], matches
+ matches = XPath.match(doc, '/div/div/test[2][1]').map(&:text)
+ assert_equal ["cd", "gh"], matches
matches = XPath.match(doc, '(/div/div/test[2])[1]').map(&:text)
assert_equal ["cd"], matches
+ matches = XPath.match(doc, '/div/div/test[2][2]').map(&:text)
+ assert_equal [], matches
matches = XPath.match(doc, '(/div/div/test[2])[2]').map(&:text)
assert_equal ["gh"], matches
matches = XPath.match(doc, '(/div/div/test[2])[3]').map(&:text)
assert_equal [], matches
+ matches = XPath.match(doc, '//div[1]/test|//div[2]/test[2]').map(&:text)
+ assert_equal ["ab", "cd", "gh"], matches
+ matches = XPath.match(doc, '(//div[1]/test|//div[2]/test)[2]').map(&:text)
+ assert_equal ["cd"], matches
+
+ xpath = '/div/div/test/preceding::*'
+ without_parentheses = XPath.match(doc, xpath).map(&:text)
+ with_parentheses = XPath.match(doc, "(#{xpath})").map(&:text)
+ assert_equal without_parentheses, with_parentheses
+
+ xpath = '/div/div/test/preceding-sibling::*'
+ without_parentheses = XPath.match(doc, xpath).map(&:text)
+ with_parentheses = XPath.match(doc, "(#{xpath})").map(&:text)
+ assert_equal without_parentheses, with_parentheses
+
+ xpath = '/div/div/test/ancestor::*'
+ without_parentheses = XPath.match(doc, xpath).map(&:text)
+ with_parentheses = XPath.match(doc, "(#{xpath})").map(&:text)
+ assert_equal without_parentheses, with_parentheses
end
# Contributed by Mike Stok
@@ -856,6 +898,30 @@ def test_ordering
r.collect {|element| element.attribute("id").value})
end
+ def test_order_consistency
+ doc = REXML::Document.new('')
+ assert_equal('a', REXML::XPath.first(doc, '//d/ancestor::*').name)
+ assert_equal('a', REXML::XPath.first(doc, '//d[@id="2"]/ancestor::*').name)
+ assert_equal(%w[a b c d], REXML::XPath.match(doc, '//d/ancestor::*').map(&:name))
+ assert_equal(%w[a b c d], REXML::XPath.match(doc, '//d[@id="2"]/ancestor::*').map(&:name))
+
+ assert_equal('a', REXML::XPath.first(doc, '//d/ancestor-or-self::*').name)
+ assert_equal('a', REXML::XPath.first(doc, '//d[@id="2"]/ancestor-or-self::*').name)
+ assert_equal(%w[a b c d d], REXML::XPath.match(doc, '//d/ancestor-or-self::*').map(&:name))
+ assert_equal(%w[a b c d d], REXML::XPath.match(doc, '//d[@id="2"]/ancestor-or-self::*').map(&:name))
+
+ doc = REXML::Document.new('')
+ assert_equal('b', REXML::XPath.first(doc, '//d/preceding::*').name)
+ assert_equal('b', REXML::XPath.first(doc, '//d[@id="2"]/preceding::*').name)
+ assert_equal(%w[b c d], REXML::XPath.match(doc, '//d/preceding::*').map(&:name))
+ assert_equal(%w[b c d], REXML::XPath.match(doc, '//d[@id="2"]/preceding::*').map(&:name))
+
+ assert_equal('b', REXML::XPath.first(doc, '//d/preceding-sibling::*').name)
+ assert_equal('b', REXML::XPath.first(doc, '//d[@id="2"]/preceding-sibling::*').name)
+ assert_equal(%w[b c d], REXML::XPath.match(doc, '//d/preceding-sibling::*').map(&:name))
+ assert_equal(%w[b c d], REXML::XPath.match(doc, '//d[@id="2"]/preceding-sibling::*').map(&:name))
+ end
+
def test_descendant_or_self_ordering
source = "
diff --git a/test/xpath/test_predicate.rb b/test/xpath/test_predicate.rb
index 278e3765..f25bd758 100644
--- a/test/xpath/test_predicate.rb
+++ b/test/xpath/test_predicate.rb
@@ -59,12 +59,50 @@ def test_predicates_multi
assert_equal( "1", m[0].attributes["id"] )
end
+ def test_predicate_multi_position
+ xml = <<~XML
+
+
+
+
+ XML
+ doc = REXML::Document.new(xml)
+
+ result = REXML::XPath.match(doc, "/a/b/c[position()>1]")
+ assert_equal(%w[2 3 4 5 7 8 9 10], result.map { |node| node.attributes["id"] })
+
+ result = REXML::XPath.match(doc, "/a/b/c[position()>1][position()>1]")
+ assert_equal(%w[3 4 5 8 9 10], result.map { |node| node.attributes["id"] })
+
+ result = REXML::XPath.match(doc, "/a/b/c[position()>1][position()>1][@id!='3']")
+ assert_equal(%w[4 5 8 9 10], result.map { |node| node.attributes["id"] })
+
+ result = REXML::XPath.match(doc, "/a/b/c[position()>1][position()>1][@id!='3'][position()!=2]")
+ assert_equal(%w[4 8 10], result.map { |node| node.attributes["id"] })
+ end
+
def do_path( path )
m = REXML::XPath.match( @doc, path )
#puts path, @parser.parse( path ).inspect
return m
end
+ def test_predicate_float_literal
+ doc = REXML::Document.new("")
+ # [N.0] is equivalent to [position() = N.0] = [position() = N]
+ assert_equal(["a"], REXML::XPath.match(doc, "/r/*[1.0]").map(&:name))
+ assert_equal(["b"], REXML::XPath.match(doc, "/r/*[2.0]").map(&:name))
+ # Non-integer numeric literals match no node.
+ assert_equal([], REXML::XPath.match(doc, "/r/*[1.5]"))
+ end
+
+ def test_predicate_variable_as_position
+ doc = REXML::Document.new("")
+ parser = REXML::XPathParser.new
+ parser["x"] = 2
+ assert_equal(["b"], parser.parse("/r/*[$x]", doc).map(&:name))
+ end
+
def test_get_no_siblings_terminal_nodes
source = <<-XML