diff --git a/lib/lrama/lexer.rb b/lib/lrama/lexer.rb index d9b53810b..4a5a8dabd 100644 --- a/lib/lrama/lexer.rb +++ b/lib/lrama/lexer.rb @@ -62,6 +62,8 @@ class Lexer %categories %start ).freeze #: Array[String] + SYMBOL_PATTERN = Regexp.new(SYMBOLS.join('|')) #: Regexp + PERCENT_TOKEN_PATTERN = Regexp.new(PERCENT_TOKENS.join('|')) #: Regexp # @rbs (GrammarFile grammar_file) -> void def initialize(grammar_file) @@ -119,9 +121,9 @@ def lex_token case when @scanner.eos? return - when @scanner.scan(/#{SYMBOLS.join('|')}/) + when @scanner.scan(SYMBOL_PATTERN) return [@scanner.matched, Lrama::Lexer::Token::Token.new(s_value: @scanner.matched, location: location)] - when @scanner.scan(/#{PERCENT_TOKENS.join('|')}/) + when @scanner.scan(PERCENT_TOKEN_PATTERN) return [@scanner.matched, Lrama::Lexer::Token::Token.new(s_value: @scanner.matched, location: location)] when @scanner.scan(/[\?\+\*]/) return [@scanner.matched, Lrama::Lexer::Token::Token.new(s_value: @scanner.matched, location: location)] @@ -153,6 +155,11 @@ def lex_token def lex_c_code nested = 0 code = +'' + end_symbol = @end_symbol + raise "end_symbol is not set" unless end_symbol + + end_symbol_pattern = Regexp.new(end_symbol) + escaped_end_symbol_pattern = Regexp.new(Regexp.escape(end_symbol)) reset_first_position until @scanner.eos? do @@ -161,14 +168,14 @@ def lex_c_code code << @scanner.matched nested += 1 when @scanner.scan(/}/) - if nested == 0 && @end_symbol == '}' + if nested == 0 && end_symbol == '}' @scanner.unscan return [:C_DECLARATION, Lrama::Lexer::Token::UserCode.new(s_value: code, location: location)] else code << @scanner.matched nested -= 1 end - when @scanner.check(/#{@end_symbol}/) + when @scanner.check(end_symbol_pattern) return [:C_DECLARATION, Lrama::Lexer::Token::UserCode.new(s_value: code, location: location)] when @scanner.scan(/\n/) code << @scanner.matched @@ -180,14 +187,14 @@ def lex_c_code code << %Q(#{@scanner.matched}) when @scanner.scan(/[^\"'\{\}\n]+/) code << @scanner.matched - when @scanner.scan(/#{Regexp.escape(@end_symbol)}/) # steep:ignore + when @scanner.scan(escaped_end_symbol_pattern) code << @scanner.matched else code << @scanner.getch end end - if @end_symbol == '\Z' + if end_symbol == '\Z' return [:C_DECLARATION, Lrama::Lexer::Token::UserCode.new(s_value: code, location: location)] end diff --git a/lib/lrama/state.rb b/lib/lrama/state.rb index 50912e094..56ac827e5 100644 --- a/lib/lrama/state.rb +++ b/lib/lrama/state.rb @@ -31,6 +31,7 @@ class State # @nterm_transitions: Array[Action::Goto] # @term_transitions: Array[Action::Shift] # @transitions: Array[transition] + # @transitions_by_symbol_number: Hash[Integer, transition] # @internal_dependencies: Hash[Action::Goto, Array[Action::Goto]] # @successor_dependencies: Hash[Action::Goto, Array[Action::Goto]] @@ -177,6 +178,17 @@ def transitions end end + # @rbs () -> Hash[Integer, transition] + def transitions_by_symbol_number + @transitions_by_symbol_number ||= begin + h = {} + transitions.each do |transition| + h[transition.next_sym.number] = transition + end + h + end + end + # @rbs (transition transition, State next_state) -> void def update_transition(transition, next_state) set_items_to_state(transition.to_items, next_state) @@ -197,6 +209,7 @@ def update_transitions_caches(transition) @transitions << new_transition @nterm_transitions = nil @term_transitions = nil + @transitions_by_symbol_number = nil @follow_kernel_items[new_transition] = @follow_kernel_items.delete(transition) @always_follows[new_transition] = @always_follows.delete(transition) @@ -213,13 +226,7 @@ def selected_term_transitions # # @rbs (Grammar::Symbol sym) -> State def transition(sym) - result = nil - - if sym.term? - result = term_transitions.find {|shift| shift.next_sym == sym }.to_state - else - result = nterm_transitions.find {|goto| goto.next_sym == sym }.to_state - end + result = transitions_by_symbol_number[sym.number]&.to_state raise "Can not transit by #{sym} #{self}" if result.nil? diff --git a/lib/lrama/states.rb b/lib/lrama/states.rb index ddce627df..31217834e 100644 --- a/lib/lrama/states.rb +++ b/lib/lrama/states.rb @@ -467,12 +467,19 @@ def compute_follow_sets # @rbs () -> void def compute_la + rules_by_id = {} + rules.each do |rule| + rules_by_id[rule.id] = rule + end + @states.each do |state| lookback_relation_on_state = @lookback_relation[state.id] next unless lookback_relation_on_state - rules.each do |rule| - ary = lookback_relation_on_state[rule.id] - next unless ary + + needs_look_ahead = !(state.reduces.count == 1 && state.term_transitions.count == 0) + + lookback_relation_on_state.each do |rule_id, ary| + look_ahead = 0 ary.each do |goto| # q = state, A -> ω = rule, p = state2, A = nterm @@ -480,18 +487,21 @@ def compute_la next if follows == 0 - @la[state.id] ||= {} - @la[state.id][rule.id] ||= 0 - look_ahead = @la[state.id][rule.id] | follows - @la[state.id][rule.id] |= look_ahead + look_ahead |= follows + end - # No risk of conflict when - # * the state only has single reduce - # * the state only has nterm_transitions (GOTO) - next if state.reduces.count == 1 && state.term_transitions.count == 0 + next if look_ahead == 0 - state.set_look_ahead(rule, bitmap_to_terms(look_ahead)) - end + @la[state.id] ||= {} + @la[state.id][rule_id] ||= 0 + @la[state.id][rule_id] |= look_ahead + + # No risk of conflict when + # * the state only has single reduce + # * the state only has nterm_transitions (GOTO) + next unless needs_look_ahead + + state.set_look_ahead(rules_by_id[rule_id], bitmap_to_terms(@la[state.id][rule_id])) end end end diff --git a/sig/generated/lrama/lexer.rbs b/sig/generated/lrama/lexer.rbs index 232026125..c3d3ad9ed 100644 --- a/sig/generated/lrama/lexer.rbs +++ b/sig/generated/lrama/lexer.rbs @@ -22,6 +22,10 @@ module Lrama PERCENT_TOKENS: Array[String] + SYMBOL_PATTERN: Regexp + + PERCENT_TOKEN_PATTERN: Regexp + # @rbs (GrammarFile grammar_file) -> void def initialize: (GrammarFile grammar_file) -> void diff --git a/sig/generated/lrama/state.rbs b/sig/generated/lrama/state.rbs index 8f585c332..42e65ae32 100644 --- a/sig/generated/lrama/state.rbs +++ b/sig/generated/lrama/state.rbs @@ -32,6 +32,8 @@ module Lrama @transitions: Array[transition] + @transitions_by_symbol_number: Hash[Integer, transition] + @internal_dependencies: Hash[Action::Goto, Array[Action::Goto]] @successor_dependencies: Hash[Action::Goto, Array[Action::Goto]] @@ -112,6 +114,9 @@ module Lrama # @rbs () -> Array[transition] def transitions: () -> Array[transition] + # @rbs () -> Hash[Integer, transition] + def transitions_by_symbol_number: () -> Hash[Integer, transition] + # @rbs (transition transition, State next_state) -> void def update_transition: (transition transition, State next_state) -> void