From dffb3bb699ab451c630b18824a9d103068315ea1 Mon Sep 17 00:00:00 2001 From: ydah Date: Thu, 18 Jun 2026 22:18:08 +0900 Subject: [PATCH 1/5] Optimize lookahead set assignment --- lib/lrama/states.rb | 33 ++++++++++++++++++++------------- 1 file changed, 20 insertions(+), 13 deletions(-) diff --git a/lib/lrama/states.rb b/lib/lrama/states.rb index ddce627df..63ba9a3e3 100644 --- a/lib/lrama/states.rb +++ b/lib/lrama/states.rb @@ -467,12 +467,16 @@ def compute_follow_sets # @rbs () -> void def compute_la + rules_by_id = rules.to_h { |rule| [rule.id, rule] } + @states.each do |state| lookback_relation_on_state = @lookback_relation[state.id] next unless lookback_relation_on_state - rules.each do |rule| - ary = lookback_relation_on_state[rule.id] - next unless ary + + needs_look_ahead = !(state.reduces.count == 1 && state.term_transitions.count == 0) + + lookback_relation_on_state.each do |rule_id, ary| + look_ahead = 0 ary.each do |goto| # q = state, A -> ω = rule, p = state2, A = nterm @@ -480,18 +484,21 @@ def compute_la next if follows == 0 - @la[state.id] ||= {} - @la[state.id][rule.id] ||= 0 - look_ahead = @la[state.id][rule.id] | follows - @la[state.id][rule.id] |= look_ahead + look_ahead |= follows + end - # No risk of conflict when - # * the state only has single reduce - # * the state only has nterm_transitions (GOTO) - next if state.reduces.count == 1 && state.term_transitions.count == 0 + next if look_ahead == 0 - state.set_look_ahead(rule, bitmap_to_terms(look_ahead)) - end + @la[state.id] ||= {} + @la[state.id][rule_id] ||= 0 + @la[state.id][rule_id] |= look_ahead + + # No risk of conflict when + # * the state only has single reduce + # * the state only has nterm_transitions (GOTO) + next unless needs_look_ahead + + state.set_look_ahead(rules_by_id[rule_id], bitmap_to_terms(@la[state.id][rule_id])) end end end From fb7c91cf6c24f2e6e1a38fb90e8f6f8b4824270f Mon Sep 17 00:00:00 2001 From: ydah Date: Thu, 18 Jun 2026 22:21:04 +0900 Subject: [PATCH 2/5] Cache state transitions by symbol number --- lib/lrama/state.rb | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/lib/lrama/state.rb b/lib/lrama/state.rb index 50912e094..8518c5c78 100644 --- a/lib/lrama/state.rb +++ b/lib/lrama/state.rb @@ -31,6 +31,7 @@ class State # @nterm_transitions: Array[Action::Goto] # @term_transitions: Array[Action::Shift] # @transitions: Array[transition] + # @transitions_by_symbol_number: Hash[Integer, transition] # @internal_dependencies: Hash[Action::Goto, Array[Action::Goto]] # @successor_dependencies: Hash[Action::Goto, Array[Action::Goto]] @@ -177,6 +178,13 @@ def transitions end end + # @rbs () -> Hash[Integer, transition] + def transitions_by_symbol_number + @transitions_by_symbol_number ||= transitions.to_h do |transition| + [transition.next_sym.number, transition] + end + end + # @rbs (transition transition, State next_state) -> void def update_transition(transition, next_state) set_items_to_state(transition.to_items, next_state) @@ -197,6 +205,7 @@ def update_transitions_caches(transition) @transitions << new_transition @nterm_transitions = nil @term_transitions = nil + @transitions_by_symbol_number = nil @follow_kernel_items[new_transition] = @follow_kernel_items.delete(transition) @always_follows[new_transition] = @always_follows.delete(transition) @@ -213,13 +222,7 @@ def selected_term_transitions # # @rbs (Grammar::Symbol sym) -> State def transition(sym) - result = nil - - if sym.term? - result = term_transitions.find {|shift| shift.next_sym == sym }.to_state - else - result = nterm_transitions.find {|goto| goto.next_sym == sym }.to_state - end + result = transitions_by_symbol_number[sym.number]&.to_state raise "Can not transit by #{sym} #{self}" if result.nil? From 58305770bface1a8ef568683dacb688f6eb0854e Mon Sep 17 00:00:00 2001 From: ydah Date: Thu, 18 Jun 2026 22:25:01 +0900 Subject: [PATCH 3/5] Reuse lexer token patterns --- lib/lrama/lexer.rb | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/lib/lrama/lexer.rb b/lib/lrama/lexer.rb index d9b53810b..7fed19847 100644 --- a/lib/lrama/lexer.rb +++ b/lib/lrama/lexer.rb @@ -62,6 +62,8 @@ class Lexer %categories %start ).freeze #: Array[String] + SYMBOL_PATTERN = Regexp.new(SYMBOLS.join('|')) #: Regexp + PERCENT_TOKEN_PATTERN = Regexp.new(PERCENT_TOKENS.join('|')) #: Regexp # @rbs (GrammarFile grammar_file) -> void def initialize(grammar_file) @@ -119,9 +121,9 @@ def lex_token case when @scanner.eos? return - when @scanner.scan(/#{SYMBOLS.join('|')}/) + when @scanner.scan(SYMBOL_PATTERN) return [@scanner.matched, Lrama::Lexer::Token::Token.new(s_value: @scanner.matched, location: location)] - when @scanner.scan(/#{PERCENT_TOKENS.join('|')}/) + when @scanner.scan(PERCENT_TOKEN_PATTERN) return [@scanner.matched, Lrama::Lexer::Token::Token.new(s_value: @scanner.matched, location: location)] when @scanner.scan(/[\?\+\*]/) return [@scanner.matched, Lrama::Lexer::Token::Token.new(s_value: @scanner.matched, location: location)] From cbb9c1d6778fc4eda6eed8fdd020861db3a334dc Mon Sep 17 00:00:00 2001 From: ydah Date: Thu, 18 Jun 2026 22:27:10 +0900 Subject: [PATCH 4/5] Reuse C code lexer end patterns --- lib/lrama/lexer.rb | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/lib/lrama/lexer.rb b/lib/lrama/lexer.rb index 7fed19847..0c6db9541 100644 --- a/lib/lrama/lexer.rb +++ b/lib/lrama/lexer.rb @@ -155,6 +155,9 @@ def lex_token def lex_c_code nested = 0 code = +'' + end_symbol = @end_symbol + end_symbol_pattern = Regexp.new(end_symbol) + escaped_end_symbol_pattern = Regexp.new(Regexp.escape(end_symbol)) reset_first_position until @scanner.eos? do @@ -163,14 +166,14 @@ def lex_c_code code << @scanner.matched nested += 1 when @scanner.scan(/}/) - if nested == 0 && @end_symbol == '}' + if nested == 0 && end_symbol == '}' @scanner.unscan return [:C_DECLARATION, Lrama::Lexer::Token::UserCode.new(s_value: code, location: location)] else code << @scanner.matched nested -= 1 end - when @scanner.check(/#{@end_symbol}/) + when @scanner.check(end_symbol_pattern) return [:C_DECLARATION, Lrama::Lexer::Token::UserCode.new(s_value: code, location: location)] when @scanner.scan(/\n/) code << @scanner.matched @@ -182,14 +185,14 @@ def lex_c_code code << %Q(#{@scanner.matched}) when @scanner.scan(/[^\"'\{\}\n]+/) code << @scanner.matched - when @scanner.scan(/#{Regexp.escape(@end_symbol)}/) # steep:ignore + when @scanner.scan(escaped_end_symbol_pattern) code << @scanner.matched else code << @scanner.getch end end - if @end_symbol == '\Z' + if end_symbol == '\Z' return [:C_DECLARATION, Lrama::Lexer::Token::UserCode.new(s_value: code, location: location)] end From 9e609547f2a6eaecbfc09b05667b4f3a4aa2f9c8 Mon Sep 17 00:00:00 2001 From: ydah Date: Thu, 18 Jun 2026 22:59:14 +0900 Subject: [PATCH 5/5] Fix CI compatibility for performance optimizations --- lib/lrama/lexer.rb | 2 ++ lib/lrama/state.rb | 8 ++++++-- lib/lrama/states.rb | 5 ++++- sig/generated/lrama/lexer.rbs | 4 ++++ sig/generated/lrama/state.rbs | 5 +++++ 5 files changed, 21 insertions(+), 3 deletions(-) diff --git a/lib/lrama/lexer.rb b/lib/lrama/lexer.rb index 0c6db9541..4a5a8dabd 100644 --- a/lib/lrama/lexer.rb +++ b/lib/lrama/lexer.rb @@ -156,6 +156,8 @@ def lex_c_code nested = 0 code = +'' end_symbol = @end_symbol + raise "end_symbol is not set" unless end_symbol + end_symbol_pattern = Regexp.new(end_symbol) escaped_end_symbol_pattern = Regexp.new(Regexp.escape(end_symbol)) reset_first_position diff --git a/lib/lrama/state.rb b/lib/lrama/state.rb index 8518c5c78..56ac827e5 100644 --- a/lib/lrama/state.rb +++ b/lib/lrama/state.rb @@ -180,8 +180,12 @@ def transitions # @rbs () -> Hash[Integer, transition] def transitions_by_symbol_number - @transitions_by_symbol_number ||= transitions.to_h do |transition| - [transition.next_sym.number, transition] + @transitions_by_symbol_number ||= begin + h = {} + transitions.each do |transition| + h[transition.next_sym.number] = transition + end + h end end diff --git a/lib/lrama/states.rb b/lib/lrama/states.rb index 63ba9a3e3..31217834e 100644 --- a/lib/lrama/states.rb +++ b/lib/lrama/states.rb @@ -467,7 +467,10 @@ def compute_follow_sets # @rbs () -> void def compute_la - rules_by_id = rules.to_h { |rule| [rule.id, rule] } + rules_by_id = {} + rules.each do |rule| + rules_by_id[rule.id] = rule + end @states.each do |state| lookback_relation_on_state = @lookback_relation[state.id] diff --git a/sig/generated/lrama/lexer.rbs b/sig/generated/lrama/lexer.rbs index 232026125..c3d3ad9ed 100644 --- a/sig/generated/lrama/lexer.rbs +++ b/sig/generated/lrama/lexer.rbs @@ -22,6 +22,10 @@ module Lrama PERCENT_TOKENS: Array[String] + SYMBOL_PATTERN: Regexp + + PERCENT_TOKEN_PATTERN: Regexp + # @rbs (GrammarFile grammar_file) -> void def initialize: (GrammarFile grammar_file) -> void diff --git a/sig/generated/lrama/state.rbs b/sig/generated/lrama/state.rbs index 8f585c332..42e65ae32 100644 --- a/sig/generated/lrama/state.rbs +++ b/sig/generated/lrama/state.rbs @@ -32,6 +32,8 @@ module Lrama @transitions: Array[transition] + @transitions_by_symbol_number: Hash[Integer, transition] + @internal_dependencies: Hash[Action::Goto, Array[Action::Goto]] @successor_dependencies: Hash[Action::Goto, Array[Action::Goto]] @@ -112,6 +114,9 @@ module Lrama # @rbs () -> Array[transition] def transitions: () -> Array[transition] + # @rbs () -> Hash[Integer, transition] + def transitions_by_symbol_number: () -> Hash[Integer, transition] + # @rbs (transition transition, State next_state) -> void def update_transition: (transition transition, State next_state) -> void