# Patch summary: guard the lexer against zero-width characters and add regression tests.
#
# - Gemfile / Gemfile.lock: add the "timeout" gem to the dependency list.
# - src/lexstate.c (rbs_next_char): when the encoding's char_width callback
#   yields 0 for the current position, force *byte_len to 1 and report the raw
#   byte (cast through unsigned char) as the codepoint. The in-code comment
#   states the intent: avoid an infinite loop on invalid bytes.
# - test/rbs/parser_test.rb: require "timeout" and add two tests that call
#   RBS::Parser._parse_signature inside Timeout.timeout(5):
#     * "# \xC2" (invalid byte inside a comment) must return without raising
#       or timing out; the test makes no explicit assertion.
#     * "\xFF" (invalid byte at top level) must raise RBS::ParsingError.
#
# NOTE(review): the pre-existing `*byte_len == 1` branch still assigns
#   `(unsigned int) *start` without the `unsigned char` cast used by the new
#   branch. The type of `start` is not visible in this hunk; if it is plain
#   `char`, bytes >= 0x80 may convert differently in the two branches where
#   char is signed -- confirm.
# NOTE(review): Timeout.timeout may be unable to interrupt a busy loop running
#   inside the native extension, so on regression these tests could hang
#   rather than fail after 5 seconds -- TODO confirm.
diff --git a/Gemfile b/Gemfile index cf2982acd..d35624b74 100644 --- a/Gemfile +++ b/Gemfile @@ -48,6 +48,7 @@ gem "net-smtp" gem 'csv' gem 'ostruct' gem 'pstore' +gem "timeout" group :minitest do gem "minitest" diff --git a/Gemfile.lock b/Gemfile.lock index ada025774..0a1f7d0a4 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -231,6 +231,7 @@ DEPENDENCIES steep! tempfile test-unit + timeout BUNDLED WITH 4.0.1 diff --git a/src/lexstate.c b/src/lexstate.c index d65671e5f..f3d35b435 100644 --- a/src/lexstate.c +++ b/src/lexstate.c @@ -134,7 +134,11 @@ bool rbs_next_char(rbs_lexer_t *lexer, unsigned int *codepoint, size_t *byte_len *byte_len = lexer->encoding->char_width((const uint8_t *) start, (ptrdiff_t) (lexer->string.end - start)); - if (*byte_len == 1) { + if (*byte_len == 0) { + // Avoid infinite loop on invalid bytes. + *byte_len = 1; + *codepoint = (unsigned int) (unsigned char) *start; + } else if (*byte_len == 1) { *codepoint = (unsigned int) *start; } else { *codepoint = 12523; // Dummy data for "ル" from "ルビー" (Ruby) in Unicode diff --git a/test/rbs/parser_test.rb b/test/rbs/parser_test.rb index ea160cc9d..fa5b26e46 100644 --- a/test/rbs/parser_test.rb +++ b/test/rbs/parser_test.rb @@ -1,4 +1,5 @@ require "test_helper" +require "timeout" class RBS::ParserTest < Test::Unit::TestCase def buffer(source) @@ -1028,4 +1029,22 @@ class Foo[T < Integer] < Bar # Comment assert_equal [:tTRIVIA, "\n", 56...57], tokens.shift.then { |t| [t[0], t[1].source, t[1].range] } assert_equal [:pEOF, '', 57...57], tokens.shift.then { |t| [t[0], t[1].source, t[1].range] } end + + def test_invalid_utf8_byte_in_comment_does_not_hang + # Regression: invalid UTF-8 byte in a comment used to loop forever in the lexer. 
+ source = "# \xC2".dup.force_encoding(Encoding::UTF_8) + Timeout.timeout(5) do + RBS::Parser._parse_signature(buffer(source), 0, source.bytesize) + end + end + + def test_invalid_utf8_byte_at_top_level_raises + # Regression: invalid UTF-8 byte at top level used to trip RBS_ASSERT in the C extension. + source = "\xFF".dup.force_encoding(Encoding::UTF_8) + Timeout.timeout(5) do + assert_raises(RBS::ParsingError) do + RBS::Parser._parse_signature(buffer(source), 0, source.bytesize) + end + end + end end