File tree Expand file tree Collapse file tree 2 files changed +24
-7
lines changed
parser/testdata/01280_unicode_whitespaces_lexer Expand file tree Collapse file tree 2 files changed +24
-7
lines changed Original file line number Diff line number Diff line change @@ -73,12 +73,33 @@ func (l *Lexer) peekChar() rune {
7373}
7474
7575func (l * Lexer ) skipWhitespace () {
76- // Skip whitespace and BOM (byte order mark U+FEFF)
77- for unicode .IsSpace (l .ch ) || l .ch == '\uFEFF' {
76+ // Skip whitespace, BOM, and other Unicode characters that ClickHouse treats as whitespace.
77+ // See: https://github.com/ClickHouse/ClickHouse/blob/master/src/Parsers/Lexer.cpp
78+ for unicode .IsSpace (l .ch ) || isClickHouseWhitespace (l .ch ) {
7879 l .readChar ()
7980 }
8081}
8182
// isClickHouseWhitespace reports whether ch is one of the invisible code
// points that ClickHouse treats as whitespace but that Go's unicode.IsSpace
// does not recognize. It returns false for every other rune, including the
// ordinary whitespace that unicode.IsSpace already handles.
func isClickHouseWhitespace(ch rune) bool {
	switch ch {
	case '\uFEFF', // BOM (Byte Order Mark)
		'\u180E', // MONGOLIAN VOWEL SEPARATOR
		'\u200B', // ZERO WIDTH SPACE
		'\u200C', // ZERO WIDTH NON-JOINER
		'\u200D', // ZERO WIDTH JOINER
		'\u2060': // WORD JOINER
		return true
	}
	return false
}
102+
82103// NextToken returns the next token from the input.
83104func (l * Lexer ) NextToken () Item {
84105 l .skipWhitespace ()
Original file line number Diff line number Diff line change 1- {
2- "explain_todo" : {
3- "stmt3" : true
4- }
5- }
1+ {}
You can’t perform that action at this time.
0 commit comments