@@ -30,15 +30,10 @@ const fn is_tab(c: char) -> bool {
30
30
)
31
31
}
32
32
33
- const fn is_newline ( c : char ) -> bool {
33
+ const fn is_line_ending ( c : char ) -> bool {
34
34
matches ! (
35
- c, '\n' // newline
36
- )
37
- }
38
-
39
- const fn is_carriage_return ( c : char ) -> bool {
40
- matches ! (
41
- c, '\r' // carriage return
35
+ c,
36
+ '\n' | '\r' // LF or CR; \r\n pairing is resolved later by line_ending_sequence
42
37
)
43
38
}
44
39
@@ -81,15 +76,7 @@ impl Cursor<'_> {
81
76
TokenKind :: Tab
82
77
}
83
78
84
- c if is_newline ( c) => {
85
- self . eat_while ( is_newline) ;
86
- TokenKind :: Newline
87
- }
88
-
89
- c if is_carriage_return ( c) => {
90
- self . eat_while ( is_carriage_return) ;
91
- TokenKind :: CarriageReturn
92
- }
79
+ c if is_line_ending ( c) => self . line_ending_sequence ( c) ,
93
80
94
81
c if is_vertical_tab ( c) => {
95
82
self . eat_while ( is_vertical_tab) ;
@@ -254,6 +241,43 @@ impl Cursor<'_> {
254
241
}
255
242
}
256
243
244
+ // Consumes a run of consecutive line endings and reports how many logical line breaks it held.
245
+ // `prev` is the line-ending character (\n or \r) that was consumed before this call.
246
+ // Postgres considers a DOS-style \r\n sequence as two successive newlines, but we care about
247
+ // logical line breaks, so a \r\n pair is counted as one logical line break here.
248
+ fn line_ending_sequence ( & mut self , prev : char ) -> TokenKind {
249
+ // the first line-ending character (`prev`) is already consumed, so the count starts at 1
250
+ let mut line_breaks = 1 ;
251
+
252
+ // started with \r: check whether it is the first half of a \r\n pair
253
+ if prev == '\r' && self . first ( ) == '\n' {
254
+ // consume the \n - the \r\n pair still counts as the 1 logical line break already recorded
255
+ self . bump ( ) ;
256
+ }
257
+
258
+ // keep consuming line endings; each \r\n pair, lone \n, or lone \r adds one logical break
259
+ loop {
260
+ match self . first ( ) {
261
+ '\r' if self . second ( ) == '\n' => { // \r\n pair: two characters, one break
262
+ self . bump ( ) ; // consume \r
263
+ self . bump ( ) ; // consume \n
264
+ line_breaks += 1 ;
265
+ }
266
+ '\n' => {
267
+ self . bump ( ) ;
268
+ line_breaks += 1 ;
269
+ }
270
+ '\r' => { // lone \r (the \r\n case was matched above)
271
+ self . bump ( ) ;
272
+ line_breaks += 1 ;
273
+ }
274
+ _ => break , // any other character ends the run
275
+ }
276
+ }
277
+
278
+ TokenKind :: LineEnding { count : line_breaks }
279
+ }
280
+
257
281
fn prefixed_string (
258
282
& mut self ,
259
283
mk_kind : fn ( bool ) -> LiteralKind ,
0 commit comments