@@ -149,8 +149,7 @@ impl<'s> ScriptSource<'s> {
149
149
let mut rest = source. content ;
150
150
151
151
// Whitespace may precede a frontmatter but must end with a newline
152
- const WHITESPACE : [ char ; 4 ] = [ ' ' , '\t' , '\r' , '\n' ] ;
153
- let trimmed = rest. trim_start_matches ( WHITESPACE ) ;
152
+ let trimmed = rest. trim_start_matches ( is_whitespace) ;
154
153
if trimmed. len ( ) != rest. len ( ) {
155
154
let trimmed_len = rest. len ( ) - trimmed. len ( ) ;
156
155
let last_trimmed_index = trimmed_len - 1 ;
@@ -184,7 +183,7 @@ impl<'s> ScriptSource<'s> {
184
183
anyhow:: bail!( "no closing `{fence_pattern}` found for frontmatter" ) ;
185
184
} ;
186
185
let ( info, rest) = rest. split_at ( info_end_index) ;
187
- let info = info. trim_matches ( WHITESPACE ) ;
186
+ let info = info. trim_matches ( is_whitespace ) ;
188
187
if !info. is_empty ( ) {
189
188
source. info = Some ( info) ;
190
189
}
@@ -202,7 +201,7 @@ impl<'s> ScriptSource<'s> {
202
201
let rest = & rest[ frontmatter_nl + nl_fence_pattern. len ( ) ..] ;
203
202
204
203
let ( after_closing_fence, rest) = rest. split_once ( "\n " ) . unwrap_or ( ( rest, "" ) ) ;
205
- let after_closing_fence = after_closing_fence. trim_matches ( WHITESPACE ) ;
204
+ let after_closing_fence = after_closing_fence. trim_matches ( is_whitespace ) ;
206
205
if !after_closing_fence. is_empty ( ) {
207
206
// extra characters beyond the original fence pattern, even if they are extra `-`
208
207
anyhow:: bail!( "trailing characters found after frontmatter close" ) ;
@@ -256,6 +255,40 @@ fn strip_shebang(input: &str) -> Option<usize> {
256
255
None
257
256
}
258
257
258
/// Returns `true` if `c` counts as whitespace under the Rust language definition.
///
/// See the [Rust language reference](https://doc.rust-lang.org/reference/whitespace.html)
/// for the definition of these character classes.
///
/// This mirrors `is_whitespace` in rust-lang/rust's
/// `compiler/rustc_lexer/src/lib.rs`.
fn is_whitespace(c: char) -> bool {
    // This is Pattern_White_Space.
    //
    // Note that this set is stable (i.e., it doesn't change with different
    // Unicode versions), so it's ok to just hard-code the values.

    matches!(
        c,
        // Usual ASCII suspects
        '\u{0009}'   // \t
        | '\u{000A}' // \n
        | '\u{000B}' // vertical tab
        | '\u{000C}' // form feed
        | '\u{000D}' // \r
        | '\u{0020}' // space

        // NEXT LINE from latin1
        | '\u{0085}'

        // Bidi markers
        | '\u{200E}' // LEFT-TO-RIGHT MARK
        | '\u{200F}' // RIGHT-TO-LEFT MARK

        // Dedicated whitespace characters from Unicode
        | '\u{2028}' // LINE SEPARATOR
        | '\u{2029}' // PARAGRAPH SEPARATOR
    )
}
291
+
259
292
#[ cfg( test) ]
260
293
mod test_expand {
261
294
use snapbox:: assert_data_eq;
0 commit comments