Skip to content

Commit 7a73174

Browse files
committed
fix(embedded): Match rustc's whitespace definition
1 parent 3fce417 commit 7a73174

File tree

1 file changed

+37
-4
lines changed

1 file changed

+37
-4
lines changed

src/cargo/util/toml/embedded.rs

Lines changed: 37 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -149,8 +149,7 @@ impl<'s> ScriptSource<'s> {
149149
let mut rest = source.content;
150150

151151
// Whitespace may precede a frontmatter but must end with a newline
152-
const WHITESPACE: [char; 4] = [' ', '\t', '\r', '\n'];
153-
let trimmed = rest.trim_start_matches(WHITESPACE);
152+
let trimmed = rest.trim_start_matches(is_whitespace);
154153
if trimmed.len() != rest.len() {
155154
let trimmed_len = rest.len() - trimmed.len();
156155
let last_trimmed_index = trimmed_len - 1;
@@ -184,7 +183,7 @@ impl<'s> ScriptSource<'s> {
184183
anyhow::bail!("no closing `{fence_pattern}` found for frontmatter");
185184
};
186185
let (info, rest) = rest.split_at(info_end_index);
187-
let info = info.trim_matches(WHITESPACE);
186+
let info = info.trim_matches(is_whitespace);
188187
if !info.is_empty() {
189188
source.info = Some(info);
190189
}
@@ -202,7 +201,7 @@ impl<'s> ScriptSource<'s> {
202201
let rest = &rest[frontmatter_nl + nl_fence_pattern.len()..];
203202

204203
let (after_closing_fence, rest) = rest.split_once("\n").unwrap_or((rest, ""));
205-
let after_closing_fence = after_closing_fence.trim_matches(WHITESPACE);
204+
let after_closing_fence = after_closing_fence.trim_matches(is_whitespace);
206205
if !after_closing_fence.is_empty() {
207206
// extra characters beyond the original fence pattern, even if they are extra `-`
208207
anyhow::bail!("trailing characters found after frontmatter close");
@@ -256,6 +255,40 @@ fn strip_shebang(input: &str) -> Option<usize> {
256255
None
257256
}
258257

258+
/// True if `c` is considered whitespace according to the Rust language definition.
259+
/// See [Rust language reference](https://doc.rust-lang.org/reference/whitespace.html)
260+
/// for definitions of these classes.
261+
///
262+
/// See rust-lang/rust's compiler/rustc_lexer/src/lib.rs `is_whitespace`
263+
fn is_whitespace(c: char) -> bool {
264+
// This is Pattern_White_Space.
265+
//
266+
// Note that this set is stable (i.e., it doesn't change with different
267+
// Unicode versions), so it's ok to just hard-code the values.
268+
269+
matches!(
270+
c,
271+
// Usual ASCII suspects
272+
'\u{0009}' // \t
273+
| '\u{000A}' // \n
274+
| '\u{000B}' // vertical tab
275+
| '\u{000C}' // form feed
276+
| '\u{000D}' // \r
277+
| '\u{0020}' // space
278+
279+
// NEXT LINE from latin1
280+
| '\u{0085}'
281+
282+
// Bidi markers
283+
| '\u{200E}' // LEFT-TO-RIGHT MARK
284+
| '\u{200F}' // RIGHT-TO-LEFT MARK
285+
286+
// Dedicated whitespace characters from Unicode
287+
| '\u{2028}' // LINE SEPARATOR
288+
| '\u{2029}' // PARAGRAPH SEPARATOR
289+
)
290+
}
291+
259292
#[cfg(test)]
260293
mod test_expand {
261294
use snapbox::assert_data_eq;

0 commit comments

Comments
 (0)