From 193c2311d9acb966f487f202c1997a836903dcc1 Mon Sep 17 00:00:00 2001 From: DCjanus Date: Sun, 27 Apr 2025 23:34:40 +0800 Subject: [PATCH 1/5] test: add test for ignoring comments at end of file This test reproduces issue #363, ensuring that comments at the end of the input data are properly ignored by the CSV reader. --- src/reader.rs | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/src/reader.rs b/src/reader.rs index 5b35d95..116008f 100644 --- a/src/reader.rs +++ b/src/reader.rs @@ -2636,4 +2636,22 @@ mod tests { assert_eq!(rdr.headers().unwrap().len(), 0); assert_eq!(rdr.records().count(), 0); } + + #[test] + fn comment_at_end_of_file_should_be_ignored() { + // Reproduce https://github.com/BurntSushi/rust-csv/issues/363 + // Test data: the last line is a comment without a trailing newline + let data = b"foo,bar,baz\na,b,c\nd,e,f\n# this is a comment"; + let mut rdr = + ReaderBuilder::new().comment(Some(b'#')).from_reader(&data[..]); + let mut rec = StringRecord::new(); + // First record + assert!(rdr.read_record(&mut rec).unwrap()); + assert_eq!(rec, vec!["a", "b", "c"]); + // Second record + assert!(rdr.read_record(&mut rec).unwrap()); + assert_eq!(rec, vec!["d", "e", "f"]); + // The comment line at EOF should be ignored, no more records + assert!(!rdr.read_record(&mut rec).unwrap()); + } } From 7b2be38c1855ccc6160090a0e2a932d6bc120862 Mon Sep 17 00:00:00 2001 From: DCjanus Date: Sun, 27 Apr 2025 23:48:31 +0800 Subject: [PATCH 2/5] test: update comment handling test to ignore EOF comments Refines the test for ignoring comments at the end of the input data by removing the second record assertion, ensuring that only relevant records are processed. This addresses the behavior outlined in issue #363. 
--- src/reader.rs | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/reader.rs b/src/reader.rs index 116008f..eaf9c76 100644 --- a/src/reader.rs +++ b/src/reader.rs @@ -2641,16 +2641,13 @@ mod tests { fn comment_at_end_of_file_should_be_ignored() { // Reproduce https://github.com/BurntSushi/rust-csv/issues/363 // Test data: the last line is a comment without a trailing newline - let data = b"foo,bar,baz\na,b,c\nd,e,f\n# this is a comment"; + let data = b"foo,bar,baz\na,b,c\n# this is a comment"; let mut rdr = ReaderBuilder::new().comment(Some(b'#')).from_reader(&data[..]); let mut rec = StringRecord::new(); // First record assert!(rdr.read_record(&mut rec).unwrap()); assert_eq!(rec, vec!["a", "b", "c"]); - // Second record - assert!(rdr.read_record(&mut rec).unwrap()); - assert_eq!(rec, vec!["d", "e", "f"]); // The comment line at EOF should be ignored, no more records assert!(!rdr.read_record(&mut rec).unwrap()); } From f2cba427d7bf2facaa1b2ed59cc56745227d8c9c Mon Sep 17 00:00:00 2001 From: DCjanus Date: Mon, 28 Apr 2025 03:56:04 +0800 Subject: [PATCH 3/5] fix: skip comment on last line https://github.com/BurntSushi/rust-csv/issues/363 --- csv-core/src/reader.rs | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/csv-core/src/reader.rs b/csv-core/src/reader.rs index fbb8894..d498db9 100644 --- a/csv-core/src/reader.rs +++ b/csv-core/src/reader.rs @@ -752,7 +752,7 @@ impl Reader { // parsing a new record, then we should sink into the final state // and never move from there. (pro-tip: the start state doubles as // the final state!) - if state >= self.dfa.final_record || state.is_start() { + if state >= self.dfa.final_record || state.is_start() || state == self.dfa.in_comment { self.dfa.new_state_final_end() } else { self.dfa.new_state_final_record() @@ -1116,6 +1116,8 @@ struct Dfa { in_field: DfaState, /// The DFA state corresponding to being inside an quoted field. 
in_quoted: DfaState, + /// The DFA state corresponding to being inside a comment. + in_comment: DfaState, /// The minimum DFA state that indicates a field has been parsed. All DFA /// states greater than this are also final-field states. final_field: DfaState, @@ -1132,6 +1134,7 @@ impl Dfa { classes: DfaClasses::new(), in_field: DfaState(0), in_quoted: DfaState(0), + in_comment: DfaState(0), final_field: DfaState(0), final_record: DfaState(0), } @@ -1167,6 +1170,7 @@ impl Dfa { fn finish(&mut self) { self.in_field = self.new_state(NfaState::InField); self.in_quoted = self.new_state(NfaState::InQuotedField); + self.in_comment = self.new_state(NfaState::InComment); self.final_field = self.new_state(NfaState::EndFieldDelim); self.final_record = self.new_state(NfaState::EndRecord); } From 095e842cfd6fc96bfdbe07789caf6fc9c5e6a6a3 Mon Sep 17 00:00:00 2001 From: DCjanus Date: Mon, 28 Apr 2025 04:04:18 +0800 Subject: [PATCH 4/5] fix: improve comment handling in CSV reader Refines the logic to ensure comments at the end of the file are ignored, addressing issue #363. Additionally, moves the regression test from src/reader.rs into the csv-core test suite. --- csv-core/src/reader.rs | 14 +++++++++++++- src/reader.rs | 15 --------------- 2 files changed, 13 insertions(+), 16 deletions(-) diff --git a/csv-core/src/reader.rs b/csv-core/src/reader.rs index d498db9..e7e8991 100644 --- a/csv-core/src/reader.rs +++ b/csv-core/src/reader.rs @@ -752,7 +752,10 @@ impl Reader { // parsing a new record, then we should sink into the final state // and never move from there. (pro-tip: the start state doubles as // the final state!) 
- if state >= self.dfa.final_record || state.is_start() || state == self.dfa.in_comment { + if state >= self.dfa.final_record + || state.is_start() + || state == self.dfa.in_comment + { self.dfa.new_state_final_end() } else { self.dfa.new_state_final_record() @@ -1726,6 +1729,15 @@ mod tests { b.comment(Some(b'#')); } ); + // ref: https://github.com/BurntSushi/rust-csv/issues/363 + parses_to!( + comment_at_end_of_file_should_be_ignored, + "foo,bar,baz\n# this is a comment in last line", + csv![["foo", "bar", "baz"]], + |b: &mut ReaderBuilder| { + b.comment(Some(b'#')); + } + ); macro_rules! assert_read { ( diff --git a/src/reader.rs b/src/reader.rs index eaf9c76..5b35d95 100644 --- a/src/reader.rs +++ b/src/reader.rs @@ -2636,19 +2636,4 @@ mod tests { assert_eq!(rdr.headers().unwrap().len(), 0); assert_eq!(rdr.records().count(), 0); } - - #[test] - fn comment_at_end_of_file_should_be_ignored() { - // Reproduce https://github.com/BurntSushi/rust-csv/issues/363 - // Test data: the last line is a comment without a trailing newline - let data = b"foo,bar,baz\na,b,c\n# this is a comment"; - let mut rdr = - ReaderBuilder::new().comment(Some(b'#')).from_reader(&data[..]); - let mut rec = StringRecord::new(); - // First record - assert!(rdr.read_record(&mut rec).unwrap()); - assert_eq!(rec, vec!["a", "b", "c"]); - // The comment line at EOF should be ignored, no more records - assert!(!rdr.read_record(&mut rec).unwrap()); - } } From 32d3649ea7019faf67f6c2799e760ca951cf4b50 Mon Sep 17 00:00:00 2001 From: DCjanus Date: Mon, 28 Apr 2025 04:06:33 +0800 Subject: [PATCH 5/5] test: add additional test for ignoring comments at end of file Introduces a new test case to validate that comments at the end of the file are properly ignored, enhancing the coverage of the existing comment handling logic. 
--- csv-core/src/reader.rs | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/csv-core/src/reader.rs b/csv-core/src/reader.rs index e7e8991..6adef8f 100644 --- a/csv-core/src/reader.rs +++ b/csv-core/src/reader.rs @@ -1731,13 +1731,21 @@ mod tests { ); // ref: https://github.com/BurntSushi/rust-csv/issues/363 parses_to!( - comment_at_end_of_file_should_be_ignored, + comment_at_end_of_file_should_be_ignored1, "foo,bar,baz\n# this is a comment in last line", csv![["foo", "bar", "baz"]], |b: &mut ReaderBuilder| { b.comment(Some(b'#')); } ); + parses_to!( + comment_at_end_of_file_should_be_ignored2, + "foo,bar,baz\n# this is a comment in last line\n", + csv![["foo", "bar", "baz"]], + |b: &mut ReaderBuilder| { + b.comment(Some(b'#')); + } + ); macro_rules! assert_read { (