cloudflare · orium · Jun 20, 2025 · Jan 1, 2025 · Jun 12, 2025 · Jan 1, 2025
diff --git a/src/rewritable_units/mod.rs b/src/rewritable_units/mod.rs
@@ -143,7 +143,9 @@ mod test_utils {
                 |c: &[u8]| output.push(c),
             );
 
-            rewriter.write(html).unwrap();
+            for ch in html.chunks(15) {
+                rewriter.write(ch).unwrap();
+            }
             rewriter.end().unwrap();
         }
 

diff --git a/src/rewritable_units/text_decoder.rs b/src/rewritable_units/text_decoder.rs
@@ -2,6 +2,8 @@ use crate::base::SharedEncoding;
 use crate::rewriter::RewritingError;
 use encoding_rs::{CoderResult, Decoder, Encoding, UTF_8};
 
+const DEFAULT_BUFFER_LEN: usize = if cfg!(test) { 13 } else { 1024 };
+
 pub(crate) struct TextDecoder {
     encoding: SharedEncoding,
     pending_text_streaming_decoder: Option<Decoder>,
@@ -15,8 +17,9 @@ impl TextDecoder {
         Self {
             encoding,
             pending_text_streaming_decoder: None,
-            // TODO make adjustable
-            text_buffer: String::from_utf8(vec![0u8; 1024]).unwrap(),
+            // Left empty here; it is initialized later to DEFAULT_BUFFER_LEN,
+            // because encoding_rs wants a slice to decode into.
+            text_buffer: String::new(),
         }
     }
 
@@ -52,6 +55,10 @@ impl TextDecoder {
             }
         }
 
+        if self.pending_text_streaming_decoder.is_none() && self.text_buffer.is_empty() {
+            // repeat() avoids the UTF-8 check compared to `String::from_utf8(vec![0; len])`
+            self.text_buffer = "\0".repeat(DEFAULT_BUFFER_LEN);
+        }
         let decoder = self
             .pending_text_streaming_decoder
             .get_or_insert_with(|| encoding.new_decoder_without_bom_handling());
@@ -113,7 +120,7 @@ impl TextDecoder {
                 // The slow path buffers 1KB, and even though this shouldn't matter,
                 // it is an observable behavior, and it makes bugs worse for text handlers
                 // that assume they'll get only a single chunk.
-                if valid_up_to != raw_input.len() && valid_up_to < self.text_buffer.len() {
+                if valid_up_to != raw_input.len() && valid_up_to < DEFAULT_BUFFER_LEN {
                     return None;
                 }
 

diff --git a/src/rewritable_units/tokens/text_chunk.rs b/src/rewritable_units/tokens/text_chunk.rs
@@ -412,32 +412,27 @@ mod tests {
             };
         }
 
-        macro_rules! skip_eof_chunk {
-            ($c:ident) => {
-                if $c.last_in_text_node() {
-                    // This is not always true — a replacement char for an incomplete UTF-8 sequence could be flushed last
-                    assert!($c.as_str().is_empty());
-                    return;
-                }
-            };
-        }
-
         #[test]
         fn parsed() {
             test!(|_| {}, HTML);
         }
 
         #[test]
         fn with_prepends_and_appends() {
+            let mut first = true;
             test!(
                 |c| {
-                    skip_eof_chunk!(c);
-                    c.before("<span>", ContentType::Text);
-                    c.before("<div>Hey</div>", ContentType::Html);
-                    c.before("<foo>", ContentType::Html);
-                    c.after("</foo>", ContentType::Html);
-                    c.after("<!-- 42 -->", ContentType::Html);
-                    c.after("<foo & bar>", ContentType::Text);
+                    let is_first = std::mem::replace(&mut first, c.last_in_text_node());
+                    if is_first {
+                        c.before("<span>", ContentType::Text);
+                        c.before("<div>Hey</div>", ContentType::Html);
+                        c.before("<foo>", ContentType::Html);
+                    }
+                    if c.last_in_text_node() {
+                        c.after("</foo>", ContentType::Html);
+                        c.after("<!-- 42 -->", ContentType::Html);
+                        c.after("<foo & bar>", ContentType::Text);
+                    }
                 },
                 concat!(
                     "&lt;span&gt;<div>Hey</div><foo>",
@@ -452,17 +447,22 @@ mod tests {
 
         #[test]
         fn removed() {
+            let mut first = true;
             test!(
                 |c| {
-                    skip_eof_chunk!(c);
+                    let is_first = std::mem::replace(&mut first, c.last_in_text_node());
                     assert!(!c.removed());
 
                     c.remove();
 
                     assert!(c.removed());
 
-                    c.before("<before>", ContentType::Html);
-                    c.after("<after>", ContentType::Html);
+                    if is_first {
+                        c.before("<before>", ContentType::Html);
+                    }
+                    if c.last_in_text_node() {
+                        c.after("<after>", ContentType::Html);
+                    }
                 },
                 "<before><after>"
             );
@@ -472,17 +472,20 @@ mod tests {
         fn replaced_with_text() {
             test!(
                 |c| {
-                    skip_eof_chunk!(c);
-                    c.before("<before>", ContentType::Html);
-                    c.after("<after>", ContentType::Html);
+                    if c.last_in_text_node() {
+                        c.before("<before>", ContentType::Html);
+                        c.after("<after>", ContentType::Html);
 
-                    assert!(!c.removed());
+                        assert!(!c.removed());
 
-                    c.replace("<div></div>", ContentType::Html);
-                    c.replace("<!--42-->", ContentType::Html);
-                    c.replace("<foo & bar>", ContentType::Text);
+                        c.replace("<div></div>", ContentType::Html);
+                        c.replace("<!--42-->", ContentType::Html);
+                        c.replace("<foo & bar>", ContentType::Text);
 
-                    assert!(c.removed());
+                        assert!(c.removed());
+                    } else {
+                        c.remove();
+                    }
                 },
                 "<before>&lt;foo &amp; bar&gt;<after>"
             );
@@ -492,17 +495,20 @@ mod tests {
         fn replaced_with_html() {
             test!(
                 |c| {
-                    skip_eof_chunk!(c);
-                    c.before("<before>", ContentType::Html);
-                    c.after("<after>", ContentType::Html);
+                    if c.last_in_text_node() {
+                        c.before("<before>", ContentType::Html);
+                        c.after("<after>", ContentType::Html);
 
-                    assert!(!c.removed());
+                        assert!(!c.removed());
 
-                    c.replace("<div></div>", ContentType::Html);
-                    c.replace("<!--42-->", ContentType::Html);
-                    c.replace("<foo & bar>", ContentType::Html);
+                        c.replace("<div></div>", ContentType::Html);
+                        c.replace("<!--42-->", ContentType::Html);
+                        c.replace("<foo & bar>", ContentType::Html);
 
-                    assert!(c.removed());
+                        assert!(c.removed());
+                    } else {
+                        c.remove();
+                    }
                 },
                 "<before><foo & bar><after>"
             );