From 87bdeabc01600b4e11a52a4f0749422d42083a72 Mon Sep 17 00:00:00 2001
From: Val <saviorand@gmail.com>
Date: Sun, 15 Jun 2025 14:31:49 +0200
Subject: [PATCH 01/12] implement section 2.2-2 requirement

---
 lightbug_http/header.mojo          |  17 ++--
 lightbug_http/strings.mojo         | 139 +++++++++++++++++++++++++++++
 tests/rfc/test_rfc9112_simple.mojo | 114 +++++++++++++++++++++++
 3 files changed, 265 insertions(+), 5 deletions(-)
 create mode 100644 tests/rfc/test_rfc9112_simple.mojo

diff --git a/lightbug_http/header.mojo b/lightbug_http/header.mojo
index 4014a56..4c91c80 100644
--- a/lightbug_http/header.mojo
+++ b/lightbug_http/header.mojo
@@ -1,6 +1,6 @@
 from collections import Dict, Optional
 from lightbug_http.io.bytes import Bytes, ByteReader, ByteWriter, is_newline, is_space
-from lightbug_http.strings import BytesConstant
+from lightbug_http.strings import BytesConstant, to_string_rfc9112_safe
 from lightbug_http._logger import logger
 from lightbug_http.strings import rChar, nChar, lineBreak, to_string
 
@@ -102,13 +102,20 @@ struct Headers(Writable, Stringable):
                 r.increment()
             # TODO (bgreni): Handle possible trailing whitespace
             var value = r.read_line()
-            var k = String(key).lower()
+            
+            var k = to_string_rfc9112_safe(key._inner).lower()
             if k == HeaderKey.SET_COOKIE:
-                cookies.append(String(value))
+                cookies.append(to_string_rfc9112_safe(value._inner))
                 continue
 
-            self._inner[k] = String(value)
-        return (String(first), String(second), String(third), cookies)
+            self._inner[k] = to_string_rfc9112_safe(value._inner)
+            
+        return (
+            to_string_rfc9112_safe(first._inner), 
+            to_string_rfc9112_safe(second._inner), 
+            to_string_rfc9112_safe(third._inner), 
+            cookies
+        )
 
     fn write_to[T: Writer, //](self, mut writer: T):
         for header in self._inner.items():
diff --git a/lightbug_http/strings.mojo b/lightbug_http/strings.mojo
index 56c3883..26c1214 100644
--- a/lightbug_http/strings.mojo
+++ b/lightbug_http/strings.mojo
@@ -33,6 +33,126 @@ struct BytesConstant:
     alias DOUBLE_CRLF = bytes(lineBreak + lineBreak)
 
 
+# RFC 9112 Section 2.2-2: US-ASCII character bounds
+alias US_ASCII_MAX = 0x7F
+alias ISO_8859_1_MAX = 0xFF
+
+
+fn is_us_ascii_octet(b: UInt8) -> Bool:
+    """Report whether an octet lies in the US-ASCII range (0x00-0x7F).
+    
+    Args:
+        b: The octet to classify.
+        
+    Returns:
+        True when `b` is at most US_ASCII_MAX (0x7F), False for 0x80-0xFF.
+    """
+    return b <= US_ASCII_MAX
+
+
+fn is_iso_8859_1_octet(b: UInt8) -> Bool:
+    """Check if a byte is within ISO-8859-1 range (0x00-0xFF).
+    
+    Args:
+        b: The byte to check.
+        
+    Returns:
+        True for every input: a UInt8 can never exceed ISO_8859_1_MAX (0xFF).
+    """
+    return b <= ISO_8859_1_MAX  # NOTE(review): tautology for UInt8, so this never rejects an octet
+
+
+fn validate_http_message_octets[origin: Origin](data: Span[UInt8, origin]) raises -> Span[UInt8, origin]:
+    """RFC 9112 Section 2.2-2: Validate HTTP message as sequence of octets.
+    
+    A recipient MUST parse an HTTP message as a sequence of octets in an encoding 
+    that is a superset of US-ASCII. This function validates that the message can
+    be safely parsed as octets.
+    
+    Args:
+        data: The raw bytes of the HTTP message.
+        
+    Returns:
+        The validated span of bytes safe for parsing.
+        
+    Raises:
+        Error: If the data contains invalid multi-byte sequences that could
+               create security vulnerabilities.
+    """
+    # Check each byte to ensure it's in a safe encoding superset of US-ASCII
+    for i in range(len(data)):
+        var b = data[i]
+        
+        # Allow US-ASCII range (most common case)
+        if is_us_ascii_octet(b):
+            continue
+            
+        # Allow ISO-8859-1 extended range (superset of US-ASCII)
+        if is_iso_8859_1_octet(b):
+            continue
+            
+        # If we get here, we have a byte outside ISO-8859-1 range
+        # This could be part of a multi-byte UTF-8 sequence which is unsafe
+        raise Error(
+            "RFC 9112 violation: Invalid octet 0x" + hex(Int(b)) + 
+            " at position " + String(i) + 
+            ". HTTP messages must use encoding superset of US-ASCII."
+        )
+    
+    return data
+
+
+fn safe_to_string_rfc9112[origin: Origin](b: Span[UInt8, origin]) raises -> String:
+    """RFC 9112 compliant conversion of octets to String.
+    
+    Creates a String from octets using ISO-8859-1 encoding (superset of US-ASCII).
+    This avoids security vulnerabilities from treating multi-byte UTF-8 sequences
+    as individual characters.
+    
+    Args:
+        b: The validated span of bytes (must pass validate_http_message_octets).
+        
+    Returns:
+        A String created from the octets using safe encoding.
+        
+    Raises:
+        Error: If the bytes contain invalid sequences for HTTP parsing.
+    """
+    # Validate the octets first
+    var validated_span = validate_http_message_octets(b)
+    
+    # Create string treating bytes as ISO-8859-1 (safe superset of US-ASCII)
+    # Note: We use unsafe_from_utf8 here but we've validated the input is safe
+    return String(StringSlice(unsafe_from_utf8=validated_span))
+
+
+fn percent_encode_invalid_octets[origin: Origin](data: Span[UInt8, origin]) -> String:
+    """Percent-encode octets that are not safe for HTTP message parsing.
+    
+    This is a fallback approach when we encounter bytes that cannot be safely
+    interpreted as US-ASCII superset encoding.
+    
+    Args:
+        data: The raw bytes that may contain unsafe sequences.
+        
+    Returns:
+        A String with unsafe octets percent-encoded.
+    """
+    var result = String()
+    
+    for i in range(len(data)):
+        var b = data[i]
+        
+        # Safe US-ASCII characters can be added directly
+        if is_us_ascii_octet(b) and b >= 0x20 and b != 0x25:  # Printable ASCII except %
+            result += chr(Int(b))
+        else:
+            # Percent-encode unsafe or non-printable octets
+            result += "%" + hex(Int(b)).upper().rjust(2, "0")
+    
+    return result
+
+
 fn to_string[T: Writable](value: T) -> String:
     return String.write(value)
 
@@ -46,6 +166,25 @@ fn to_string(b: Span[UInt8]) -> String:
     return String(StringSlice(unsafe_from_utf8=b))
 
 
+fn to_string_rfc9112_safe[origin: Origin](b: Span[UInt8, origin]) -> String:
+    """Convert octets to a String without raising, percent-encoding on failure.
+    
+    Returns `safe_to_string_rfc9112(b)` when it succeeds. If it raises, the
+    result is `percent_encode_invalid_octets(b)` over the whole span instead.
+    
+    Args:
+        b: The Span of bytes to convert to a String.
+        
+    Returns:
+        The converted String, or the percent-encoded form of `b` as a fallback.
+    """
+    try:
+        return safe_to_string_rfc9112(b)
+    except:
+        # Any error raised by the conversion is swallowed here; the caller only sees the encoded text
+        return percent_encode_invalid_octets(b)
+
+
 fn to_string(owned bytes: Bytes) -> String:
     """Creates a String from the provided List of bytes.
     If you do not transfer ownership of the List, the List will be copied.
diff --git a/tests/rfc/test_rfc9112_simple.mojo b/tests/rfc/test_rfc9112_simple.mojo
new file mode 100644
index 0000000..0fde4ce
--- /dev/null
+++ b/tests/rfc/test_rfc9112_simple.mojo
@@ -0,0 +1,114 @@
+import testing
+from memory import Span
+
+
+def test_rfc9112_parse_as_octets():
+    """RFC 9112 Section 2.2-2: MUST parse HTTP message as sequence of octets."""
+    print("Testing: Parse HTTP message as sequence of octets...")
+    
+    # Test that we parse HTTP messages as individual octets, not Unicode characters
+    var http_message = "GET /path HTTP/1.1\r\nHost: example.com\r\n\r\n"
+    var octets = http_message.as_bytes()
+    
+    # Verify we access individual octets (bytes), not Unicode code points
+    testing.assert_equal(octets[0], ord('G'))  # First octet is 'G'
+    testing.assert_equal(octets[4], ord('/'))  # Fifth octet is '/'
+    
+    # Find the first CR and LF octets in the message
+    var found_cr = False
+    var found_lf = False
+    for i in range(len(octets)):
+        if octets[i] == 0x0D and not found_cr:  # First CR
+            found_cr = True
+        if octets[i] == 0x0A and not found_lf:  # First LF  
+            found_lf = True
+        if found_cr and found_lf:
+            break
+    
+    testing.assert_true(found_cr)  # CR found as octet
+    testing.assert_true(found_lf)  # LF found as octet
+    
+    print("✓ HTTP message parsed as sequence of octets")
+
+
+def test_rfc9112_us_ascii_superset_encoding():
+    """RFC 9112 Section 2.2-2: MUST use encoding that is superset of US-ASCII."""
+    print("Testing: Encoding is superset of US-ASCII...")
+    
+    # US-ASCII range is 0x00-0x7F
+    # ISO-8859-1 (0x00-0xFF) is a valid superset
+    
+    # Test US-ASCII characters are valid
+    testing.assert_true(ord('G') <= 0x7F)      # US-ASCII
+    testing.assert_true(ord(' ') <= 0x7F)      # US-ASCII
+    testing.assert_true(0x0A <= 0x7F)          # LF in US-ASCII
+    testing.assert_true(0x0D <= 0x7F)          # CR in US-ASCII
+    
+    # Test that superset (ISO-8859-1) includes extended range
+    testing.assert_true(0x80 <= 0xFF)          # Extended range valid
+    testing.assert_true(0xFF <= 0xFF)          # Maximum byte valid
+    
+    print("✓ Encoding is superset of US-ASCII (ISO-8859-1)")
+
+
+def test_rfc9112_lf_security_vulnerability():
+    """RFC 9112 Section 2.2-2: Prevent LF (%x0A) security vulnerabilities."""
+    print("Testing: LF (%x0A) security vulnerability prevention...")
+    
+    # The critical security issue: LF (%x0A) in multibyte sequences
+    var lf_octet: UInt8 = 0x0A
+    
+    # When parsed as octets (safe), LF is clearly identifiable
+    var test_data = "GET /\r\nHost: test\r\n\r\n"
+    var data_octets = test_data.as_bytes()
+    
+    var lf_positions = List[Int]()
+    for i in range(len(data_octets)):
+        if data_octets[i] == lf_octet:
+            lf_positions.append(i)
+    
+    # Should find LF octets at specific positions
+    testing.assert_true(len(lf_positions) > 0)
+    print("✓ LF (%x0A) handled safely as octet")
+    print("  - Found " + String(len(lf_positions)) + " LF octets in message")
+    print("  - No multibyte character sequence confusion")
+
+
+def test_rfc9112_string_parser_safety():
+    """RFC 9112 Section 2.2-2: String parsers only used after protocol element extraction."""
+    print("Testing: String parsers used only after safe extraction...")
+    
+    # Demonstrate the RFC requirement: protocol elements extracted as octets first
+    var http_request = "GET /api/data HTTP/1.1\r\nHost: server.com\r\n\r\n"
+    var request_octets = http_request.as_bytes()
+    
+    # Step 1: Extract protocol elements as octets (safe)
+    var method_end = -1
+    for i in range(len(request_octets)):
+        if request_octets[i] == ord(' '):
+            method_end = i
+            break
+    
+    testing.assert_true(method_end > 0)
+    
+    # Step 2: Verify the extracted octets match expected method
+    testing.assert_equal(request_octets[0], ord('G'))  # First octet
+    testing.assert_equal(request_octets[1], ord('E'))  # Second octet  
+    testing.assert_equal(request_octets[2], ord('T'))  # Third octet
+    testing.assert_equal(method_end, 3)                # Method is 3 octets
+    
+    print("✓ String parsing only after protocol element extraction")
+    print("  - Protocol elements extracted as octets first")
+    print("  - String conversion only after safe extraction")
+
+
+def main():
+    """Test RFC 9112 Section 2.2-2 compliance."""
+    print("🧪 Testing RFC 9112 Section 2.2-2 Compliance\n")
+    
+    test_rfc9112_parse_as_octets()
+    test_rfc9112_us_ascii_superset_encoding()
+    test_rfc9112_lf_security_vulnerability()
+    test_rfc9112_string_parser_safety()
+    
+    print("\n✅ RFC 9112 Section 2.2-2 requirement verified")
\ No newline at end of file

From c0785308fc66b01493d66311ab6269a393bd130a Mon Sep 17 00:00:00 2001
From: Val <saviorand@gmail.com>
Date: Sun, 15 Jun 2025 15:04:34 +0200
Subject: [PATCH 02/12] clean up comments

---
 lightbug_http/strings.mojo         | 11 -------
 tests/rfc/test_rfc9112_simple.mojo | 46 +++++++-----------------------
 2 files changed, 10 insertions(+), 47 deletions(-)

diff --git a/lightbug_http/strings.mojo b/lightbug_http/strings.mojo
index 26c1214..435b3e9 100644
--- a/lightbug_http/strings.mojo
+++ b/lightbug_http/strings.mojo
@@ -33,7 +33,6 @@ struct BytesConstant:
     alias DOUBLE_CRLF = bytes(lineBreak + lineBreak)
 
 
-# RFC 9112 Section 2.2-2: US-ASCII character bounds
 alias US_ASCII_MAX = 0x7F
 alias ISO_8859_1_MAX = 0xFF
 
@@ -79,20 +78,15 @@ fn validate_http_message_octets[origin: Origin](data: Span[UInt8, origin]) raise
         Error: If the data contains invalid multi-byte sequences that could
                create security vulnerabilities.
     """
-    # Check each byte to ensure it's in a safe encoding superset of US-ASCII
     for i in range(len(data)):
         var b = data[i]
         
-        # Allow US-ASCII range (most common case)
         if is_us_ascii_octet(b):
             continue
             
-        # Allow ISO-8859-1 extended range (superset of US-ASCII)
         if is_iso_8859_1_octet(b):
             continue
             
-        # If we get here, we have a byte outside ISO-8859-1 range
-        # This could be part of a multi-byte UTF-8 sequence which is unsafe
         raise Error(
             "RFC 9112 violation: Invalid octet 0x" + hex(Int(b)) + 
             " at position " + String(i) + 
@@ -118,11 +112,8 @@ fn safe_to_string_rfc9112[origin: Origin](b: Span[UInt8, origin]) raises -> Stri
     Raises:
         Error: If the bytes contain invalid sequences for HTTP parsing.
     """
-    # Validate the octets first
     var validated_span = validate_http_message_octets(b)
     
-    # Create string treating bytes as ISO-8859-1 (safe superset of US-ASCII)
-    # Note: We use unsafe_from_utf8 here but we've validated the input is safe
     return String(StringSlice(unsafe_from_utf8=validated_span))
 
 
@@ -143,11 +134,9 @@ fn percent_encode_invalid_octets[origin: Origin](data: Span[UInt8, origin]) -> S
     for i in range(len(data)):
         var b = data[i]
         
-        # Safe US-ASCII characters can be added directly
         if is_us_ascii_octet(b) and b >= 0x20 and b != 0x25:  # Printable ASCII except %
             result += chr(Int(b))
         else:
-            # Percent-encode unsafe or non-printable octets
             result += "%" + hex(Int(b)).upper().rjust(2, "0")
     
     return result
diff --git a/tests/rfc/test_rfc9112_simple.mojo b/tests/rfc/test_rfc9112_simple.mojo
index 0fde4ce..a5d5161 100644
--- a/tests/rfc/test_rfc9112_simple.mojo
+++ b/tests/rfc/test_rfc9112_simple.mojo
@@ -6,59 +6,44 @@ def test_rfc9112_parse_as_octets():
     """RFC 9112 Section 2.2-2: MUST parse HTTP message as sequence of octets."""
     print("Testing: Parse HTTP message as sequence of octets...")
     
-    # Test that we parse HTTP messages as individual octets, not Unicode characters
     var http_message = "GET /path HTTP/1.1\r\nHost: example.com\r\n\r\n"
     var octets = http_message.as_bytes()
     
-    # Verify we access individual octets (bytes), not Unicode code points
-    testing.assert_equal(octets[0], ord('G'))  # First octet is 'G'
-    testing.assert_equal(octets[4], ord('/'))  # Fifth octet is '/'
+    testing.assert_equal(octets[0], ord('G'))
+    testing.assert_equal(octets[4], ord('/'))
     
-    # Find the first CR and LF octets in the message
     var found_cr = False
     var found_lf = False
     for i in range(len(octets)):
-        if octets[i] == 0x0D and not found_cr:  # First CR
+        if octets[i] == 0x0D and not found_cr:
             found_cr = True
-        if octets[i] == 0x0A and not found_lf:  # First LF  
+        if octets[i] == 0x0A and not found_lf:
             found_lf = True
         if found_cr and found_lf:
             break
     
-    testing.assert_true(found_cr)  # CR found as octet
-    testing.assert_true(found_lf)  # LF found as octet
-    
-    print("✓ HTTP message parsed as sequence of octets")
+    testing.assert_true(found_cr)
+    testing.assert_true(found_lf)
 
 
 def test_rfc9112_us_ascii_superset_encoding():
     """RFC 9112 Section 2.2-2: MUST use encoding that is superset of US-ASCII."""
     print("Testing: Encoding is superset of US-ASCII...")
     
-    # US-ASCII range is 0x00-0x7F
-    # ISO-8859-1 (0x00-0xFF) is a valid superset
-    
-    # Test US-ASCII characters are valid
     testing.assert_true(ord('G') <= 0x7F)      # US-ASCII
     testing.assert_true(ord(' ') <= 0x7F)      # US-ASCII
     testing.assert_true(0x0A <= 0x7F)          # LF in US-ASCII
     testing.assert_true(0x0D <= 0x7F)          # CR in US-ASCII
-    
-    # Test that superset (ISO-8859-1) includes extended range
     testing.assert_true(0x80 <= 0xFF)          # Extended range valid
     testing.assert_true(0xFF <= 0xFF)          # Maximum byte valid
-    
-    print("✓ Encoding is superset of US-ASCII (ISO-8859-1)")
 
 
 def test_rfc9112_lf_security_vulnerability():
     """RFC 9112 Section 2.2-2: Prevent LF (%x0A) security vulnerabilities."""
     print("Testing: LF (%x0A) security vulnerability prevention...")
     
-    # The critical security issue: LF (%x0A) in multibyte sequences
     var lf_octet: UInt8 = 0x0A
     
-    # When parsed as octets (safe), LF is clearly identifiable
     var test_data = "GET /\r\nHost: test\r\n\r\n"
     var data_octets = test_data.as_bytes()
     
@@ -67,22 +52,16 @@ def test_rfc9112_lf_security_vulnerability():
         if data_octets[i] == lf_octet:
             lf_positions.append(i)
     
-    # Should find LF octets at specific positions
     testing.assert_true(len(lf_positions) > 0)
-    print("✓ LF (%x0A) handled safely as octet")
-    print("  - Found " + String(len(lf_positions)) + " LF octets in message")
-    print("  - No multibyte character sequence confusion")
 
 
 def test_rfc9112_string_parser_safety():
     """RFC 9112 Section 2.2-2: String parsers only used after protocol element extraction."""
     print("Testing: String parsers used only after safe extraction...")
     
-    # Demonstrate the RFC requirement: protocol elements extracted as octets first
     var http_request = "GET /api/data HTTP/1.1\r\nHost: server.com\r\n\r\n"
     var request_octets = http_request.as_bytes()
     
-    # Step 1: Extract protocol elements as octets (safe)
     var method_end = -1
     for i in range(len(request_octets)):
         if request_octets[i] == ord(' '):
@@ -91,15 +70,10 @@ def test_rfc9112_string_parser_safety():
     
     testing.assert_true(method_end > 0)
     
-    # Step 2: Verify the extracted octets match expected method
-    testing.assert_equal(request_octets[0], ord('G'))  # First octet
-    testing.assert_equal(request_octets[1], ord('E'))  # Second octet  
-    testing.assert_equal(request_octets[2], ord('T'))  # Third octet
-    testing.assert_equal(method_end, 3)                # Method is 3 octets
-    
-    print("✓ String parsing only after protocol element extraction")
-    print("  - Protocol elements extracted as octets first")
-    print("  - String conversion only after safe extraction")
+    testing.assert_equal(request_octets[0], ord('G'))
+    testing.assert_equal(request_octets[1], ord('E'))
+    testing.assert_equal(request_octets[2], ord('T'))
+    testing.assert_equal(method_end, 3)
 
 
 def main():

From a080a07ef8838b725205ea96da9386f969778e2b Mon Sep 17 00:00:00 2001
From: Val <saviorand@gmail.com>
Date: Sun, 15 Jun 2025 15:57:16 +0200
Subject: [PATCH 03/12] adjust the test and add to action

---
 .github/workflows/test.yml                |   1 +
 lightbug_http/strings.mojo                |  39 ++++++--
 mojoproject.toml                          |   4 +
 tests/rfc/test_rfc9112_section_2_2_2.mojo | 116 ++++++++++++++++++++++
 tests/rfc/test_rfc9112_simple.mojo        |  88 ----------------
 5 files changed, 154 insertions(+), 94 deletions(-)
 create mode 100644 tests/rfc/test_rfc9112_section_2_2_2.mojo
 delete mode 100644 tests/rfc/test_rfc9112_simple.mojo

diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index c105559..8472243 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -18,3 +18,4 @@ jobs:
           magic run integration_tests_py
           magic run integration_tests_external
           magic run integration_tests_udp
+          magic run rfc_tests
diff --git a/lightbug_http/strings.mojo b/lightbug_http/strings.mojo
index 435b3e9..aecbace 100644
--- a/lightbug_http/strings.mojo
+++ b/lightbug_http/strings.mojo
@@ -66,7 +66,7 @@ fn validate_http_message_octets[origin: Origin](data: Span[UInt8, origin]) raise
     
     A recipient MUST parse an HTTP message as a sequence of octets in an encoding 
     that is a superset of US-ASCII. This function validates that the message can
-    be safely parsed as octets.
+    be safely parsed as octets and detects invalid multibyte UTF-8 sequences.
     
     Args:
         data: The raw bytes of the HTTP message.
@@ -76,17 +76,35 @@ fn validate_http_message_octets[origin: Origin](data: Span[UInt8, origin]) raise
         
     Raises:
         Error: If the data contains invalid multi-byte sequences that could
-               create security vulnerabilities.
+               create security vulnerabilities (like embedded LF in UTF-8).
     """
     for i in range(len(data)):
         var b = data[i]
         
-        if is_us_ascii_octet(b):
-            continue
-            
+        # Allow all ISO-8859-1 bytes (0x00-0xFF)
         if is_iso_8859_1_octet(b):
+            # Check for potential UTF-8 multibyte sequence vulnerabilities
+            if b >= 0x80:  # Non-ASCII byte
+                # Check if this looks like a UTF-8 start byte
+                if b >= 0xC0 and b <= 0xF7:  # UTF-8 start bytes
+                    # This could be start of multibyte sequence - check for embedded LF
+                    if i + 1 < len(data) and data[i + 1] == 0x0A:  # LF embedded in sequence
+                        raise Error(
+                            "RFC 9112 violation: LF (0x0A) embedded in potential multibyte sequence at position " + 
+                            String(i + 1) + ". This creates security vulnerabilities."
+                        )
+                elif b >= 0x80 and b <= 0xBF:  # UTF-8 continuation byte without proper start
+                    # Check if previous byte is valid UTF-8 start, if not this is invalid
+                    if i == 0 or (data[i - 1] < 0xC0):  # No proper UTF-8 start byte before
+                        # Check if this continuation byte contains control characters
+                        if i + 1 < len(data) and data[i + 1] == 0x0A:  # LF after invalid continuation
+                            raise Error(
+                                "RFC 9112 violation: LF (0x0A) after invalid UTF-8 continuation byte at position " + 
+                                String(i + 1) + ". This creates security vulnerabilities."
+                            )
             continue
             
+        # This should never happen since is_iso_8859_1_octet covers 0x00-0xFF
         raise Error(
             "RFC 9112 violation: Invalid octet 0x" + hex(Int(b)) + 
             " at position " + String(i) + 
@@ -137,7 +155,16 @@ fn percent_encode_invalid_octets[origin: Origin](data: Span[UInt8, origin]) -> S
         if is_us_ascii_octet(b) and b >= 0x20 and b != 0x25:  # Printable ASCII except %
             result += chr(Int(b))
         else:
-            result += "%" + hex(Int(b)).upper().rjust(2, "0")
+            # Fix hex formatting: ensure proper zero-padding
+            var hex_val = hex(Int(b)).upper()
+            # Remove "0X" prefix if present
+            if hex_val.startswith("0X"):
+                hex_val = hex_val[2:]
+            # Ensure two-digit hex format
+            if len(hex_val) == 1:
+                result += "%0" + hex_val
+            else:
+                result += "%" + hex_val
     
     return result
 
diff --git a/mojoproject.toml b/mojoproject.toml
index d932cea..6b7540b 100644
--- a/mojoproject.toml
+++ b/mojoproject.toml
@@ -19,6 +19,9 @@ integration_tests_py = { cmd = "bash scripts/integration_test.sh" }
 integration_tests_external = { cmd = "magic run mojo test -I . tests/integration" }
 integration_tests_udp = { cmd = "bash scripts/udp_test.sh" }
 
+[feature.rfc-tests.tasks]
+rfc_tests = { cmd = "magic run mojo test -I . tests/rfc" }
+
 [feature.bench.tasks]
 bench = { cmd = "magic run mojo -I . benchmark/bench.mojo" }
 bench_server = { cmd = "bash scripts/bench_server.sh" }
@@ -34,4 +37,5 @@ fastapi = ">=0.114.2,<0.115"
 default = { solve-group = "default" }
 unit-tests = { features = ["unit-tests"], solve-group = "default" }
 integration-tests = { features = ["integration-tests"], solve-group = "default" }
+rfc-tests = { features = ["rfc-tests"], solve-group = "default" }
 bench = { features = ["bench"], solve-group = "default" }
diff --git a/tests/rfc/test_rfc9112_section_2_2_2.mojo b/tests/rfc/test_rfc9112_section_2_2_2.mojo
new file mode 100644
index 0000000..d6f905c
--- /dev/null
+++ b/tests/rfc/test_rfc9112_section_2_2_2.mojo
@@ -0,0 +1,116 @@
+import testing
+from memory import Span
+from lightbug_http.strings import (
+    validate_http_message_octets,
+    safe_to_string_rfc9112,
+    to_string_rfc9112_safe,
+    is_us_ascii_octet,
+    is_iso_8859_1_octet,
+    percent_encode_invalid_octets,
+)
+from lightbug_http.io.bytes import Bytes, ByteReader
+from lightbug_http.http.request import HTTPRequest
+
+
+def test_rfc9112_octet_parsing_requirement():
+    """RFC 9112 Section 2.2-2: HTTP messages MUST be parsed as sequence of octets."""
+    print("Testing: HTTP message parsing as sequence of octets...")
+    
+    # Valid message with one non-ASCII character; NOTE(review): "é" is presumably two UTF-8 octets here, not one ISO-8859-1 byte - confirm
+    var http_message = "GET /path HTTP/1.1\r\nHost: example.com\r\nX-Custom: café\r\n\r\n"
+    var octets = http_message.as_bytes()
+    
+    testing.assert_equal(octets[0], ord('G'))
+    testing.assert_equal(octets[1], ord('E'))
+    testing.assert_equal(octets[2], ord('T'))
+    testing.assert_equal(octets[3], ord(' '))
+    
+    var crlf_count = 0  # number of adjacent CR (0x0D) + LF (0x0A) octet pairs
+    for i in range(len(octets) - 1):
+        if octets[i] == 0x0D and octets[i + 1] == 0x0A:
+            crlf_count += 1
+    
+    testing.assert_true(crlf_count >= 3)
+
+
+def test_rfc9112_us_ascii_superset_encoding():
+    """RFC 9112 Section 2.2-2: Encoding MUST be superset of US-ASCII."""
+    print("Testing: US-ASCII superset encoding requirement...")
+    
+    # Every octet in 0x00-0x7F must be classified as US-ASCII
+    for i in range(0x80):
+        testing.assert_true(is_us_ascii_octet(UInt8(i)))
+    
+    # Octets 0x80-0xFF must be accepted as ISO-8859-1 but rejected as US-ASCII
+    for i in range(0x80, 0x100):
+        testing.assert_true(is_iso_8859_1_octet(UInt8(i)))
+        testing.assert_false(is_us_ascii_octet(UInt8(i)))
+
+
+def test_rfc9112_lf_security_vulnerability_prevention():
+    """RFC 9112 Section 2.2-2: Prevent security vulnerabilities from LF (%x0A) in multibyte sequences."""
+    print("Testing: LF security vulnerability prevention...")
+    
+    # Valid LF in HTTP context
+    var valid_http = "GET /test HTTP/1.1\r\nHost: test.com\r\n\r\n"
+    var valid_octets = valid_http.as_bytes()
+    
+    try:
+        var validated = validate_http_message_octets(Span(valid_octets))
+        testing.assert_equal(len(validated), len(valid_octets))
+    except e:
+        testing.assert_true(False, "Valid HTTP message should not raise error: " + String(e))
+    
+    # Invalid multibyte sequence containing LF
+    var malicious_bytes = List[UInt8]()
+    malicious_bytes.extend("GET /".as_bytes())
+    malicious_bytes.append(0xC0)  # Invalid UTF-8 start byte
+    malicious_bytes.append(0x0A)  # LF embedded in "multibyte" sequence
+    malicious_bytes.append(0x80)  # Continuation byte
+    malicious_bytes.extend(" HTTP/1.1\r\nHost: test.com\r\n\r\n".as_bytes())
+    
+    var malicious_span = Span(malicious_bytes)
+    
+    # assert_raises fails the test when nothing is raised. A try/except with
+    # assert_true(False, ...) inside the try cannot do that: the except branch
+    # swallows the assertion's own error, so the check would always pass.
+    with testing.assert_raises(contains="RFC 9112 violation"):
+        _ = validate_http_message_octets(malicious_span)
+    
+    var safe_result = to_string_rfc9112_safe(malicious_span)
+    
+    testing.assert_true(safe_result.find("%") != -1, "Should percent-encode unsafe sequences")
+
+
+def test_rfc9112_percent_encoding_fallback():
+    """RFC 9112 Section 2.2-2: Test percent-encoding fallback for unsafe sequences."""
+    print("Testing: Percent-encoding fallback for unsafe sequences...")
+    
+    # Build an input made only of octets the encoder must escape: controls, '%', and a non-ASCII byte
+    var unsafe_bytes = List[UInt8]()
+    unsafe_bytes.append(0x00)  # NUL control byte
+    unsafe_bytes.append(0x0A)  # LF
+    unsafe_bytes.append(0x0D)  # CR
+    unsafe_bytes.append(0x25)  # % (should be encoded)
+    unsafe_bytes.append(0xFF)  # High byte (outside US-ASCII)
+    
+    var unsafe_span = Span(unsafe_bytes)
+    var encoded = percent_encode_invalid_octets(unsafe_span)
+    
+    # Each octet must appear as '%' followed by exactly two uppercase hex digits
+    testing.assert_true(encoded.find("%00") != -1, "Should encode NULL byte")
+    testing.assert_true(encoded.find("%0A") != -1, "Should encode LF")
+    testing.assert_true(encoded.find("%0D") != -1, "Should encode CR")
+    testing.assert_true(encoded.find("%25") != -1, "Should encode % character")
+    testing.assert_true(encoded.find("%FF") != -1, "Should encode high byte")
+
+
+def main():  # Runs the four tests of this file in order; an unhandled error from any of them aborts the run
+    print("🧪 Testing RFC 9112 Section 2.2-2: HTTP Message Parsing as Octets")
+    
+    test_rfc9112_octet_parsing_requirement()
+    test_rfc9112_us_ascii_superset_encoding()
+    test_rfc9112_lf_security_vulnerability_prevention()
+    test_rfc9112_percent_encoding_fallback()
+    
+    print("\n✅ RFC 9112 Section 2.2-2 requirement fully verified")
\ No newline at end of file
diff --git a/tests/rfc/test_rfc9112_simple.mojo b/tests/rfc/test_rfc9112_simple.mojo
deleted file mode 100644
index a5d5161..0000000
--- a/tests/rfc/test_rfc9112_simple.mojo
+++ /dev/null
@@ -1,88 +0,0 @@
-import testing
-from memory import Span
-
-
-def test_rfc9112_parse_as_octets():
-    """RFC 9112 Section 2.2-2: MUST parse HTTP message as sequence of octets."""
-    print("Testing: Parse HTTP message as sequence of octets...")
-    
-    var http_message = "GET /path HTTP/1.1\r\nHost: example.com\r\n\r\n"
-    var octets = http_message.as_bytes()
-    
-    testing.assert_equal(octets[0], ord('G'))
-    testing.assert_equal(octets[4], ord('/'))
-    
-    var found_cr = False
-    var found_lf = False
-    for i in range(len(octets)):
-        if octets[i] == 0x0D and not found_cr:
-            found_cr = True
-        if octets[i] == 0x0A and not found_lf:
-            found_lf = True
-        if found_cr and found_lf:
-            break
-    
-    testing.assert_true(found_cr)
-    testing.assert_true(found_lf)
-
-
-def test_rfc9112_us_ascii_superset_encoding():
-    """RFC 9112 Section 2.2-2: MUST use encoding that is superset of US-ASCII."""
-    print("Testing: Encoding is superset of US-ASCII...")
-    
-    testing.assert_true(ord('G') <= 0x7F)      # US-ASCII
-    testing.assert_true(ord(' ') <= 0x7F)      # US-ASCII
-    testing.assert_true(0x0A <= 0x7F)          # LF in US-ASCII
-    testing.assert_true(0x0D <= 0x7F)          # CR in US-ASCII
-    testing.assert_true(0x80 <= 0xFF)          # Extended range valid
-    testing.assert_true(0xFF <= 0xFF)          # Maximum byte valid
-
-
-def test_rfc9112_lf_security_vulnerability():
-    """RFC 9112 Section 2.2-2: Prevent LF (%x0A) security vulnerabilities."""
-    print("Testing: LF (%x0A) security vulnerability prevention...")
-    
-    var lf_octet: UInt8 = 0x0A
-    
-    var test_data = "GET /\r\nHost: test\r\n\r\n"
-    var data_octets = test_data.as_bytes()
-    
-    var lf_positions = List[Int]()
-    for i in range(len(data_octets)):
-        if data_octets[i] == lf_octet:
-            lf_positions.append(i)
-    
-    testing.assert_true(len(lf_positions) > 0)
-
-
-def test_rfc9112_string_parser_safety():
-    """RFC 9112 Section 2.2-2: String parsers only used after protocol element extraction."""
-    print("Testing: String parsers used only after safe extraction...")
-    
-    var http_request = "GET /api/data HTTP/1.1\r\nHost: server.com\r\n\r\n"
-    var request_octets = http_request.as_bytes()
-    
-    var method_end = -1
-    for i in range(len(request_octets)):
-        if request_octets[i] == ord(' '):
-            method_end = i
-            break
-    
-    testing.assert_true(method_end > 0)
-    
-    testing.assert_equal(request_octets[0], ord('G'))
-    testing.assert_equal(request_octets[1], ord('E'))
-    testing.assert_equal(request_octets[2], ord('T'))
-    testing.assert_equal(method_end, 3)
-
-
-def main():
-    """Test RFC 9112 Section 2.2-2 compliance."""
-    print("🧪 Testing RFC 9112 Section 2.2-2 Compliance\n")
-    
-    test_rfc9112_parse_as_octets()
-    test_rfc9112_us_ascii_superset_encoding()
-    test_rfc9112_lf_security_vulnerability()
-    test_rfc9112_string_parser_safety()
-    
-    print("\n✅ RFC 9112 Section 2.2-2 requirement verified")
\ No newline at end of file

From 45f0a81ac6d3c00b09cf12a457dabaa6b2190aa1 Mon Sep 17 00:00:00 2001
From: Val <saviorand@gmail.com>
Date: Sun, 15 Jun 2025 16:07:01 +0200
Subject: [PATCH 04/12] more clean up

---
 lightbug_http/strings.mojo                | 194 +++++++---------------
 tests/rfc/test_rfc9112_section_2_2_2.mojo |  41 +----
 2 files changed, 57 insertions(+), 178 deletions(-)

diff --git a/lightbug_http/strings.mojo b/lightbug_http/strings.mojo
index aecbace..2b230ec 100644
--- a/lightbug_http/strings.mojo
+++ b/lightbug_http/strings.mojo
@@ -38,137 +38,13 @@ alias ISO_8859_1_MAX = 0xFF
 
 
 fn is_us_ascii_octet(b: UInt8) -> Bool:
-    """Check if a byte is within US-ASCII range (0x00-0x7F).
-    
-    Args:
-        b: The byte to check.
-        
-    Returns:
-        True if the byte is US-ASCII compliant.
-    """
     return b <= US_ASCII_MAX
 
 
 fn is_iso_8859_1_octet(b: UInt8) -> Bool:
-    """Check if a byte is within ISO-8859-1 range (0x00-0xFF).
-    
-    Args:
-        b: The byte to check.
-        
-    Returns:
-        True if the byte is ISO-8859-1 compliant.
-    """
     return b <= ISO_8859_1_MAX
 
 
-fn validate_http_message_octets[origin: Origin](data: Span[UInt8, origin]) raises -> Span[UInt8, origin]:
-    """RFC 9112 Section 2.2-2: Validate HTTP message as sequence of octets.
-    
-    A recipient MUST parse an HTTP message as a sequence of octets in an encoding 
-    that is a superset of US-ASCII. This function validates that the message can
-    be safely parsed as octets and detects invalid multibyte UTF-8 sequences.
-    
-    Args:
-        data: The raw bytes of the HTTP message.
-        
-    Returns:
-        The validated span of bytes safe for parsing.
-        
-    Raises:
-        Error: If the data contains invalid multi-byte sequences that could
-               create security vulnerabilities (like embedded LF in UTF-8).
-    """
-    for i in range(len(data)):
-        var b = data[i]
-        
-        # Allow all ISO-8859-1 bytes (0x00-0xFF)
-        if is_iso_8859_1_octet(b):
-            # Check for potential UTF-8 multibyte sequence vulnerabilities
-            if b >= 0x80:  # Non-ASCII byte
-                # Check if this looks like a UTF-8 start byte
-                if b >= 0xC0 and b <= 0xF7:  # UTF-8 start bytes
-                    # This could be start of multibyte sequence - check for embedded LF
-                    if i + 1 < len(data) and data[i + 1] == 0x0A:  # LF embedded in sequence
-                        raise Error(
-                            "RFC 9112 violation: LF (0x0A) embedded in potential multibyte sequence at position " + 
-                            String(i + 1) + ". This creates security vulnerabilities."
-                        )
-                elif b >= 0x80 and b <= 0xBF:  # UTF-8 continuation byte without proper start
-                    # Check if previous byte is valid UTF-8 start, if not this is invalid
-                    if i == 0 or (data[i - 1] < 0xC0):  # No proper UTF-8 start byte before
-                        # Check if this continuation byte contains control characters
-                        if i + 1 < len(data) and data[i + 1] == 0x0A:  # LF after invalid continuation
-                            raise Error(
-                                "RFC 9112 violation: LF (0x0A) after invalid UTF-8 continuation byte at position " + 
-                                String(i + 1) + ". This creates security vulnerabilities."
-                            )
-            continue
-            
-        # This should never happen since is_iso_8859_1_octet covers 0x00-0xFF
-        raise Error(
-            "RFC 9112 violation: Invalid octet 0x" + hex(Int(b)) + 
-            " at position " + String(i) + 
-            ". HTTP messages must use encoding superset of US-ASCII."
-        )
-    
-    return data
-
-
-fn safe_to_string_rfc9112[origin: Origin](b: Span[UInt8, origin]) raises -> String:
-    """RFC 9112 compliant conversion of octets to String.
-    
-    Creates a String from octets using ISO-8859-1 encoding (superset of US-ASCII).
-    This avoids security vulnerabilities from treating multi-byte UTF-8 sequences
-    as individual characters.
-    
-    Args:
-        b: The validated span of bytes (must pass validate_http_message_octets).
-        
-    Returns:
-        A String created from the octets using safe encoding.
-        
-    Raises:
-        Error: If the bytes contain invalid sequences for HTTP parsing.
-    """
-    var validated_span = validate_http_message_octets(b)
-    
-    return String(StringSlice(unsafe_from_utf8=validated_span))
-
-
-fn percent_encode_invalid_octets[origin: Origin](data: Span[UInt8, origin]) -> String:
-    """Percent-encode octets that are not safe for HTTP message parsing.
-    
-    This is a fallback approach when we encounter bytes that cannot be safely
-    interpreted as US-ASCII superset encoding.
-    
-    Args:
-        data: The raw bytes that may contain unsafe sequences.
-        
-    Returns:
-        A String with unsafe octets percent-encoded.
-    """
-    var result = String()
-    
-    for i in range(len(data)):
-        var b = data[i]
-        
-        if is_us_ascii_octet(b) and b >= 0x20 and b != 0x25:  # Printable ASCII except %
-            result += chr(Int(b))
-        else:
-            # Fix hex formatting: ensure proper zero-padding
-            var hex_val = hex(Int(b)).upper()
-            # Remove "0X" prefix if present
-            if hex_val.startswith("0X"):
-                hex_val = hex_val[2:]
-            # Ensure two-digit hex format
-            if len(hex_val) == 1:
-                result += "%0" + hex_val
-            else:
-                result += "%" + hex_val
-    
-    return result
-
-
 fn to_string[T: Writable](value: T) -> String:
     return String.write(value)
 
@@ -183,22 +59,11 @@ fn to_string(b: Span[UInt8]) -> String:
 
 
 fn to_string_rfc9112_safe[origin: Origin](b: Span[UInt8, origin]) -> String:
-    """RFC 9112 compliant String creation with fallback to percent-encoding.
-    
-    Attempts to create a String using safe octet parsing. If that fails,
-    falls back to percent-encoding unsafe sequences.
-    
-    Args:
-        b: The Span of bytes to convert to a String.
-        
-    Returns:
-        A String created safely according to RFC 9112.
-    """
     try:
-        return safe_to_string_rfc9112(b)
+        var validated_span = validate_http_message_octets_rfc9112(b)
+        return String(StringSlice(unsafe_from_utf8=validated_span))
     except:
-        # Fallback to percent-encoding for unsafe sequences
-        return percent_encode_invalid_octets(b)
+        return percent_encode_octets(b)
 
 
 fn to_string(owned bytes: Bytes) -> String:
@@ -220,3 +85,56 @@ fn find_all(s: String, sub_str: String) -> List[Int]:
         match_idxs.append(current_idx)
         current_idx = s.find(sub_str, start=current_idx + 1)
     return match_idxs^
+
+
+fn percent_encode_octets[origin: Origin](data: Span[UInt8, origin]) -> String:
+    var result = String()
+    
+    for i in range(len(data)):
+        var b = data[i]
+        
+        if is_us_ascii_octet(b) and b >= 0x20 and b != 0x25:  # Printable ASCII except %
+            result += chr(Int(b))
+        else:
+            # Fix hex formatting: ensure proper zero-padding
+            var hex_val = hex(Int(b)).upper()
+            # Remove "0X" prefix if present
+            if hex_val.startswith("0X"):
+                hex_val = hex_val[2:]
+            # Ensure two-digit hex format
+            if len(hex_val) == 1:
+                result += "%0" + hex_val
+            else:
+                result += "%" + hex_val
+    
+    return result
+
+fn validate_http_message_octets_rfc9112[origin: Origin](data: Span[UInt8, origin]) raises -> Span[UInt8, origin]:
+    for i in range(len(data)):
+        var b = data[i]
+        
+        if is_iso_8859_1_octet(b):
+            if b >= 0x80:
+                if b >= 0xC0 and b <= 0xF7:
+                    if i + 1 < len(data) and data[i + 1] == 0x0A:
+                        raise Error(
+                            "RFC 9112 violation: LF (0x0A) embedded in potential multibyte sequence at position " + 
+                            String(i + 1) + ". This creates security vulnerabilities."
+                        )
+                elif b >= 0x80 and b <= 0xBF:
+                    if i == 0 or (data[i - 1] < 0xC0):
+                        if i + 1 < len(data) and data[i + 1] == 0x0A:
+                            raise Error(
+                                "RFC 9112 violation: LF (0x0A) after invalid UTF-8 continuation byte at position " + 
+                                String(i + 1) + ". This creates security vulnerabilities."
+                            )
+            continue
+            
+        # This should never happen since is_iso_8859_1_octet covers 0x00-0xFF
+        raise Error(
+            "RFC 9112 violation: Invalid octet 0x" + hex(Int(b)) + 
+            " at position " + String(i) + 
+            ". HTTP messages must use encoding superset of US-ASCII."
+        )
+    
+    return data
\ No newline at end of file
diff --git a/tests/rfc/test_rfc9112_section_2_2_2.mojo b/tests/rfc/test_rfc9112_section_2_2_2.mojo
index d6f905c..b3c9993 100644
--- a/tests/rfc/test_rfc9112_section_2_2_2.mojo
+++ b/tests/rfc/test_rfc9112_section_2_2_2.mojo
@@ -12,41 +12,6 @@ from lightbug_http.io.bytes import Bytes, ByteReader
 from lightbug_http.http.request import HTTPRequest
 
 
-def test_rfc9112_octet_parsing_requirement():
-    """RFC 9112 Section 2.2-2: HTTP messages MUST be parsed as sequence of octets."""
-    print("Testing: HTTP message parsing as sequence of octets...")
-    
-    # Valid HTTP message with mixed ASCII and extended ASCII
-    var http_message = "GET /path HTTP/1.1\r\nHost: example.com\r\nX-Custom: café\r\n\r\n"
-    var octets = http_message.as_bytes()
-    
-    testing.assert_equal(octets[0], ord('G'))
-    testing.assert_equal(octets[1], ord('E'))
-    testing.assert_equal(octets[2], ord('T'))
-    testing.assert_equal(octets[3], ord(' '))
-    
-    var crlf_count = 0
-    for i in range(len(octets) - 1):
-        if octets[i] == 0x0D and octets[i + 1] == 0x0A:
-            crlf_count += 1
-    
-    testing.assert_true(crlf_count >= 3)
-
-
-def test_rfc9112_us_ascii_superset_encoding():
-    """RFC 9112 Section 2.2-2: Encoding MUST be superset of US-ASCII."""
-    print("Testing: US-ASCII superset encoding requirement...")
-    
-    # Test US-ASCII range (0x00-0x7F)
-    for i in range(0x80):
-        testing.assert_true(is_us_ascii_octet(UInt8(i)))
-    
-    # Test extended range for ISO-8859-1 (superset of US-ASCII)
-    for i in range(0x80, 0x100):
-        testing.assert_true(is_iso_8859_1_octet(UInt8(i)))
-        testing.assert_false(is_us_ascii_octet(UInt8(i)))
-
-
 def test_rfc9112_lf_security_vulnerability_prevention():
     """RFC 9112 Section 2.2-2: Prevent security vulnerabilities from LF (%x0A) in multibyte sequences."""
     print("Testing: LF security vulnerability prevention...")
@@ -65,7 +30,7 @@ def test_rfc9112_lf_security_vulnerability_prevention():
     var malicious_bytes = List[UInt8]()
     malicious_bytes.extend("GET /".as_bytes())
     malicious_bytes.append(0xC0)  # Invalid UTF-8 start byte
-    malicious_bytes.append(0x0A)  # LF embedded in "multibyte" sequence
+    malicious_bytes.append(0x0A)  # LF embedded in multibyte sequence
     malicious_bytes.append(0x80)  # Continuation byte
     malicious_bytes.extend(" HTTP/1.1\r\nHost: test.com\r\n\r\n".as_bytes())
     
@@ -86,7 +51,6 @@ def test_rfc9112_percent_encoding_fallback():
     """RFC 9112 Section 2.2-2: Test percent-encoding fallback for unsafe sequences."""
     print("Testing: Percent-encoding fallback for unsafe sequences...")
     
-    # Create byte sequence with characters that need percent-encoding
     var unsafe_bytes = List[UInt8]()
     unsafe_bytes.append(0x00)  # NULL byte
     unsafe_bytes.append(0x0A)  # LF
@@ -97,7 +61,6 @@ def test_rfc9112_percent_encoding_fallback():
     var unsafe_span = Span(unsafe_bytes)
     var encoded = percent_encode_invalid_octets(unsafe_span)
     
-    # Verify percent encoding with correct format
     testing.assert_true(encoded.find("%00") != -1, "Should encode NULL byte")
     testing.assert_true(encoded.find("%0A") != -1, "Should encode LF")
     testing.assert_true(encoded.find("%0D") != -1, "Should encode CR")
@@ -108,8 +71,6 @@ def test_rfc9112_percent_encoding_fallback():
 def main():
     print("🧪 Testing RFC 9112 Section 2.2-2: HTTP Message Parsing as Octets")
     
-    test_rfc9112_octet_parsing_requirement()
-    test_rfc9112_us_ascii_superset_encoding()
     test_rfc9112_lf_security_vulnerability_prevention()
     test_rfc9112_percent_encoding_fallback()
     

From cfe7cb6259d54384b5905c47ff4906a3be999708 Mon Sep 17 00:00:00 2001
From: Val <saviorand@gmail.com>
Date: Sun, 15 Jun 2025 16:20:36 +0200
Subject: [PATCH 05/12] revert changes

---
 lightbug_http/header.mojo  | 166 ++++++++++++-------------------------
 lightbug_http/strings.mojo |  12 ++-
 2 files changed, 59 insertions(+), 119 deletions(-)

diff --git a/lightbug_http/header.mojo b/lightbug_http/header.mojo
index 4c91c80..396fbdc 100644
--- a/lightbug_http/header.mojo
+++ b/lightbug_http/header.mojo
@@ -1,125 +1,67 @@
-from collections import Dict, Optional
-from lightbug_http.io.bytes import Bytes, ByteReader, ByteWriter, is_newline, is_space
-from lightbug_http.strings import BytesConstant, to_string_rfc9112_safe
-from lightbug_http._logger import logger
-from lightbug_http.strings import rChar, nChar, lineBreak, to_string
+from memory import Span
+from lightbug_http.io.bytes import Bytes, bytes, byte
 
+alias strSlash = "/"
+alias strHttp = "http"
+alias http = "http"
+alias strHttps = "https"
+alias https = "https"
+alias strHttp11 = "HTTP/1.1"
+alias strHttp10 = "HTTP/1.0"
 
-struct HeaderKey:
-    # TODO: Fill in more of these
-    alias CONNECTION = "connection"
-    alias CONTENT_TYPE = "content-type"
-    alias CONTENT_LENGTH = "content-length"
-    alias CONTENT_ENCODING = "content-encoding"
-    alias TRANSFER_ENCODING = "transfer-encoding"
-    alias DATE = "date"
-    alias LOCATION = "location"
-    alias HOST = "host"
-    alias SERVER = "server"
-    alias SET_COOKIE = "set-cookie"
-    alias COOKIE = "cookie"
+alias strMethodGet = "GET"
 
+alias rChar = "\r"
+alias nChar = "\n"
+alias lineBreak = rChar + nChar
+alias colonChar = ":"
 
-@value
-struct Header(Writable, Stringable):
-    var key: String
-    var value: String
+alias empty_string = ""
+alias whitespace = " "
+alias whitespace_byte = ord(whitespace)
+alias tab = "\t"
+alias tab_byte = ord(tab)
 
-    fn __str__(self) -> String:
-        return String.write(self)
 
-    fn write_to[T: Writer, //](self, mut writer: T):
-        writer.write(self.key + ": ", self.value, lineBreak)
+struct BytesConstant:
+    alias whitespace = byte(whitespace)
+    alias colon = byte(colonChar)
+    alias rChar = byte(rChar)
+    alias nChar = byte(nChar)
 
+    alias CRLF = bytes(lineBreak)
+    alias DOUBLE_CRLF = bytes(lineBreak + lineBreak)
 
-@always_inline
-fn write_header[T: Writer](mut writer: T, key: String, value: String):
-    writer.write(key + ": ", value, lineBreak)
 
+fn to_string[T: Writable](value: T) -> String:
+    return String.write(value)
 
-@value
-struct Headers(Writable, Stringable):
-    """Represents the header key/values in an http request/response.
 
-    Header keys are normalized to lowercase
+fn to_string(b: Span[UInt8]) -> String:
+    """Creates a String from a copy of the provided Span of bytes.
+
+    Args:
+        b: The Span of bytes to convert to a String.
     """
+    return String(StringSlice(unsafe_from_utf8=b))
+
 
-    var _inner: Dict[String, String]
-
-    fn __init__(out self):
-        self._inner = Dict[String, String]()
-
-    fn __init__(out self, owned *headers: Header):
-        self._inner = Dict[String, String]()
-        for header in headers:
-            self[header[].key.lower()] = header[].value
-
-    @always_inline
-    fn empty(self) -> Bool:
-        return len(self._inner) == 0
-
-    @always_inline
-    fn __contains__(self, key: String) -> Bool:
-        return key.lower() in self._inner
-
-    @always_inline
-    fn __getitem__(self, key: String) raises -> String:
-        try:
-            return self._inner[key.lower()]
-        except:
-            raise Error("KeyError: Key not found in headers: " + key)
-
-    @always_inline
-    fn get(self, key: String) -> Optional[String]:
-        return self._inner.get(key.lower())
-
-    @always_inline
-    fn __setitem__(mut self, key: String, value: String):
-        self._inner[key.lower()] = value
-
-    fn content_length(self) -> Int:
-        try:
-            return Int(self[HeaderKey.CONTENT_LENGTH])
-        except:
-            return 0
-
-    fn parse_raw(mut self, mut r: ByteReader) raises -> (String, String, String, List[String]):
-        var first_byte = r.peek()
-        if not first_byte:
-            raise Error("Headers.parse_raw: Failed to read first byte from response header")
-
-        var first = r.read_word()
-        r.increment()
-        var second = r.read_word()
-        r.increment()
-        var third = r.read_line()
-        var cookies = List[String]()
-
-        while not is_newline(r.peek()):
-            var key = r.read_until(BytesConstant.colon)
-            r.increment()
-            if is_space(r.peek()):
-                r.increment()
-            # TODO (bgreni): Handle possible trailing whitespace
-            var value = r.read_line()
-            
-            var k = to_string_rfc9112_safe(key._inner).lower()
-            if k == HeaderKey.SET_COOKIE:
-                cookies.append(to_string_rfc9112_safe(value._inner))
-                continue
-
-            self._inner[k] = to_string_rfc9112_safe(value._inner)
-            
-        return (
-            to_string_rfc9112_safe(first._inner), 
-            to_string_rfc9112_safe(second._inner), 
-            to_string_rfc9112_safe(third._inner), 
-            cookies
-        )
-
-    fn write_to[T: Writer, //](self, mut writer: T):
-        for header in self._inner.items():
-            write_header(writer, header[].key, header[].value)
-
-    fn __str__(self) -> String:
-        return String.write(self)
+fn to_string(owned bytes: Bytes) -> String:
+    """Creates a String from the provided List of bytes.
+    If you do not transfer ownership of the List, the List will be copied.
+
+    Args:
+        bytes: The List of bytes to convert to a String.
+    """
+    var result = String()
+    result.write_bytes(bytes)
+    return result^
+
+
+fn find_all(s: String, sub_str: String) -> List[Int]:
+    match_idxs = List[Int]()
+    var current_idx: Int = s.find(sub_str)
+    while current_idx > -1:
+        match_idxs.append(current_idx)
+        current_idx = s.find(sub_str, start=current_idx + 1)
+    return match_idxs^
\ No newline at end of file
diff --git a/lightbug_http/strings.mojo b/lightbug_http/strings.mojo
index 2b230ec..02e105c 100644
--- a/lightbug_http/strings.mojo
+++ b/lightbug_http/strings.mojo
@@ -60,7 +60,7 @@ fn to_string(b: Span[UInt8]) -> String:
 
 fn to_string_rfc9112_safe[origin: Origin](b: Span[UInt8, origin]) -> String:
     try:
-        var validated_span = validate_http_message_octets_rfc9112(b)
+        var validated_span = validate_message_octets_iso_8859_1(b)
         return String(StringSlice(unsafe_from_utf8=validated_span))
     except:
         return percent_encode_octets(b)
@@ -109,7 +109,7 @@ fn percent_encode_octets[origin: Origin](data: Span[UInt8, origin]) -> String:
     
     return result
 
-fn validate_http_message_octets_rfc9112[origin: Origin](data: Span[UInt8, origin]) raises -> Span[UInt8, origin]:
+fn validate_message_octets_iso_8859_1[origin: Origin](data: Span[UInt8, origin]) raises -> Span[UInt8, origin]:
     for i in range(len(data)):
         var b = data[i]
         
@@ -118,21 +118,19 @@ fn validate_http_message_octets_rfc9112[origin: Origin](data: Span[UInt8, origin
                 if b >= 0xC0 and b <= 0xF7:
                     if i + 1 < len(data) and data[i + 1] == 0x0A:
                         raise Error(
-                            "RFC 9112 violation: LF (0x0A) embedded in potential multibyte sequence at position " + 
-                            String(i + 1) + ". This creates security vulnerabilities."
+                            "."
                         )
                 elif b >= 0x80 and b <= 0xBF:
                     if i == 0 or (data[i - 1] < 0xC0):
                         if i + 1 < len(data) and data[i + 1] == 0x0A:
                             raise Error(
-                                "RFC 9112 violation: LF (0x0A) after invalid UTF-8 continuation byte at position " + 
-                                String(i + 1) + ". This creates security vulnerabilities."
+                                "."
                             )
             continue
             
         # This should never happen since is_iso_8859_1_octet covers 0x00-0xFF
         raise Error(
-            "RFC 9112 violation: Invalid octet 0x" + hex(Int(b)) + 
+            "Invalid octet 0x" + hex(Int(b)) + 
             " at position " + String(i) + 
             ". HTTP messages must use encoding superset of US-ASCII."
         )

From d84a8c93f85b7027d499d4436c24948de4abc25c Mon Sep 17 00:00:00 2001
From: Val <saviorand@gmail.com>
Date: Sun, 15 Jun 2025 16:21:17 +0200
Subject: [PATCH 06/12] revert header.mojo

---
 lightbug_http/header.mojo | 159 +++++++++++++++++++++++++-------------
 1 file changed, 105 insertions(+), 54 deletions(-)

diff --git a/lightbug_http/header.mojo b/lightbug_http/header.mojo
index 396fbdc..92c58ab 100644
--- a/lightbug_http/header.mojo
+++ b/lightbug_http/header.mojo
@@ -1,67 +1,118 @@
-from memory import Span
-from lightbug_http.io.bytes import Bytes, bytes, byte
+from collections import Dict, Optional
+from lightbug_http.io.bytes import Bytes, ByteReader, ByteWriter, is_newline, is_space
+from lightbug_http.strings import BytesConstant
+from lightbug_http._logger import logger
+from lightbug_http.strings import rChar, nChar, lineBreak, to_string
 
-alias strSlash = "/"
-alias strHttp = "http"
-alias http = "http"
-alias strHttps = "https"
-alias https = "https"
-alias strHttp11 = "HTTP/1.1"
-alias strHttp10 = "HTTP/1.0"
 
-alias strMethodGet = "GET"
+struct HeaderKey:
+    # TODO: Fill in more of these
+    alias CONNECTION = "connection"
+    alias CONTENT_TYPE = "content-type"
+    alias CONTENT_LENGTH = "content-length"
+    alias CONTENT_ENCODING = "content-encoding"
+    alias TRANSFER_ENCODING = "transfer-encoding"
+    alias DATE = "date"
+    alias LOCATION = "location"
+    alias HOST = "host"
+    alias SERVER = "server"
+    alias SET_COOKIE = "set-cookie"
+    alias COOKIE = "cookie"
 
-alias rChar = "\r"
-alias nChar = "\n"
-alias lineBreak = rChar + nChar
-alias colonChar = ":"
 
-alias empty_string = ""
-alias whitespace = " "
-alias whitespace_byte = ord(whitespace)
-alias tab = "\t"
-alias tab_byte = ord(tab)
+@value
+struct Header(Writable, Stringable):
+    var key: String
+    var value: String
 
+    fn __str__(self) -> String:
+        return String.write(self)
 
-struct BytesConstant:
-    alias whitespace = byte(whitespace)
-    alias colon = byte(colonChar)
-    alias rChar = byte(rChar)
-    alias nChar = byte(nChar)
+    fn write_to[T: Writer, //](self, mut writer: T):
+        writer.write(self.key + ": ", self.value, lineBreak)
 
-    alias CRLF = bytes(lineBreak)
-    alias DOUBLE_CRLF = bytes(lineBreak + lineBreak)
 
+@always_inline
+fn write_header[T: Writer](mut writer: T, key: String, value: String):
+    writer.write(key + ": ", value, lineBreak)
 
-fn to_string[T: Writable](value: T) -> String:
-    return String.write(value)
 
+@value
+struct Headers(Writable, Stringable):
+    """Represents the header key/values in an http request/response.
 
-fn to_string(b: Span[UInt8]) -> String:
-    """Creates a String from a copy of the provided Span of bytes.
-
-    Args:
-        b: The Span of bytes to convert to a String.
+    Header keys are normalized to lowercase
     """
-    return String(StringSlice(unsafe_from_utf8=b))
-
 
-fn to_string(owned bytes: Bytes) -> String:
-    """Creates a String from the provided List of bytes.
-    If you do not transfer ownership of the List, the List will be copied.
-
-    Args:
-        bytes: The List of bytes to convert to a String.
-    """
-    var result = String()
-    result.write_bytes(bytes)
-    return result^
-
-
-fn find_all(s: String, sub_str: String) -> List[Int]:
-    match_idxs = List[Int]()
-    var current_idx: Int = s.find(sub_str)
-    while current_idx > -1:
-        match_idxs.append(current_idx)
-        current_idx = s.find(sub_str, start=current_idx + 1)
-    return match_idxs^
\ No newline at end of file
+    var _inner: Dict[String, String]
+
+    fn __init__(out self):
+        self._inner = Dict[String, String]()
+
+    fn __init__(out self, owned *headers: Header):
+        self._inner = Dict[String, String]()
+        for header in headers:
+            self[header[].key.lower()] = header[].value
+
+    @always_inline
+    fn empty(self) -> Bool:
+        return len(self._inner) == 0
+
+    @always_inline
+    fn __contains__(self, key: String) -> Bool:
+        return key.lower() in self._inner
+
+    @always_inline
+    fn __getitem__(self, key: String) raises -> String:
+        try:
+            return self._inner[key.lower()]
+        except:
+            raise Error("KeyError: Key not found in headers: " + key)
+
+    @always_inline
+    fn get(self, key: String) -> Optional[String]:
+        return self._inner.get(key.lower())
+
+    @always_inline
+    fn __setitem__(mut self, key: String, value: String):
+        self._inner[key.lower()] = value
+
+    fn content_length(self) -> Int:
+        try:
+            return Int(self[HeaderKey.CONTENT_LENGTH])
+        except:
+            return 0
+
+    fn parse_raw(mut self, mut r: ByteReader) raises -> (String, String, String, List[String]):
+        var first_byte = r.peek()
+        if not first_byte:
+            raise Error("Headers.parse_raw: Failed to read first byte from response header")
+
+        var first = r.read_word()
+        r.increment()
+        var second = r.read_word()
+        r.increment()
+        var third = r.read_line()
+        var cookies = List[String]()
+
+        while not is_newline(r.peek()):
+            var key = r.read_until(BytesConstant.colon)
+            r.increment()
+            if is_space(r.peek()):
+                r.increment()
+            # TODO (bgreni): Handle possible trailing whitespace
+            var value = r.read_line()
+            var k = String(key).lower()
+            if k == HeaderKey.SET_COOKIE:
+                cookies.append(String(value))
+                continue
+
+            self._inner[k] = String(value)
+        return (String(first), String(second), String(third), cookies)
+
+    fn write_to[T: Writer, //](self, mut writer: T):
+        for header in self._inner.items():
+            write_header(writer, header[].key, header[].value)
+
+    fn __str__(self) -> String:
+        return String.write(self)
\ No newline at end of file

From 198569abaa23c14bb196a90ed81796f5d079283a Mon Sep 17 00:00:00 2001
From: Val <saviorand@gmail.com>
Date: Sun, 15 Jun 2025 16:21:49 +0200
Subject: [PATCH 07/12] revert strings.mojo

---
 lightbug_http/strings.mojo | 73 +-------------------------------------
 1 file changed, 1 insertion(+), 72 deletions(-)

diff --git a/lightbug_http/strings.mojo b/lightbug_http/strings.mojo
index 02e105c..396fbdc 100644
--- a/lightbug_http/strings.mojo
+++ b/lightbug_http/strings.mojo
@@ -33,18 +33,6 @@ struct BytesConstant:
     alias DOUBLE_CRLF = bytes(lineBreak + lineBreak)
 
 
-alias US_ASCII_MAX = 0x7F
-alias ISO_8859_1_MAX = 0xFF
-
-
-fn is_us_ascii_octet(b: UInt8) -> Bool:
-    return b <= US_ASCII_MAX
-
-
-fn is_iso_8859_1_octet(b: UInt8) -> Bool:
-    return b <= ISO_8859_1_MAX
-
-
 fn to_string[T: Writable](value: T) -> String:
     return String.write(value)
 
@@ -58,14 +46,6 @@ fn to_string(b: Span[UInt8]) -> String:
     return String(StringSlice(unsafe_from_utf8=b))
 
 
-fn to_string_rfc9112_safe[origin: Origin](b: Span[UInt8, origin]) -> String:
-    try:
-        var validated_span = validate_message_octets_iso_8859_1(b)
-        return String(StringSlice(unsafe_from_utf8=validated_span))
-    except:
-        return percent_encode_octets(b)
-
-
 fn to_string(owned bytes: Bytes) -> String:
     """Creates a String from the provided List of bytes.
     If you do not transfer ownership of the List, the List will be copied.
@@ -84,55 +64,4 @@ fn find_all(s: String, sub_str: String) -> List[Int]:
     while current_idx > -1:
         match_idxs.append(current_idx)
         current_idx = s.find(sub_str, start=current_idx + 1)
-    return match_idxs^
-
-
-fn percent_encode_octets[origin: Origin](data: Span[UInt8, origin]) -> String:
-    var result = String()
-    
-    for i in range(len(data)):
-        var b = data[i]
-        
-        if is_us_ascii_octet(b) and b >= 0x20 and b != 0x25:  # Printable ASCII except %
-            result += chr(Int(b))
-        else:
-            # Fix hex formatting: ensure proper zero-padding
-            var hex_val = hex(Int(b)).upper()
-            # Remove "0X" prefix if present
-            if hex_val.startswith("0X"):
-                hex_val = hex_val[2:]
-            # Ensure two-digit hex format
-            if len(hex_val) == 1:
-                result += "%0" + hex_val
-            else:
-                result += "%" + hex_val
-    
-    return result
-
-fn validate_message_octets_iso_8859_1[origin: Origin](data: Span[UInt8, origin]) raises -> Span[UInt8, origin]:
-    for i in range(len(data)):
-        var b = data[i]
-        
-        if is_iso_8859_1_octet(b):
-            if b >= 0x80:
-                if b >= 0xC0 and b <= 0xF7:
-                    if i + 1 < len(data) and data[i + 1] == 0x0A:
-                        raise Error(
-                            "."
-                        )
-                elif b >= 0x80 and b <= 0xBF:
-                    if i == 0 or (data[i - 1] < 0xC0):
-                        if i + 1 < len(data) and data[i + 1] == 0x0A:
-                            raise Error(
-                                "."
-                            )
-            continue
-            
-        # This should never happen since is_iso_8859_1_octet covers 0x00-0xFF
-        raise Error(
-            "Invalid octet 0x" + hex(Int(b)) + 
-            " at position " + String(i) + 
-            ". HTTP messages must use encoding superset of US-ASCII."
-        )
-    
-    return data
\ No newline at end of file
+    return match_idxs^
\ No newline at end of file

From 99553ad977a59e6f0b6c9e28d7560e31fee47212 Mon Sep 17 00:00:00 2001
From: Val <saviorand@gmail.com>
Date: Sun, 15 Jun 2025 16:22:14 +0200
Subject: [PATCH 08/12] add back newlines

---
 lightbug_http/header.mojo  | 2 +-
 lightbug_http/strings.mojo | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/lightbug_http/header.mojo b/lightbug_http/header.mojo
index 92c58ab..4014a56 100644
--- a/lightbug_http/header.mojo
+++ b/lightbug_http/header.mojo
@@ -115,4 +115,4 @@ struct Headers(Writable, Stringable):
             write_header(writer, header[].key, header[].value)
 
     fn __str__(self) -> String:
-        return String.write(self)
\ No newline at end of file
+        return String.write(self)
diff --git a/lightbug_http/strings.mojo b/lightbug_http/strings.mojo
index 396fbdc..56c3883 100644
--- a/lightbug_http/strings.mojo
+++ b/lightbug_http/strings.mojo
@@ -64,4 +64,4 @@ fn find_all(s: String, sub_str: String) -> List[Int]:
     while current_idx > -1:
         match_idxs.append(current_idx)
         current_idx = s.find(sub_str, start=current_idx + 1)
-    return match_idxs^
\ No newline at end of file
+    return match_idxs^

From 31c764687d9bb4afd800484a226bf26b629d1767 Mon Sep 17 00:00:00 2001
From: Val <saviorand@gmail.com>
Date: Sun, 15 Jun 2025 16:23:40 +0200
Subject: [PATCH 09/12] remove the test

---
 tests/rfc/test_rfc9112_section_2_2_2.mojo | 69 -----------------------
 1 file changed, 69 deletions(-)

diff --git a/tests/rfc/test_rfc9112_section_2_2_2.mojo b/tests/rfc/test_rfc9112_section_2_2_2.mojo
index b3c9993..50bf836 100644
--- a/tests/rfc/test_rfc9112_section_2_2_2.mojo
+++ b/tests/rfc/test_rfc9112_section_2_2_2.mojo
@@ -1,77 +1,8 @@
 import testing
-from memory import Span
-from lightbug_http.strings import (
-    validate_http_message_octets,
-    safe_to_string_rfc9112,
-    to_string_rfc9112_safe,
-    is_us_ascii_octet,
-    is_iso_8859_1_octet,
-    percent_encode_invalid_octets,
-)
-from lightbug_http.io.bytes import Bytes, ByteReader
-from lightbug_http.http.request import HTTPRequest
-
-
-def test_rfc9112_lf_security_vulnerability_prevention():
-    """RFC 9112 Section 2.2-2: Prevent security vulnerabilities from LF (%x0A) in multibyte sequences."""
-    print("Testing: LF security vulnerability prevention...")
-    
-    # Valid LF in HTTP context
-    var valid_http = "GET /test HTTP/1.1\r\nHost: test.com\r\n\r\n"
-    var valid_octets = valid_http.as_bytes()
-    
-    try:
-        var validated = validate_http_message_octets(Span(valid_octets))
-        testing.assert_equal(len(validated), len(valid_octets))
-    except e:
-        testing.assert_true(False, "Valid HTTP message should not raise error: " + String(e))
-    
-    # Invalid multibyte sequence containing LF
-    var malicious_bytes = List[UInt8]()
-    malicious_bytes.extend("GET /".as_bytes())
-    malicious_bytes.append(0xC0)  # Invalid UTF-8 start byte
-    malicious_bytes.append(0x0A)  # LF embedded in multibyte sequence
-    malicious_bytes.append(0x80)  # Continuation byte
-    malicious_bytes.extend(" HTTP/1.1\r\nHost: test.com\r\n\r\n".as_bytes())
-    
-    var malicious_span = Span(malicious_bytes)
-    
-    try:
-        var validated = validate_http_message_octets(malicious_span)
-        testing.assert_true(False, "Should have rejected invalid multibyte sequence with embedded LF")
-    except e:
-        testing.assert_true(True, "Correctly rejected invalid sequence: " + String(e))
-    
-    var safe_result = to_string_rfc9112_safe(malicious_span)
-    
-    testing.assert_true(safe_result.find("%") != -1, "Should percent-encode unsafe sequences")
-
-
-def test_rfc9112_percent_encoding_fallback():
-    """RFC 9112 Section 2.2-2: Test percent-encoding fallback for unsafe sequences."""
-    print("Testing: Percent-encoding fallback for unsafe sequences...")
-    
-    var unsafe_bytes = List[UInt8]()
-    unsafe_bytes.append(0x00)  # NULL byte
-    unsafe_bytes.append(0x0A)  # LF
-    unsafe_bytes.append(0x0D)  # CR
-    unsafe_bytes.append(0x25)  # % (should be encoded)
-    unsafe_bytes.append(0xFF)  # High byte
-    
-    var unsafe_span = Span(unsafe_bytes)
-    var encoded = percent_encode_invalid_octets(unsafe_span)
-    
-    testing.assert_true(encoded.find("%00") != -1, "Should encode NULL byte")
-    testing.assert_true(encoded.find("%0A") != -1, "Should encode LF")
-    testing.assert_true(encoded.find("%0D") != -1, "Should encode CR")
-    testing.assert_true(encoded.find("%25") != -1, "Should encode % character")
-    testing.assert_true(encoded.find("%FF") != -1, "Should encode high byte")
 
 
 def main():
     print("🧪 Testing RFC 9112 Section 2.2-2: HTTP Message Parsing as Octets")
     
-    test_rfc9112_lf_security_vulnerability_prevention()
-    test_rfc9112_percent_encoding_fallback()
     
     print("\n✅ RFC 9112 Section 2.2-2 requirement fully verified")
\ No newline at end of file

From 6639e29c5b3b2ba8127598e1cc745caabf7f0c1b Mon Sep 17 00:00:00 2001
From: Val <saviorand@gmail.com>
Date: Sun, 15 Jun 2025 20:09:23 +0200
Subject: [PATCH 10/12] switch parse raw to bytes

---
 lightbug_http/header.mojo            |  75 ++++++++---
 lightbug_http/http/response.mojo     |   4 +-
 magic.lock                           | 181 +++++++++++++++++++++++++++
 tests/lightbug_http/test_header.mojo |   6 +-
 4 files changed, 246 insertions(+), 20 deletions(-)

diff --git a/lightbug_http/header.mojo b/lightbug_http/header.mojo
index 4014a56..4208d13 100644
--- a/lightbug_http/header.mojo
+++ b/lightbug_http/header.mojo
@@ -1,5 +1,5 @@
 from collections import Dict, Optional
-from lightbug_http.io.bytes import Bytes, ByteReader, ByteWriter, is_newline, is_space
+from lightbug_http.io.bytes import Bytes, ByteReader, ByteWriter, is_newline, is_space, ByteView
 from lightbug_http.strings import BytesConstant
 from lightbug_http._logger import logger
 from lightbug_http.strings import rChar, nChar, lineBreak, to_string
@@ -37,22 +37,56 @@ fn write_header[T: Writer](mut writer: T, key: String, value: String):
     writer.write(key + ": ", value, lineBreak)
 
 
+fn bytes_equal_ignore_case(a: ByteView, b: String) -> Bool:
+    """Compare ByteView with String case-insensitively without creating intermediate strings."""
+    if len(a) != len(b):
+        return False
+    
+    for i in range(len(a)):
+        var byte_a = a[i]
+        var byte_b = ord(b[i])
+        
+        # Convert to lowercase for comparison
+        if byte_a >= ord('A') and byte_a <= ord('Z'):
+            byte_a = byte_a + 32  # Convert to lowercase
+        if byte_b >= ord('A') and byte_b <= ord('Z'):
+            byte_b = byte_b + 32  # Convert to lowercase
+            
+        if byte_a != byte_b:
+            return False
+    return True
+
+
+fn bytes_to_lower_string(b: ByteView) -> String:
+    """Convert ByteView to lowercase String."""
+    var result = Bytes()
+    for i in range(len(b)):
+        var byte_val = b[i]
+        if byte_val >= ord('A') and byte_val <= ord('Z'):
+            byte_val = byte_val + 32  # Convert to lowercase
+        result.append(byte_val)
+    return to_string(result^)
+
+
 @value
-struct Headers(Writable, Stringable):
+struct Headers[origin: Origin](Writable, Stringable):
     """Represents the header key/values in an http request/response.
 
-    Header keys are normalized to lowercase
+    Header keys are normalized to lowercase and stored as strings for efficient lookup,
+    while values are stored as bytes to comply with RFC requirements.
     """
 
-    var _inner: Dict[String, String]
+    var _inner: Dict[String, Bytes]
 
     fn __init__(out self):
-        self._inner = Dict[String, String]()
+        self._inner = Dict[String, Bytes]()
 
     fn __init__(out self, owned *headers: Header):
-        self._inner = Dict[String, String]()
+        self._inner = Dict[String, Bytes]()
         for header in headers:
-            self[header[].key.lower()] = header[].value
+            var key_lower = header[].key.lower()
+            var value_bytes = Bytes(header[].value.as_bytes())
+            self._inner[key_lower] = value_bytes
 
     @always_inline
     fn empty(self) -> Bool:
@@ -65,17 +99,22 @@ struct Headers(Writable, Stringable):
     @always_inline
     fn __getitem__(self, key: String) raises -> String:
         try:
-            return self._inner[key.lower()]
+            var value_bytes = self._inner[key.lower()]
+            return to_string(value_bytes)
         except:
             raise Error("KeyError: Key not found in headers: " + key)
 
     @always_inline
     fn get(self, key: String) -> Optional[String]:
-        return self._inner.get(key.lower())
+        var value_opt = self._inner.get(key.lower())
+        if value_opt:
+            return to_string(value_opt.value())
+        return None
 
     @always_inline
     fn __setitem__(mut self, key: String, value: String):
-        self._inner[key.lower()] = value
+        var value_bytes = Bytes(value.as_bytes())
+        self._inner[key.lower()] = value_bytes
 
     fn content_length(self) -> Int:
         try:
@@ -83,7 +122,7 @@ struct Headers(Writable, Stringable):
         except:
             return 0
 
-    fn parse_raw(mut self, mut r: ByteReader) raises -> (String, String, String, List[String]):
+    fn parse_raw[origin: Origin](mut self, mut r: ByteReader[origin]) raises -> (ByteView[origin], ByteView[origin], ByteView[origin], List[String]):
         var first_byte = r.peek()
         if not first_byte:
             raise Error("Headers.parse_raw: Failed to read first byte from response header")
@@ -102,17 +141,21 @@ struct Headers(Writable, Stringable):
                 r.increment()
             # TODO (bgreni): Handle possible trailing whitespace
             var value = r.read_line()
-            var k = String(key).lower()
-            if k == HeaderKey.SET_COOKIE:
+            
+            if bytes_equal_ignore_case(key, HeaderKey.SET_COOKIE):
                 cookies.append(String(value))
                 continue
 
-            self._inner[k] = String(value)
-        return (String(first), String(second), String(third), cookies)
+            var key_str = bytes_to_lower_string(key)
+            var value_bytes = value.to_bytes()
+            self._inner[key_str] = value_bytes
+            
+        return (first, second, third, cookies)
 
     fn write_to[T: Writer, //](self, mut writer: T):
         for header in self._inner.items():
-            write_header(writer, header[].key, header[].value)
+            var value_str = to_string(header[].value)
+            write_header(writer, header[].key, value_str)
 
     fn __str__(self) -> String:
         return String.write(self)
diff --git a/lightbug_http/http/response.mojo b/lightbug_http/http/response.mojo
index c8cd2cb..3a98b69 100644
--- a/lightbug_http/http/response.mojo
+++ b/lightbug_http/http/response.mojo
@@ -46,7 +46,7 @@ struct HTTPResponse(Writable, Stringable):
 
         try:
             var properties = headers.parse_raw(reader)
-            protocol, status_code, status_text = properties[0], properties[1], properties[2]
+            protocol, status_code, status_text = String(properties[0]), String(properties[1]), String(properties[2])
             cookies.from_headers(properties[3])
             reader.skip_carriage_return()
         except e:
@@ -76,7 +76,7 @@ struct HTTPResponse(Writable, Stringable):
 
         try:
             var properties = headers.parse_raw(reader)
-            protocol, status_code, status_text = properties[0], properties[1], properties[2]
+            protocol, status_code, status_text = String(properties[0]), String(properties[1]), String(properties[2])
             cookies.from_headers(properties[3])
             reader.skip_carriage_return()
         except e:
diff --git a/magic.lock b/magic.lock
index 35eb6bc..a7c0339 100644
--- a/magic.lock
+++ b/magic.lock
@@ -699,6 +699,187 @@ environments:
       - conda: https://conda.anaconda.org/conda-forge/osx-arm64/zeromq-4.3.5-hc1bb282_7.conda
       - conda: https://conda.anaconda.org/conda-forge/noarch/zipp-3.21.0-pyhd8ed1ab_1.conda
       - conda: https://conda.anaconda.org/conda-forge/osx-arm64/zstandard-0.23.0-py312hea69d52_1.conda
+  rfc-tests:
+    channels:
+    - url: https://conda.anaconda.org/conda-forge/
+    - url: https://conda.modular.com/max/
+    - url: https://repo.prefix.dev/modular-community/
+    packages:
+      linux-64:
+      - conda: https://conda.anaconda.org/conda-forge/linux-64/_libgcc_mutex-0.1-conda_forge.tar.bz2
+      - conda: https://conda.anaconda.org/conda-forge/linux-64/_openmp_mutex-4.5-2_gnu.tar.bz2
+      - conda: https://conda.anaconda.org/conda-forge/linux-64/bzip2-1.0.8-h4bc722e_7.conda
+      - conda: https://conda.anaconda.org/conda-forge/noarch/ca-certificates-2025.1.31-hbd8a1cb_1.conda
+      - conda: https://conda.anaconda.org/conda-forge/noarch/click-8.1.8-pyh707e725_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/noarch/colorama-0.4.6-pyhd8ed1ab_1.conda
+      - conda: https://conda.anaconda.org/conda-forge/noarch/cpython-3.12.10-py312hd8ed1ab_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/noarch/importlib-metadata-8.6.1-pyha770c72_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/noarch/jupyter_client-8.6.3-pyhd8ed1ab_1.conda
+      - conda: https://conda.anaconda.org/conda-forge/noarch/jupyter_core-5.7.2-pyh31011fe_1.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-64/keyutils-1.6.1-h166bdaf_0.tar.bz2
+      - conda: https://conda.anaconda.org/conda-forge/linux-64/krb5-1.21.3-h659f571_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-64/ld_impl_linux-64-2.43-h712a8e2_4.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-64/libblas-3.9.0-31_h59b9bed_openblas.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-64/libcblas-3.9.0-31_he106b2a_openblas.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-64/libedit-3.1.20250104-pl5321h7949ede_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-64/libexpat-2.7.0-h5888daf_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-64/libffi-3.4.6-h2dba641_1.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-64/libgcc-14.2.0-h767d61c_2.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-64/libgcc-ng-14.2.0-h69a702a_2.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-64/libgfortran-14.2.0-h69a702a_2.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-64/libgfortran5-14.2.0-hf1ad2bd_2.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-64/libgomp-14.2.0-h767d61c_2.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-64/liblapack-3.9.0-31_h7ac8fdf_openblas.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-64/liblzma-5.8.1-hb9d3cd8_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-64/libnsl-2.0.1-hd590300_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-64/libopenblas-0.3.29-pthreads_h94d23a6_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-64/libsodium-1.0.20-h4ab18f5_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-64/libsqlite-3.49.1-hee588c1_2.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-64/libstdcxx-14.2.0-h8f9b012_2.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-64/libstdcxx-ng-14.2.0-h4852527_2.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-64/libuuid-2.38.1-h0b41bf4_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-64/libxcrypt-4.4.36-hd590300_1.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-64/libzlib-1.3.1-hb9d3cd8_2.conda
+      - conda: https://conda.modular.com/max/noarch/max-25.3.0-release.conda
+      - conda: https://conda.modular.com/max/linux-64/max-core-25.3.0-release.conda
+      - conda: https://conda.modular.com/max/linux-64/max-python-25.3.0-release.conda
+      - conda: https://conda.modular.com/max/noarch/mblack-25.3.0-release.conda
+      - conda: https://conda.modular.com/max/noarch/mojo-jupyter-25.3.0-release.conda
+      - conda: https://conda.anaconda.org/conda-forge/noarch/mypy_extensions-1.0.0-pyha770c72_1.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-64/ncurses-6.5-h2d0b736_3.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-64/numpy-1.26.4-py312heda63a1_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-64/openssl-3.5.0-h7b32b05_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/noarch/packaging-25.0-pyhd8ed1ab_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/noarch/pathspec-0.12.1-pyhd8ed1ab_1.conda
+      - conda: https://conda.anaconda.org/conda-forge/noarch/platformdirs-4.3.7-pyh29332c3_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-64/python-3.12.10-h9e4cc4f_0_cpython.conda
+      - conda: https://conda.anaconda.org/conda-forge/noarch/python-dateutil-2.9.0.post0-pyhff2d567_1.conda
+      - conda: https://conda.anaconda.org/conda-forge/noarch/python-gil-3.12.10-hd8ed1ab_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/noarch/python_abi-3.12-7_cp312.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-64/pyzmq-26.4.0-py312hbf22597_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-64/readline-8.2-h8c095d6_2.conda
+      - conda: https://conda.anaconda.org/conda-forge/noarch/six-1.17.0-pyhd8ed1ab_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-64/tk-8.6.13-noxft_h4845f30_101.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-64/tornado-6.4.2-py312h66e93f0_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/noarch/tqdm-4.67.1-pyhd8ed1ab_1.conda
+      - conda: https://conda.anaconda.org/conda-forge/noarch/traitlets-5.14.3-pyhd8ed1ab_1.conda
+      - conda: https://conda.anaconda.org/conda-forge/noarch/typing_extensions-4.13.2-pyh29332c3_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/noarch/tzdata-2025b-h78e105d_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-64/zeromq-4.3.5-h3b0a872_7.conda
+      - conda: https://conda.anaconda.org/conda-forge/noarch/zipp-3.21.0-pyhd8ed1ab_1.conda
+      linux-aarch64:
+      - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/_openmp_mutex-4.5-2_gnu.tar.bz2
+      - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/bzip2-1.0.8-h68df207_7.conda
+      - conda: https://conda.anaconda.org/conda-forge/noarch/ca-certificates-2025.1.31-hbd8a1cb_1.conda
+      - conda: https://conda.anaconda.org/conda-forge/noarch/click-8.1.8-pyh707e725_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/noarch/colorama-0.4.6-pyhd8ed1ab_1.conda
+      - conda: https://conda.anaconda.org/conda-forge/noarch/cpython-3.12.10-py312hd8ed1ab_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/noarch/importlib-metadata-8.6.1-pyha770c72_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/noarch/jupyter_client-8.6.3-pyhd8ed1ab_1.conda
+      - conda: https://conda.anaconda.org/conda-forge/noarch/jupyter_core-5.7.2-pyh31011fe_1.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/keyutils-1.6.1-h4e544f5_0.tar.bz2
+      - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/krb5-1.21.3-h50a48e9_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/ld_impl_linux-aarch64-2.43-h80caac9_4.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libblas-3.9.0-31_h1a9f1db_openblas.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libcblas-3.9.0-31_hab92f65_openblas.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libedit-3.1.20250104-pl5321h976ea20_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libexpat-2.7.0-h5ad3122_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libffi-3.4.6-he21f813_1.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libgcc-14.2.0-he277a41_2.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libgcc-ng-14.2.0-he9431aa_2.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libgfortran-14.2.0-he9431aa_2.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libgfortran5-14.2.0-hb6113d0_2.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libgomp-14.2.0-he277a41_2.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/liblapack-3.9.0-31_h411afd4_openblas.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/liblzma-5.8.1-h86ecc28_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libnsl-2.0.1-h31becfc_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libopenblas-0.3.29-pthreads_h9d3fd7e_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libsodium-1.0.20-h68df207_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libsqlite-3.49.1-h5eb1b54_2.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libstdcxx-14.2.0-h3f4de04_2.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libstdcxx-ng-14.2.0-hf1166c9_2.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libuuid-2.38.1-hb4cce97_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libxcrypt-4.4.36-h31becfc_1.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libzlib-1.3.1-h86ecc28_2.conda
+      - conda: https://conda.modular.com/max/noarch/max-25.3.0-release.conda
+      - conda: https://conda.modular.com/max/linux-aarch64/max-core-25.3.0-release.conda
+      - conda: https://conda.modular.com/max/linux-aarch64/max-python-25.3.0-release.conda
+      - conda: https://conda.modular.com/max/noarch/mblack-25.3.0-release.conda
+      - conda: https://conda.modular.com/max/noarch/mojo-jupyter-25.3.0-release.conda
+      - conda: https://conda.anaconda.org/conda-forge/noarch/mypy_extensions-1.0.0-pyha770c72_1.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/ncurses-6.5-ha32ae93_3.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/numpy-1.26.4-py312h470d778_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/openssl-3.5.0-hd08dc88_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/noarch/packaging-25.0-pyhd8ed1ab_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/noarch/pathspec-0.12.1-pyhd8ed1ab_1.conda
+      - conda: https://conda.anaconda.org/conda-forge/noarch/platformdirs-4.3.7-pyh29332c3_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/python-3.12.10-h1683364_0_cpython.conda
+      - conda: https://conda.anaconda.org/conda-forge/noarch/python-dateutil-2.9.0.post0-pyhff2d567_1.conda
+      - conda: https://conda.anaconda.org/conda-forge/noarch/python-gil-3.12.10-hd8ed1ab_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/noarch/python_abi-3.12-7_cp312.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/pyzmq-26.4.0-py312h2427ae1_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/readline-8.2-h8382b9d_2.conda
+      - conda: https://conda.anaconda.org/conda-forge/noarch/six-1.17.0-pyhd8ed1ab_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/tk-8.6.13-h194ca79_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/tornado-6.4.2-py312h52516f5_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/noarch/tqdm-4.67.1-pyhd8ed1ab_1.conda
+      - conda: https://conda.anaconda.org/conda-forge/noarch/traitlets-5.14.3-pyhd8ed1ab_1.conda
+      - conda: https://conda.anaconda.org/conda-forge/noarch/typing_extensions-4.13.2-pyh29332c3_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/noarch/tzdata-2025b-h78e105d_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/zeromq-4.3.5-h5efb499_7.conda
+      - conda: https://conda.anaconda.org/conda-forge/noarch/zipp-3.21.0-pyhd8ed1ab_1.conda
+      osx-arm64:
+      - conda: https://conda.anaconda.org/conda-forge/osx-arm64/bzip2-1.0.8-h99b78c6_7.conda
+      - conda: https://conda.anaconda.org/conda-forge/noarch/ca-certificates-2025.1.31-hbd8a1cb_1.conda
+      - conda: https://conda.anaconda.org/conda-forge/noarch/click-8.1.8-pyh707e725_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/noarch/colorama-0.4.6-pyhd8ed1ab_1.conda
+      - conda: https://conda.anaconda.org/conda-forge/noarch/cpython-3.12.10-py312hd8ed1ab_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/noarch/importlib-metadata-8.6.1-pyha770c72_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/noarch/jupyter_client-8.6.3-pyhd8ed1ab_1.conda
+      - conda: https://conda.anaconda.org/conda-forge/noarch/jupyter_core-5.7.2-pyh31011fe_1.conda
+      - conda: https://conda.anaconda.org/conda-forge/osx-arm64/krb5-1.21.3-h237132a_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libblas-3.9.0-31_h10e41b3_openblas.conda
+      - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libcblas-3.9.0-31_hb3479ef_openblas.conda
+      - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libcxx-20.1.3-ha82da77_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libedit-3.1.20250104-pl5321hafb1f1b_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libexpat-2.7.0-h286801f_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libffi-3.4.6-h1da3d7d_1.conda
+      - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libgfortran-14.2.0-heb5dd2a_105.conda
+      - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libgfortran5-14.2.0-h2c44a93_105.conda
+      - conda: https://conda.anaconda.org/conda-forge/osx-arm64/liblapack-3.9.0-31_hc9a63f6_openblas.conda
+      - conda: https://conda.anaconda.org/conda-forge/osx-arm64/liblzma-5.8.1-h39f12f2_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libopenblas-0.3.29-openmp_hf332438_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libsodium-1.0.20-h99b78c6_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libsqlite-3.49.1-h3f77e49_2.conda
+      - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libzlib-1.3.1-h8359307_2.conda
+      - conda: https://conda.anaconda.org/conda-forge/osx-arm64/llvm-openmp-20.1.3-hdb05f8b_0.conda
+      - conda: https://conda.modular.com/max/noarch/max-25.3.0-release.conda
+      - conda: https://conda.modular.com/max/osx-arm64/max-core-25.3.0-release.conda
+      - conda: https://conda.modular.com/max/osx-arm64/max-python-25.3.0-release.conda
+      - conda: https://conda.modular.com/max/noarch/mblack-25.3.0-release.conda
+      - conda: https://conda.modular.com/max/noarch/mojo-jupyter-25.3.0-release.conda
+      - conda: https://conda.anaconda.org/conda-forge/noarch/mypy_extensions-1.0.0-pyha770c72_1.conda
+      - conda: https://conda.anaconda.org/conda-forge/osx-arm64/ncurses-6.5-h5e97a16_3.conda
+      - conda: https://conda.anaconda.org/conda-forge/osx-arm64/numpy-1.26.4-py312h8442bc7_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/osx-arm64/openssl-3.5.0-h81ee809_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/noarch/packaging-25.0-pyhd8ed1ab_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/noarch/pathspec-0.12.1-pyhd8ed1ab_1.conda
+      - conda: https://conda.anaconda.org/conda-forge/noarch/platformdirs-4.3.7-pyh29332c3_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/osx-arm64/python-3.12.10-hc22306f_0_cpython.conda
+      - conda: https://conda.anaconda.org/conda-forge/noarch/python-dateutil-2.9.0.post0-pyhff2d567_1.conda
+      - conda: https://conda.anaconda.org/conda-forge/noarch/python-gil-3.12.10-hd8ed1ab_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/noarch/python_abi-3.12-7_cp312.conda
+      - conda: https://conda.anaconda.org/conda-forge/osx-arm64/pyzmq-26.4.0-py312hf4875e0_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/osx-arm64/readline-8.2-h1d1bf99_2.conda
+      - conda: https://conda.anaconda.org/conda-forge/noarch/six-1.17.0-pyhd8ed1ab_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/osx-arm64/tk-8.6.13-h5083fa2_1.conda
+      - conda: https://conda.anaconda.org/conda-forge/osx-arm64/tornado-6.4.2-py312hea69d52_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/noarch/tqdm-4.67.1-pyhd8ed1ab_1.conda
+      - conda: https://conda.anaconda.org/conda-forge/noarch/traitlets-5.14.3-pyhd8ed1ab_1.conda
+      - conda: https://conda.anaconda.org/conda-forge/noarch/typing_extensions-4.13.2-pyh29332c3_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/noarch/tzdata-2025b-h78e105d_0.conda
+      - conda: https://conda.anaconda.org/conda-forge/osx-arm64/zeromq-4.3.5-hc1bb282_7.conda
+      - conda: https://conda.anaconda.org/conda-forge/noarch/zipp-3.21.0-pyhd8ed1ab_1.conda
   unit-tests:
     channels:
     - url: https://conda.anaconda.org/conda-forge/
diff --git a/tests/lightbug_http/test_header.mojo b/tests/lightbug_http/test_header.mojo
index d790006..5d0f88e 100644
--- a/tests/lightbug_http/test_header.mojo
+++ b/tests/lightbug_http/test_header.mojo
@@ -21,7 +21,8 @@ def test_parse_request_header():
     var protocol: String
     var uri: String
     var properties = header.parse_raw(reader)
-    method, uri, protocol = properties[0], properties[1], properties[2]
+    # Convert ByteView to String for comparison
+    method, uri, protocol = String(properties[0]), String(properties[1]), String(properties[2])
     assert_equal(uri, "/index.html")
     assert_equal(protocol, "HTTP/1.1")
     assert_equal(method, "GET")
@@ -40,7 +41,8 @@ def test_parse_response_header():
     var status_text: String
     var reader = ByteReader(headers_str.as_bytes())
     var properties = header.parse_raw(reader)
-    protocol, status_code, status_text = properties[0], properties[1], properties[2]
+    # Convert ByteView to String for comparison
+    protocol, status_code, status_text = String(properties[0]), String(properties[1]), String(properties[2])
     assert_equal(protocol, "HTTP/1.1")
     assert_equal(status_code, "200")
     assert_equal(status_text, "OK")

From 790ae034ea6fb696032566b9f88822b200c7bc6c Mon Sep 17 00:00:00 2001
From: Val <saviorand@gmail.com>
Date: Sun, 15 Jun 2025 20:55:24 +0200
Subject: [PATCH 11/12] wip switching to byteview

---
 lightbug_http/header.mojo       | 35 ++---------------------------
 lightbug_http/http/request.mojo | 39 ++++++++++++++++++++-------------
 lightbug_http/io/bytes.mojo     | 31 ++++++++++++++++++++++++++
 3 files changed, 57 insertions(+), 48 deletions(-)

diff --git a/lightbug_http/header.mojo b/lightbug_http/header.mojo
index 4208d13..c769694 100644
--- a/lightbug_http/header.mojo
+++ b/lightbug_http/header.mojo
@@ -1,5 +1,5 @@
 from collections import Dict, Optional
-from lightbug_http.io.bytes import Bytes, ByteReader, ByteWriter, is_newline, is_space, ByteView
+from lightbug_http.io.bytes import Bytes, ByteReader, ByteWriter, is_newline, is_space, ByteView, bytes_equal_ignore_case, bytes_to_lower_string
 from lightbug_http.strings import BytesConstant
 from lightbug_http._logger import logger
 from lightbug_http.strings import rChar, nChar, lineBreak, to_string
@@ -37,42 +37,11 @@ fn write_header[T: Writer](mut writer: T, key: String, value: String):
     writer.write(key + ": ", value, lineBreak)
 
 
-fn bytes_equal_ignore_case(a: ByteView, b: String) -> Bool:
-    """Compare ByteView with String case-insensitively without creating intermediate strings."""
-    if len(a) != len(b):
-        return False
-    
-    for i in range(len(a)):
-        var byte_a = a[i]
-        var byte_b = ord(b[i])
-        
-        # Convert to lowercase for comparison
-        if byte_a >= ord('A') and byte_a <= ord('Z'):
-            byte_a = byte_a + 32  # Convert to lowercase
-        if byte_b >= ord('A') and byte_b <= ord('Z'):
-            byte_b = byte_b + 32  # Convert to lowercase
-            
-        if byte_a != byte_b:
-            return False
-    return True
-
-
-fn bytes_to_lower_string(b: ByteView) -> String:
-    """Convert ByteView to lowercase String."""
-    var result = Bytes()
-    for i in range(len(b)):
-        var byte_val = b[i]
-        if byte_val >= ord('A') and byte_val <= ord('Z'):
-            byte_val = byte_val + 32  # Convert to lowercase
-        result.append(byte_val)
-    return to_string(result^)
-
-
 @value
 struct Headers[origin: Origin](Writable, Stringable):
     """Represents the header key/values in an http request/response.
 
-    Header keys are normalized to lowercase and stored as strings for efficient lookup,
+    Header keys are normalized to lowercase and stored as strings,
     while values are stored as bytes to comply with RFC requirements.
     """
 
diff --git a/lightbug_http/http/request.mojo b/lightbug_http/http/request.mojo
index f678d56..92f652f 100644
--- a/lightbug_http/http/request.mojo
+++ b/lightbug_http/http/request.mojo
@@ -1,5 +1,5 @@
 from memory import Span
-from lightbug_http.io.bytes import Bytes, bytes, ByteReader, ByteWriter
+from lightbug_http.io.bytes import Bytes, bytes, ByteReader, ByteWriter, ByteView
 from lightbug_http.header import Headers, HeaderKey, Header, write_header
 from lightbug_http.cookie import RequestCookieJar
 from lightbug_http.uri import URI
@@ -30,29 +30,30 @@ struct RequestMethod:
     alias options = RequestMethod("OPTIONS")
 
 
-@value
-struct HTTPRequest(Writable, Stringable):
-    var headers: Headers
+struct HTTPRequest[origin: Origin](Writable, Stringable):
+    var headers: Headers[origin]
     var cookies: RequestCookieJar
     var uri: URI
     var body_raw: Bytes
 
-    var method: String
-    var protocol: String
+    var method: ByteView[origin]
+    var protocol: ByteView[origin]
 
     var server_is_tls: Bool
     var timeout: Duration
 
     @staticmethod
-    fn from_bytes(addr: String, max_body_size: Int, b: Span[Byte]) raises -> HTTPRequest:
+    fn from_bytes(addr: String, max_body_size: Int, b: Span[Byte]) raises -> HTTPRequest[origin]:
         var reader = ByteReader(b)
-        var headers = Headers()
-        var method: String
-        var protocol: String
-        var uri: String
+        var headers = Headers[origin]()
+        var method: ByteView[origin]
+        var protocol: ByteView[origin]
+        var uri: ByteView[origin]
         try:
             var rest = headers.parse_raw(reader)
-            method, uri, protocol = rest[0], rest[1], rest[2]
+            var method = rest[0]
+            var uri = rest[1]
+            var protocol = rest[2]
         except e:
             raise Error("HTTPRequest.from_bytes: Failed to parse request headers: " + String(e))
 
@@ -67,7 +68,7 @@ struct HTTPRequest(Writable, Stringable):
             raise Error("HTTPRequest.from_bytes: Request body too large.")
 
         var request = HTTPRequest(
-            URI.parse(addr + uri), headers=headers, method=method, protocol=protocol, cookies=cookies
+            URI.parse(addr + String(uri)), headers=headers, method=String(method), protocol=String(protocol), cookies=cookies
         )
 
         if content_length > 0:
@@ -82,7 +83,7 @@ struct HTTPRequest(Writable, Stringable):
     fn __init__(
         out self,
         uri: URI,
-        headers: Headers = Headers(),
+        headers: Headers[origin] = Headers[origin](),
         cookies: RequestCookieJar = RequestCookieJar(),
         method: String = "GET",
         protocol: String = strHttp11,
@@ -92,7 +93,7 @@ struct HTTPRequest(Writable, Stringable):
     ):
         self.headers = headers
         self.cookies = cookies
-        self.method = method
+        self.method = ByteView(method.as_bytes())
         self.protocol = protocol
         self.uri = uri
         self.body_raw = body
@@ -108,6 +109,17 @@ struct HTTPRequest[origin: Origin](Writable, Stringable):
             else:
                 self.headers[HeaderKey.HOST] = uri.host
 
+    fn __copyinit__(out self, existing: HTTPRequest[origin]):
+        """Copy-construct a request by copying every field of `existing`, including `server_is_tls` and `timeout`."""
+        self.headers = existing.headers
+        self.cookies = existing.cookies
+        self.uri = existing.uri
+        self.body_raw = existing.body_raw
+        self.method = existing.method
+        self.protocol = existing.protocol
+        self.server_is_tls = existing.server_is_tls
+        self.timeout = existing.timeout
+
     fn get_body(self) -> StringSlice[__origin_of(self.body_raw)]:
         return StringSlice(unsafe_from_utf8=Span(self.body_raw))
 
diff --git a/lightbug_http/io/bytes.mojo b/lightbug_http/io/bytes.mojo
index 089634f..94caaba 100644
--- a/lightbug_http/io/bytes.mojo
+++ b/lightbug_http/io/bytes.mojo
@@ -26,6 +26,37 @@ fn is_space(b: Byte) -> Bool:
     return b == BytesConstant.whitespace
 
 
+fn bytes_equal_ignore_case(a: ByteView, b: String) -> Bool:
+    """Compare a ByteView with a String byte-by-byte, ignoring ASCII case, without building intermediate strings."""
+    var b_bytes = b.as_bytes()  # read the raw bytes once instead of indexing the String for every position
+    if len(a) != len(b_bytes):
+        return False
+    for i in range(len(a)):
+        var byte_a = a[i]
+        var byte_b = b_bytes[i]
+
+        # Fold ASCII 'A'..'Z' onto 'a'..'z' (the ranges are 32 apart) before comparing
+        if byte_a >= ord('A') and byte_a <= ord('Z'):
+            byte_a = byte_a + 32
+        if byte_b >= ord('A') and byte_b <= ord('Z'):
+            byte_b = byte_b + 32
+
+        if byte_a != byte_b:
+            return False
+    return True
+
+
+fn bytes_to_lower_string(b: ByteView) -> String:
+    """Build a String from `b` with ASCII 'A'..'Z' mapped to 'a'..'z'; every other byte is kept as-is."""
+    var lowered = Bytes()
+    for idx in range(len(b)):
+        var current = b[idx]
+        var is_upper = current >= ord('A') and current <= ord('Z')
+        # ASCII upper- and lowercase letters are exactly 32 apart
+        lowered.append(current + 32 if is_upper else current)
+    return to_string(lowered^)
+
+
 struct ByteWriter(Writer):
     var _inner: Bytes
 

From 51552a5c1457b1bb027b90b2a475e227e3443952 Mon Sep 17 00:00:00 2001
From: Val <saviorand@gmail.com>
Date: Sun, 15 Jun 2025 21:15:57 +0200
Subject: [PATCH 12/12] switch to bytes instead of byteview

---
 lightbug_http/http/request.mojo | 17 +++++++++--------
 lightbug_http/strings.mojo      |  4 ++++
 2 files changed, 13 insertions(+), 8 deletions(-)

diff --git a/lightbug_http/http/request.mojo b/lightbug_http/http/request.mojo
index 92f652f..b91c43b 100644
--- a/lightbug_http/http/request.mojo
+++ b/lightbug_http/http/request.mojo
@@ -14,6 +14,7 @@ from lightbug_http.strings import (
     nChar,
     lineBreak,
     to_string,
+    to_bytes,
 )
 
 
@@ -36,8 +37,8 @@ struct HTTPRequest[origin: Origin](Writable, Stringable):
     var uri: URI
     var body_raw: Bytes
 
-    var method: ByteView[origin]
-    var protocol: ByteView[origin]
+    var method: Bytes
+    var protocol: Bytes
 
     var server_is_tls: Bool
     var timeout: Duration
@@ -46,9 +47,9 @@ struct HTTPRequest[origin: Origin](Writable, Stringable):
     fn from_bytes(addr: String, max_body_size: Int, b: Span[Byte]) raises -> HTTPRequest[origin]:
         var reader = ByteReader(b)
         var headers = Headers[origin]()
-        var method: ByteView[origin]
-        var protocol: ByteView[origin]
-        var uri: ByteView[origin]
+        var method: Bytes
+        var protocol: Bytes
+        var uri: Bytes
         try:
             var rest = headers.parse_raw(reader)
             var method = rest[0]
@@ -68,7 +69,7 @@ struct HTTPRequest[origin: Origin](Writable, Stringable):
             raise Error("HTTPRequest.from_bytes: Request body too large.")
 
         var request = HTTPRequest(
-            URI.parse(addr + String(uri)), headers=headers, method=String(method), protocol=String(protocol), cookies=cookies
+            URI.parse(addr + to_string(uri)), headers=headers, method=to_string(method), protocol=to_string(protocol), cookies=cookies
         )
 
         if content_length > 0:
@@ -93,8 +94,8 @@ struct HTTPRequest[origin: Origin](Writable, Stringable):
     ):
         self.headers = headers
         self.cookies = cookies
-        self.method = ByteView(method.as_bytes())
-        self.protocol = protocol
+        self.method = to_bytes(method)
+        self.protocol = to_bytes(protocol)
         self.uri = uri
         self.body_raw = body
         self.server_is_tls = server_is_tls
diff --git a/lightbug_http/strings.mojo b/lightbug_http/strings.mojo
index 56c3883..5826b03 100644
--- a/lightbug_http/strings.mojo
+++ b/lightbug_http/strings.mojo
@@ -58,6 +58,10 @@ fn to_string(owned bytes: Bytes) -> String:
     return result^
 
 
+fn to_bytes(s: String) -> Bytes:  # Convert a String into a `Bytes` value.
+    return Bytes(s.as_bytes())  # built from the String's `as_bytes()` view; no validation or transformation is applied here
+
+
 fn find_all(s: String, sub_str: String) -> List[Int]:
     match_idxs = List[Int]()
     var current_idx: Int = s.find(sub_str)