From 87bdeabc01600b4e11a52a4f0749422d42083a72 Mon Sep 17 00:00:00 2001 From: Val Date: Sun, 15 Jun 2025 14:31:49 +0200 Subject: [PATCH 01/12] implement section 2.2-2 requirement --- lightbug_http/header.mojo | 17 ++-- lightbug_http/strings.mojo | 139 +++++++++++++++++++++++++++++ tests/rfc/test_rfc9112_simple.mojo | 114 +++++++++++++++++++++++ 3 files changed, 265 insertions(+), 5 deletions(-) create mode 100644 tests/rfc/test_rfc9112_simple.mojo diff --git a/lightbug_http/header.mojo b/lightbug_http/header.mojo index 4014a56..4c91c80 100644 --- a/lightbug_http/header.mojo +++ b/lightbug_http/header.mojo @@ -1,6 +1,6 @@ from collections import Dict, Optional from lightbug_http.io.bytes import Bytes, ByteReader, ByteWriter, is_newline, is_space -from lightbug_http.strings import BytesConstant +from lightbug_http.strings import BytesConstant, to_string_rfc9112_safe from lightbug_http._logger import logger from lightbug_http.strings import rChar, nChar, lineBreak, to_string @@ -102,13 +102,20 @@ struct Headers(Writable, Stringable): r.increment() # TODO (bgreni): Handle possible trailing whitespace var value = r.read_line() - var k = String(key).lower() + + var k = to_string_rfc9112_safe(key._inner).lower() if k == HeaderKey.SET_COOKIE: - cookies.append(String(value)) + cookies.append(to_string_rfc9112_safe(value._inner)) continue - self._inner[k] = String(value) - return (String(first), String(second), String(third), cookies) + self._inner[k] = to_string_rfc9112_safe(value._inner) + + return ( + to_string_rfc9112_safe(first._inner), + to_string_rfc9112_safe(second._inner), + to_string_rfc9112_safe(third._inner), + cookies + ) fn write_to[T: Writer, //](self, mut writer: T): for header in self._inner.items(): diff --git a/lightbug_http/strings.mojo b/lightbug_http/strings.mojo index 56c3883..26c1214 100644 --- a/lightbug_http/strings.mojo +++ b/lightbug_http/strings.mojo @@ -33,6 +33,126 @@ struct BytesConstant: alias DOUBLE_CRLF = bytes(lineBreak + lineBreak) +# RFC 9112 Section 2.2-2: US-ASCII character bounds +alias US_ASCII_MAX = 0x7F +alias ISO_8859_1_MAX = 0xFF + + +fn is_us_ascii_octet(b: UInt8) -> Bool: + """Check if a byte is within US-ASCII range (0x00-0x7F). + + Args: + b: The byte to check. + + Returns: + True if the byte is US-ASCII compliant. + """ + return b <= US_ASCII_MAX + + +fn is_iso_8859_1_octet(b: UInt8) -> Bool: + """Check if a byte is within ISO-8859-1 range (0x00-0xFF). + + Args: + b: The byte to check. + + Returns: + True if the byte is ISO-8859-1 compliant. + """ + return b <= ISO_8859_1_MAX + + +fn validate_http_message_octets[origin: Origin](data: Span[UInt8, origin]) raises -> Span[UInt8, origin]: + """RFC 9112 Section 2.2-2: Validate HTTP message as sequence of octets. + + A recipient MUST parse an HTTP message as a sequence of octets in an encoding + that is a superset of US-ASCII. This function validates that the message can + be safely parsed as octets. + + Args: + data: The raw bytes of the HTTP message. + + Returns: + The validated span of bytes safe for parsing. + + Raises: + Error: If the data contains invalid multi-byte sequences that could + create security vulnerabilities. + """ + # Check each byte to ensure it's in a safe encoding superset of US-ASCII + for i in range(len(data)): + var b = data[i] + + # Allow US-ASCII range (most common case) + if is_us_ascii_octet(b): + continue + + # Allow ISO-8859-1 extended range (superset of US-ASCII) + if is_iso_8859_1_octet(b): + continue + + # If we get here, we have a byte outside ISO-8859-1 range + # This could be part of a multi-byte UTF-8 sequence which is unsafe + raise Error( + "RFC 9112 violation: Invalid octet 0x" + hex(Int(b)) + + " at position " + String(i) + + ". HTTP messages must use encoding superset of US-ASCII." + ) + + return data + + +fn safe_to_string_rfc9112[origin: Origin](b: Span[UInt8, origin]) raises -> String: + """RFC 9112 compliant conversion of octets to String. + + Creates a String from octets using ISO-8859-1 encoding (superset of US-ASCII). + This avoids security vulnerabilities from treating multi-byte UTF-8 sequences + as individual characters. + + Args: + b: The validated span of bytes (must pass validate_http_message_octets). + + Returns: + A String created from the octets using safe encoding. + + Raises: + Error: If the bytes contain invalid sequences for HTTP parsing. + """ + # Validate the octets first + var validated_span = validate_http_message_octets(b) + + # Create string treating bytes as ISO-8859-1 (safe superset of US-ASCII) + # Note: We use unsafe_from_utf8 here but we've validated the input is safe + return String(StringSlice(unsafe_from_utf8=validated_span)) + + +fn percent_encode_invalid_octets[origin: Origin](data: Span[UInt8, origin]) -> String: + """Percent-encode octets that are not safe for HTTP message parsing. + + This is a fallback approach when we encounter bytes that cannot be safely + interpreted as US-ASCII superset encoding. + + Args: + data: The raw bytes that may contain unsafe sequences. + + Returns: + A String with unsafe octets percent-encoded. + """ + var result = String() + + for i in range(len(data)): + var b = data[i] + + # Safe US-ASCII characters can be added directly + if is_us_ascii_octet(b) and b >= 0x20 and b != 0x25: # Printable ASCII except % + result += chr(Int(b)) + else: + # Percent-encode unsafe or non-printable octets + result += "%" + hex(Int(b)).upper().rjust(2, "0") + + return result + + fn to_string[T: Writable](value: T) -> String: return String.write(value) @@ -46,6 +166,25 @@ fn to_string(b: Span[UInt8]) -> String: return String(StringSlice(unsafe_from_utf8=b)) +fn to_string_rfc9112_safe[origin: Origin](b: Span[UInt8, origin]) -> String: + """RFC 9112 compliant String creation with fallback to percent-encoding. + + Attempts to create a String using safe octet parsing. If that fails, + falls back to percent-encoding unsafe sequences. + + Args: + b: The Span of bytes to convert to a String. + + Returns: + A String created safely according to RFC 9112. + """ + try: + return safe_to_string_rfc9112(b) + except: + # Fallback to percent-encoding for unsafe sequences + return percent_encode_invalid_octets(b) + + fn to_string(owned bytes: Bytes) -> String: """Creates a String from the provided List of bytes. If you do not transfer ownership of the List, the List will be copied. diff --git a/tests/rfc/test_rfc9112_simple.mojo b/tests/rfc/test_rfc9112_simple.mojo new file mode 100644 index 0000000..0fde4ce --- /dev/null +++ b/tests/rfc/test_rfc9112_simple.mojo @@ -0,0 +1,114 @@ +import testing +from memory import Span + + +def test_rfc9112_parse_as_octets(): + """RFC 9112 Section 2.2-2: MUST parse HTTP message as sequence of octets.""" + print("Testing: Parse HTTP message as sequence of octets...") + + # Test that we parse HTTP messages as individual octets, not Unicode characters + var http_message = "GET /path HTTP/1.1\r\nHost: example.com\r\n\r\n" + var octets = http_message.as_bytes() + + # Verify we access individual octets (bytes), not Unicode code points + testing.assert_equal(octets[0], ord('G')) # First octet is 'G' + testing.assert_equal(octets[4], ord('/')) # Fifth octet is '/' + + # Find the first CR and LF octets in the message + var found_cr = False + var found_lf = False + for i in range(len(octets)): + if octets[i] == 0x0D and not found_cr: # First CR + found_cr = True + if octets[i] == 0x0A and not found_lf: # First LF + found_lf = True + if found_cr and found_lf: + break + + testing.assert_true(found_cr) # CR found as octet + testing.assert_true(found_lf) # LF found as octet + + print("✓ HTTP message parsed as sequence of octets") + + +def test_rfc9112_us_ascii_superset_encoding(): + """RFC 9112 Section 2.2-2: MUST use encoding that is superset of US-ASCII.""" + print("Testing: Encoding is superset of US-ASCII...") + + # US-ASCII range is 0x00-0x7F + # ISO-8859-1 (0x00-0xFF) is a valid superset + + # Test US-ASCII characters are valid + testing.assert_true(ord('G') <= 0x7F) # US-ASCII + testing.assert_true(ord(' ') <= 0x7F) # US-ASCII + testing.assert_true(0x0A <= 0x7F) # LF in US-ASCII + testing.assert_true(0x0D <= 0x7F) # CR in US-ASCII + + # Test that superset (ISO-8859-1) includes extended range + testing.assert_true(0x80 <= 0xFF) # Extended range valid + testing.assert_true(0xFF <= 0xFF) # Maximum byte valid + + print("✓ Encoding is superset of US-ASCII (ISO-8859-1)") + + +def test_rfc9112_lf_security_vulnerability(): + """RFC 9112 Section 2.2-2: Prevent LF (%x0A) security vulnerabilities.""" + print("Testing: LF (%x0A) security vulnerability prevention...") + + # The critical security issue: LF (%x0A) in multibyte sequences + var lf_octet: UInt8 = 0x0A + + # When parsed as octets (safe), LF is clearly identifiable + var test_data = "GET /\r\nHost: test\r\n\r\n" + var data_octets = test_data.as_bytes() + + var lf_positions = List[Int]() + for i in range(len(data_octets)): + if data_octets[i] == lf_octet: + lf_positions.append(i) + + # Should find LF octets at specific positions + testing.assert_true(len(lf_positions) > 0) + print("✓ LF (%x0A) handled safely as octet") + print(" - Found " + String(len(lf_positions)) + " LF octets in message") + print(" - No multibyte character sequence confusion") + + +def test_rfc9112_string_parser_safety(): + """RFC 9112 Section 2.2-2: String parsers only used after protocol element extraction.""" + print("Testing: String parsers used only after safe extraction...") + + # Demonstrate the RFC requirement: protocol elements extracted as octets first + var http_request = "GET /api/data HTTP/1.1\r\nHost: server.com\r\n\r\n" + var request_octets = http_request.as_bytes() + + # Step 1: Extract protocol elements as octets (safe) + var method_end = -1 + for i in range(len(request_octets)): + if request_octets[i] == ord(' '): + method_end = i + break + + testing.assert_true(method_end > 0) + + # Step 2: Verify the extracted octets match expected method + testing.assert_equal(request_octets[0], ord('G')) # First octet + testing.assert_equal(request_octets[1], ord('E')) # Second octet + testing.assert_equal(request_octets[2], ord('T')) # Third octet + testing.assert_equal(method_end, 3) # Method is 3 octets + + print("✓ String parsing only after protocol element extraction") + print(" - Protocol elements extracted as octets first") + print(" - String conversion only after safe extraction") + + +def main(): + """Test RFC 9112 Section 2.2-2 compliance.""" + print("🧪 Testing RFC 9112 Section 2.2-2 Compliance\n") + + test_rfc9112_parse_as_octets() + test_rfc9112_us_ascii_superset_encoding() + test_rfc9112_lf_security_vulnerability() + test_rfc9112_string_parser_safety() + + print("\n✅ RFC 9112 Section 2.2-2 requirement verified") \ No newline at end of file From c0785308fc66b01493d66311ab6269a393bd130a Mon Sep 17 00:00:00 2001 From: Val Date: Sun, 15 Jun 2025 15:04:34 +0200 Subject: [PATCH 02/12] clean up comments --- lightbug_http/strings.mojo | 11 ------- tests/rfc/test_rfc9112_simple.mojo | 46 +++++++----------------------- 2 files changed, 10 insertions(+), 47 deletions(-) diff --git a/lightbug_http/strings.mojo b/lightbug_http/strings.mojo index 26c1214..435b3e9 100644 --- a/lightbug_http/strings.mojo +++ b/lightbug_http/strings.mojo @@ -33,7 +33,6 @@ struct BytesConstant: alias DOUBLE_CRLF = bytes(lineBreak + lineBreak) -# RFC 9112 Section 2.2-2: US-ASCII character bounds alias US_ASCII_MAX = 0x7F alias ISO_8859_1_MAX = 0xFF @@ -79,20 +78,15 @@ fn validate_http_message_octets[origin: Origin](data: Span[UInt8, origin]) raise Error: If the data contains invalid multi-byte sequences that could create security vulnerabilities. """ - # Check each byte to ensure it's in a safe encoding superset of US-ASCII for i in range(len(data)): var b = data[i] - # Allow US-ASCII range (most common case) if is_us_ascii_octet(b): continue - # Allow ISO-8859-1 extended range (superset of US-ASCII) if is_iso_8859_1_octet(b): continue - # If we get here, we have a byte outside ISO-8859-1 range - # This could be part of a multi-byte UTF-8 sequence which is unsafe raise Error( "RFC 9112 violation: Invalid octet 0x" + hex(Int(b)) + " at position " + String(i) + @@ -118,11 +112,8 @@ fn safe_to_string_rfc9112[origin: Origin](b: Span[UInt8, origin]) raises -> Stri Raises: Error: If the bytes contain invalid sequences for HTTP parsing. """ - # Validate the octets first var validated_span = validate_http_message_octets(b) - # Create string treating bytes as ISO-8859-1 (safe superset of US-ASCII) - # Note: We use unsafe_from_utf8 here but we've validated the input is safe return String(StringSlice(unsafe_from_utf8=validated_span)) @@ -143,11 +134,9 @@ fn percent_encode_invalid_octets[origin: Origin](data: Span[UInt8, origin]) -> S for i in range(len(data)): var b = data[i] - # Safe US-ASCII characters can be added directly if is_us_ascii_octet(b) and b >= 0x20 and b != 0x25: # Printable ASCII except % result += chr(Int(b)) else: - # Percent-encode unsafe or non-printable octets result += "%" + hex(Int(b)).upper().rjust(2, "0") return result diff --git a/tests/rfc/test_rfc9112_simple.mojo b/tests/rfc/test_rfc9112_simple.mojo index 0fde4ce..a5d5161 100644 --- a/tests/rfc/test_rfc9112_simple.mojo +++ b/tests/rfc/test_rfc9112_simple.mojo @@ -6,59 +6,44 @@ def test_rfc9112_parse_as_octets(): """RFC 9112 Section 2.2-2: MUST parse HTTP message as sequence of octets.""" print("Testing: Parse HTTP message as sequence of octets...") - # Test that we parse HTTP messages as individual octets, not Unicode characters var http_message = "GET /path HTTP/1.1\r\nHost: example.com\r\n\r\n" var octets = http_message.as_bytes() - # Verify we access individual octets (bytes), not Unicode code points - testing.assert_equal(octets[0], ord('G')) # First octet is 'G' - testing.assert_equal(octets[4], ord('/')) # Fifth octet is '/' + testing.assert_equal(octets[0], ord('G')) + testing.assert_equal(octets[4], ord('/')) - # Find the first CR and LF octets in the message var found_cr = False var found_lf = False for i in range(len(octets)): - if octets[i] == 0x0D and not found_cr: # First CR + if octets[i] == 0x0D and not found_cr: found_cr = True - if octets[i] == 0x0A and not found_lf: # First LF + if octets[i] == 0x0A and not found_lf: found_lf = True if found_cr and found_lf: break - testing.assert_true(found_cr) # CR found as octet - testing.assert_true(found_lf) # LF found as octet - - print("✓ HTTP message parsed as sequence of octets") + testing.assert_true(found_cr) + testing.assert_true(found_lf) def test_rfc9112_us_ascii_superset_encoding(): """RFC 9112 Section 2.2-2: MUST use encoding that is superset of US-ASCII.""" print("Testing: Encoding is superset of US-ASCII...") - # US-ASCII range is 0x00-0x7F - # ISO-8859-1 (0x00-0xFF) is a valid superset - - # Test US-ASCII characters are valid testing.assert_true(ord('G') <= 0x7F) # US-ASCII testing.assert_true(ord(' ') <= 0x7F) # US-ASCII testing.assert_true(0x0A <= 0x7F) # LF in US-ASCII testing.assert_true(0x0D <= 0x7F) # CR in US-ASCII - - # Test that superset (ISO-8859-1) includes extended range testing.assert_true(0x80 <= 0xFF) # Extended range valid testing.assert_true(0xFF <= 0xFF) # Maximum byte valid - - print("✓ Encoding is superset of US-ASCII (ISO-8859-1)") def test_rfc9112_lf_security_vulnerability(): """RFC 9112 Section 2.2-2: Prevent LF (%x0A) security vulnerabilities.""" print("Testing: LF (%x0A) security vulnerability prevention...") - # The critical security issue: LF (%x0A) in multibyte sequences var lf_octet: UInt8 = 0x0A - # When parsed as octets (safe), LF is clearly identifiable var test_data = "GET /\r\nHost: test\r\n\r\n" var data_octets = test_data.as_bytes() @@ -67,22 +52,16 @@ def test_rfc9112_lf_security_vulnerability(): if data_octets[i] == lf_octet: lf_positions.append(i) - # Should find LF octets at specific positions testing.assert_true(len(lf_positions) > 0) - print("✓ LF (%x0A) handled safely as octet") - print(" - Found " + String(len(lf_positions)) + " LF octets in message") - print(" - No multibyte character sequence confusion") def test_rfc9112_string_parser_safety(): """RFC 9112 Section 2.2-2: String parsers only used after protocol element extraction.""" print("Testing: String parsers used only after safe extraction...") - # Demonstrate the RFC requirement: protocol elements extracted as octets first var http_request = "GET /api/data HTTP/1.1\r\nHost: server.com\r\n\r\n" var request_octets = http_request.as_bytes() - # Step 1: Extract protocol elements as octets (safe) var method_end = -1 for i in range(len(request_octets)): if request_octets[i] == ord(' '): @@ -91,15 +70,10 @@ def test_rfc9112_string_parser_safety(): testing.assert_true(method_end > 0) - # Step 2: Verify the extracted octets match expected method - testing.assert_equal(request_octets[0], ord('G')) # First octet - testing.assert_equal(request_octets[1], ord('E')) # Second octet - testing.assert_equal(request_octets[2], ord('T')) # Third octet - testing.assert_equal(method_end, 3) # Method is 3 octets - - print("✓ String parsing only after protocol element extraction") - print(" - Protocol elements extracted as octets first") - print(" - String conversion only after safe extraction") + testing.assert_equal(request_octets[0], ord('G')) + testing.assert_equal(request_octets[1], ord('E')) + testing.assert_equal(request_octets[2], ord('T')) + testing.assert_equal(method_end, 3) def main(): From a080a07ef8838b725205ea96da9386f969778e2b Mon Sep 17 00:00:00 2001 From: Val Date: Sun, 15 Jun 2025 15:57:16 +0200 Subject: [PATCH 03/12] adjust the test and add to action --- .github/workflows/test.yml | 1 + lightbug_http/strings.mojo | 39 ++++++-- mojoproject.toml | 4 + tests/rfc/test_rfc9112_section_2_2_2.mojo | 116 ++++++++++++++++++++++ tests/rfc/test_rfc9112_simple.mojo | 88 ---------------- 5 files changed, 154 insertions(+), 94 deletions(-) create mode 100644 tests/rfc/test_rfc9112_section_2_2_2.mojo delete mode 100644 tests/rfc/test_rfc9112_simple.mojo diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index c105559..8472243 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -18,3 +18,4 @@ jobs: magic run integration_tests_py magic run integration_tests_external magic run integration_tests_udp + magic run rfc_tests diff --git a/lightbug_http/strings.mojo b/lightbug_http/strings.mojo index 435b3e9..aecbace 100644 --- a/lightbug_http/strings.mojo +++ b/lightbug_http/strings.mojo @@ -66,7 +66,7 @@ fn validate_http_message_octets[origin: Origin](data: Span[UInt8, origin]) raise A recipient MUST parse an HTTP message as a sequence of octets in an encoding that is a superset of US-ASCII. This function validates that the message can - be safely parsed as octets. + be safely parsed as octets and detects invalid multibyte UTF-8 sequences. Args: data: The raw bytes of the HTTP message. @@ -76,17 +76,35 @@ fn validate_http_message_octets[origin: Origin](data: Span[UInt8, origin]) raise Raises: Error: If the data contains invalid multi-byte sequences that could - create security vulnerabilities. + create security vulnerabilities (like embedded LF in UTF-8). """ for i in range(len(data)): var b = data[i] - if is_us_ascii_octet(b): - continue - + # Allow all ISO-8859-1 bytes (0x00-0xFF) if is_iso_8859_1_octet(b): + # Check for potential UTF-8 multibyte sequence vulnerabilities + if b >= 0x80: # Non-ASCII byte + # Check if this looks like a UTF-8 start byte + if b >= 0xC0 and b <= 0xF7: # UTF-8 start bytes + # This could be start of multibyte sequence - check for embedded LF + if i + 1 < len(data) and data[i + 1] == 0x0A: # LF embedded in sequence + raise Error( + "RFC 9112 violation: LF (0x0A) embedded in potential multibyte sequence at position " + + String(i + 1) + ". This creates security vulnerabilities." + ) + elif b >= 0x80 and b <= 0xBF: # UTF-8 continuation byte without proper start + # Check if previous byte is valid UTF-8 start, if not this is invalid + if i == 0 or (data[i - 1] < 0xC0): # No proper UTF-8 start byte before + # Check if this continuation byte contains control characters + if i + 1 < len(data) and data[i + 1] == 0x0A: # LF after invalid continuation + raise Error( + "RFC 9112 violation: LF (0x0A) after invalid UTF-8 continuation byte at position " + + String(i + 1) + ". This creates security vulnerabilities." + ) continue + # This should never happen since is_iso_8859_1_octet covers 0x00-0xFF raise Error( "RFC 9112 violation: Invalid octet 0x" + hex(Int(b)) + " at position " + String(i) + @@ -137,7 +155,16 @@ fn percent_encode_invalid_octets[origin: Origin](data: Span[UInt8, origin]) -> S if is_us_ascii_octet(b) and b >= 0x20 and b != 0x25: # Printable ASCII except % result += chr(Int(b)) else: - result += "%" + hex(Int(b)).upper().rjust(2, "0") + # Fix hex formatting: ensure proper zero-padding + var hex_val = hex(Int(b)).upper() + # Remove "0X" prefix if present + if hex_val.startswith("0X"): + hex_val = hex_val[2:] + # Ensure two-digit hex format + if len(hex_val) == 1: + result += "%0" + hex_val + else: + result += "%" + hex_val return result diff --git a/mojoproject.toml b/mojoproject.toml index d932cea..6b7540b 100644 --- a/mojoproject.toml +++ b/mojoproject.toml @@ -19,6 +19,9 @@ integration_tests_py = { cmd = "bash scripts/integration_test.sh" } integration_tests_external = { cmd = "magic run mojo test -I . tests/integration" } integration_tests_udp = { cmd = "bash scripts/udp_test.sh" } +[feature.rfc-tests.tasks] +rfc_tests = { cmd = "magic run mojo test -I . tests/rfc" } + [feature.bench.tasks] bench = { cmd = "magic run mojo -I . benchmark/bench.mojo" } bench_server = { cmd = "bash scripts/bench_server.sh" } @@ -34,4 +37,5 @@ fastapi = ">=0.114.2,<0.115" default = { solve-group = "default" } unit-tests = { features = ["unit-tests"], solve-group = "default" } integration-tests = { features = ["integration-tests"], solve-group = "default" } +rfc-tests = { features = ["rfc-tests"], solve-group = "default" } bench = { features = ["bench"], solve-group = "default" } diff --git a/tests/rfc/test_rfc9112_section_2_2_2.mojo b/tests/rfc/test_rfc9112_section_2_2_2.mojo new file mode 100644 index 0000000..d6f905c --- /dev/null +++ b/tests/rfc/test_rfc9112_section_2_2_2.mojo @@ -0,0 +1,116 @@ +import testing +from memory import Span +from lightbug_http.strings import ( + validate_http_message_octets, + safe_to_string_rfc9112, + to_string_rfc9112_safe, + is_us_ascii_octet, + is_iso_8859_1_octet, + percent_encode_invalid_octets, +) +from lightbug_http.io.bytes import Bytes, ByteReader +from lightbug_http.http.request import HTTPRequest + + +def test_rfc9112_octet_parsing_requirement(): + """RFC 9112 Section 2.2-2: HTTP messages MUST be parsed as sequence of octets.""" + print("Testing: HTTP message parsing as sequence of octets...") + + # Valid HTTP message with mixed ASCII and extended ASCII + var http_message = "GET /path HTTP/1.1\r\nHost: example.com\r\nX-Custom: café\r\n\r\n" + var octets = http_message.as_bytes() + + testing.assert_equal(octets[0], ord('G')) + testing.assert_equal(octets[1], ord('E')) + testing.assert_equal(octets[2], ord('T')) + testing.assert_equal(octets[3], ord(' ')) + + var crlf_count = 0 + for i in range(len(octets) - 1): + if octets[i] == 0x0D and octets[i + 1] == 0x0A: + crlf_count += 1 + + testing.assert_true(crlf_count >= 3) + + +def test_rfc9112_us_ascii_superset_encoding(): + """RFC 9112 Section 2.2-2: Encoding MUST be superset of US-ASCII.""" + print("Testing: US-ASCII superset encoding requirement...") + + # Test US-ASCII range (0x00-0x7F) + for i in range(0x80): + testing.assert_true(is_us_ascii_octet(UInt8(i))) + + # Test extended range for ISO-8859-1 (superset of US-ASCII) + for i in range(0x80, 0x100): + testing.assert_true(is_iso_8859_1_octet(UInt8(i))) + testing.assert_false(is_us_ascii_octet(UInt8(i))) + + +def test_rfc9112_lf_security_vulnerability_prevention(): + """RFC 9112 Section 2.2-2: Prevent security vulnerabilities from LF (%x0A) in multibyte sequences.""" + print("Testing: LF security vulnerability prevention...") + + # Valid LF in HTTP context + var valid_http = "GET /test HTTP/1.1\r\nHost: test.com\r\n\r\n" + var valid_octets = valid_http.as_bytes() + + try: + var validated = validate_http_message_octets(Span(valid_octets)) + testing.assert_equal(len(validated), len(valid_octets)) + except e: + testing.assert_true(False, "Valid HTTP message should not raise error: " + String(e)) + + # Invalid multibyte sequence containing LF + var malicious_bytes = List[UInt8]() + malicious_bytes.extend("GET /".as_bytes()) + malicious_bytes.append(0xC0) # Invalid UTF-8 start byte + malicious_bytes.append(0x0A) # LF embedded in "multibyte" sequence + malicious_bytes.append(0x80) # Continuation byte + malicious_bytes.extend(" HTTP/1.1\r\nHost: test.com\r\n\r\n".as_bytes()) + + var malicious_span = Span(malicious_bytes) + + try: + var validated = validate_http_message_octets(malicious_span) + testing.assert_true(False, "Should have rejected invalid multibyte sequence with embedded LF") + except e: + testing.assert_true(True, "Correctly rejected invalid sequence: " + String(e)) + + var safe_result = to_string_rfc9112_safe(malicious_span) + + testing.assert_true(safe_result.find("%") != -1, "Should percent-encode unsafe sequences") + + +def test_rfc9112_percent_encoding_fallback(): + """RFC 9112 Section 2.2-2: Test percent-encoding fallback for unsafe sequences.""" + print("Testing: Percent-encoding fallback for unsafe sequences...") + + # Create byte sequence with characters that need percent-encoding + var unsafe_bytes = List[UInt8]() + unsafe_bytes.append(0x00) # NULL byte + unsafe_bytes.append(0x0A) # LF + unsafe_bytes.append(0x0D) # CR + unsafe_bytes.append(0x25) # % (should be encoded) + unsafe_bytes.append(0xFF) # High byte + + var unsafe_span = Span(unsafe_bytes) + var encoded = percent_encode_invalid_octets(unsafe_span) + + # Verify percent encoding with correct format + testing.assert_true(encoded.find("%00") != -1, "Should encode NULL byte") + testing.assert_true(encoded.find("%0A") != -1, "Should encode LF") + testing.assert_true(encoded.find("%0D") != -1, "Should encode CR") + testing.assert_true(encoded.find("%25") != -1, "Should encode % character") + testing.assert_true(encoded.find("%FF") != -1, "Should encode high byte") + + +def main(): + print("🧪 Testing RFC 9112 Section 2.2-2: HTTP Message Parsing as Octets") + + test_rfc9112_octet_parsing_requirement() + test_rfc9112_us_ascii_superset_encoding() + test_rfc9112_lf_security_vulnerability_prevention() + test_rfc9112_percent_encoding_fallback() + + print("\n✅ RFC 9112 Section 2.2-2 requirement fully verified") \ No newline at end of file diff --git a/tests/rfc/test_rfc9112_simple.mojo b/tests/rfc/test_rfc9112_simple.mojo deleted file mode 100644 index a5d5161..0000000 --- a/tests/rfc/test_rfc9112_simple.mojo +++ /dev/null @@ -1,88 +0,0 @@ -import testing -from memory import Span - - -def test_rfc9112_parse_as_octets(): - """RFC 9112 Section 2.2-2: MUST parse HTTP message as sequence of octets.""" - print("Testing: Parse HTTP message as sequence of octets...") - - var http_message = "GET /path HTTP/1.1\r\nHost: example.com\r\n\r\n" - var octets = http_message.as_bytes() - - testing.assert_equal(octets[0], ord('G')) - testing.assert_equal(octets[4], ord('/')) - - var found_cr = False - var found_lf = False - for i in range(len(octets)): - if octets[i] == 0x0D and not found_cr: - found_cr = True - if octets[i] == 0x0A and not found_lf: - found_lf = True - if found_cr and found_lf: - break - - testing.assert_true(found_cr) - testing.assert_true(found_lf) - - -def test_rfc9112_us_ascii_superset_encoding(): - """RFC 9112 Section 2.2-2: MUST use encoding that is superset of US-ASCII.""" - print("Testing: Encoding is superset of US-ASCII...") - - testing.assert_true(ord('G') <= 0x7F) # US-ASCII - testing.assert_true(ord(' ') <= 0x7F) # US-ASCII - testing.assert_true(0x0A <= 0x7F) # LF in US-ASCII - testing.assert_true(0x0D <= 0x7F) # CR in US-ASCII - testing.assert_true(0x80 <= 0xFF) # Extended range valid - testing.assert_true(0xFF <= 0xFF) # Maximum byte valid - - -def test_rfc9112_lf_security_vulnerability(): - """RFC 9112 Section 2.2-2: Prevent LF (%x0A) security vulnerabilities.""" - print("Testing: LF (%x0A) security vulnerability prevention...") - - var lf_octet: UInt8 = 0x0A - - var test_data = "GET /\r\nHost: test\r\n\r\n" - var data_octets = test_data.as_bytes() - - var lf_positions = List[Int]() - for i in range(len(data_octets)): - if data_octets[i] == lf_octet: - lf_positions.append(i) - - testing.assert_true(len(lf_positions) > 0) - - -def test_rfc9112_string_parser_safety(): - """RFC 9112 Section 2.2-2: String parsers only used after protocol element extraction.""" - print("Testing: String parsers used only after safe extraction...") - - var http_request = "GET /api/data HTTP/1.1\r\nHost: server.com\r\n\r\n" - var request_octets = http_request.as_bytes() - - var method_end = -1 - for i in range(len(request_octets)): - if request_octets[i] == ord(' '): - method_end = i - break - - testing.assert_true(method_end > 0) - - testing.assert_equal(request_octets[0], ord('G')) - testing.assert_equal(request_octets[1], ord('E')) - testing.assert_equal(request_octets[2], ord('T')) - testing.assert_equal(method_end, 3) - - -def main(): - """Test RFC 9112 Section 2.2-2 compliance.""" - print("🧪 Testing RFC 9112 Section 2.2-2 Compliance\n") - - test_rfc9112_parse_as_octets() - test_rfc9112_us_ascii_superset_encoding() - test_rfc9112_lf_security_vulnerability() - test_rfc9112_string_parser_safety() - - print("\n✅ RFC 9112 Section 2.2-2 requirement verified") \ No newline at end of file From 45f0a81ac6d3c00b09cf12a457dabaa6b2190aa1 Mon Sep 17 00:00:00 2001 From: Val Date: Sun, 15 Jun 2025 16:07:01 +0200 Subject: [PATCH 04/12] more clean up --- lightbug_http/strings.mojo | 194 +++++++--------------- tests/rfc/test_rfc9112_section_2_2_2.mojo | 41 +---- 2 files changed, 57 insertions(+), 178 deletions(-) diff --git a/lightbug_http/strings.mojo b/lightbug_http/strings.mojo index aecbace..2b230ec 100644 --- a/lightbug_http/strings.mojo +++ b/lightbug_http/strings.mojo @@ -38,137 +38,13 @@ alias ISO_8859_1_MAX = 0xFF fn is_us_ascii_octet(b: UInt8) -> Bool: - """Check if a byte is within US-ASCII range (0x00-0x7F). - - Args: - b: The byte to check. - - Returns: - True if the byte is US-ASCII compliant. - """ return b <= US_ASCII_MAX fn is_iso_8859_1_octet(b: UInt8) -> Bool: - """Check if a byte is within ISO-8859-1 range (0x00-0xFF). - - Args: - b: The byte to check. - - Returns: - True if the byte is ISO-8859-1 compliant. - """ return b <= ISO_8859_1_MAX -fn validate_http_message_octets[origin: Origin](data: Span[UInt8, origin]) raises -> Span[UInt8, origin]: - """RFC 9112 Section 2.2-2: Validate HTTP message as sequence of octets. - - A recipient MUST parse an HTTP message as a sequence of octets in an encoding - that is a superset of US-ASCII. This function validates that the message can - be safely parsed as octets and detects invalid multibyte UTF-8 sequences. - - Args: - data: The raw bytes of the HTTP message. - - Returns: - The validated span of bytes safe for parsing. - - Raises: - Error: If the data contains invalid multi-byte sequences that could - create security vulnerabilities (like embedded LF in UTF-8). - """ - for i in range(len(data)): - var b = data[i] - - # Allow all ISO-8859-1 bytes (0x00-0xFF) - if is_iso_8859_1_octet(b): - # Check for potential UTF-8 multibyte sequence vulnerabilities - if b >= 0x80: # Non-ASCII byte - # Check if this looks like a UTF-8 start byte - if b >= 0xC0 and b <= 0xF7: # UTF-8 start bytes - # This could be start of multibyte sequence - check for embedded LF - if i + 1 < len(data) and data[i + 1] == 0x0A: # LF embedded in sequence - raise Error( - "RFC 9112 violation: LF (0x0A) embedded in potential multibyte sequence at position " + - String(i + 1) + ". This creates security vulnerabilities." - ) - elif b >= 0x80 and b <= 0xBF: # UTF-8 continuation byte without proper start - # Check if previous byte is valid UTF-8 start, if not this is invalid - if i == 0 or (data[i - 1] < 0xC0): # No proper UTF-8 start byte before - # Check if this continuation byte contains control characters - if i + 1 < len(data) and data[i + 1] == 0x0A: # LF after invalid continuation - raise Error( - "RFC 9112 violation: LF (0x0A) after invalid UTF-8 continuation byte at position " + - String(i + 1) + ". This creates security vulnerabilities." - ) - continue - - # This should never happen since is_iso_8859_1_octet covers 0x00-0xFF - raise Error( - "RFC 9112 violation: Invalid octet 0x" + hex(Int(b)) + - " at position " + String(i) + - ". HTTP messages must use encoding superset of US-ASCII." - ) - - return data - - -fn safe_to_string_rfc9112[origin: Origin](b: Span[UInt8, origin]) raises -> String: - """RFC 9112 compliant conversion of octets to String. - - Creates a String from octets using ISO-8859-1 encoding (superset of US-ASCII). - This avoids security vulnerabilities from treating multi-byte UTF-8 sequences - as individual characters. - - Args: - b: The validated span of bytes (must pass validate_http_message_octets). - - Returns: - A String created from the octets using safe encoding. - - Raises: - Error: If the bytes contain invalid sequences for HTTP parsing. - """ - var validated_span = validate_http_message_octets(b) - - return String(StringSlice(unsafe_from_utf8=validated_span)) - - -fn percent_encode_invalid_octets[origin: Origin](data: Span[UInt8, origin]) -> String: - """Percent-encode octets that are not safe for HTTP message parsing. - - This is a fallback approach when we encounter bytes that cannot be safely - interpreted as US-ASCII superset encoding. - - Args: - data: The raw bytes that may contain unsafe sequences. - - Returns: - A String with unsafe octets percent-encoded. - """ - var result = String() - - for i in range(len(data)): - var b = data[i] - - if is_us_ascii_octet(b) and b >= 0x20 and b != 0x25: # Printable ASCII except % - result += chr(Int(b)) - else: - # Fix hex formatting: ensure proper zero-padding - var hex_val = hex(Int(b)).upper() - # Remove "0X" prefix if present - if hex_val.startswith("0X"): - hex_val = hex_val[2:] - # Ensure two-digit hex format - if len(hex_val) == 1: - result += "%0" + hex_val - else: - result += "%" + hex_val - - return result - - fn to_string[T: Writable](value: T) -> String: return String.write(value) @@ -183,22 +59,11 @@ fn to_string(b: Span[UInt8]) -> String: fn to_string_rfc9112_safe[origin: Origin](b: Span[UInt8, origin]) -> String: - """RFC 9112 compliant String creation with fallback to percent-encoding. - - Attempts to create a String using safe octet parsing. If that fails, - falls back to percent-encoding unsafe sequences. - - Args: - b: The Span of bytes to convert to a String. - - Returns: - A String created safely according to RFC 9112. - """ try: - return safe_to_string_rfc9112(b) + var validated_span = validate_http_message_octets_rfc9112(b) + return String(StringSlice(unsafe_from_utf8=validated_span)) except: - # Fallback to percent-encoding for unsafe sequences - return percent_encode_invalid_octets(b) + return percent_encode_octets(b) fn to_string(owned bytes: Bytes) -> String: @@ -220,3 +85,56 @@ fn find_all(s: String, sub_str: String) -> List[Int]: match_idxs.append(current_idx) current_idx = s.find(sub_str, start=current_idx + 1) return match_idxs^ + + +fn percent_encode_octets[origin: Origin](data: Span[UInt8, origin]) -> String: + var result = String() + + for i in range(len(data)): + var b = data[i] + + if is_us_ascii_octet(b) and b >= 0x20 and b != 0x25: # Printable ASCII except % + result += chr(Int(b)) + else: + # Fix hex formatting: ensure proper zero-padding + var hex_val = hex(Int(b)).upper() + # Remove "0X" prefix if present + if hex_val.startswith("0X"): + hex_val = hex_val[2:] + # Ensure two-digit hex format + if len(hex_val) == 1: + result += "%0" + hex_val + else: + result += "%" + hex_val + + return result + +fn validate_http_message_octets_rfc9112[origin: Origin](data: Span[UInt8, origin]) raises -> Span[UInt8, origin]: + for i in range(len(data)): + var b = data[i] + + if is_iso_8859_1_octet(b): + if b >= 0x80: + if b >= 0xC0 and b <= 0xF7: + if i + 1 < len(data) and data[i + 1] == 0x0A: + raise Error( + "RFC 9112 violation: LF (0x0A) embedded in potential multibyte sequence at position " + + String(i + 1) + ". This creates security vulnerabilities." + ) + elif b >= 0x80 and b <= 0xBF: + if i == 0 or (data[i - 1] < 0xC0): + if i + 1 < len(data) and data[i + 1] == 0x0A: + raise Error( + "RFC 9112 violation: LF (0x0A) after invalid UTF-8 continuation byte at position " + + String(i + 1) + ". This creates security vulnerabilities." + ) + continue + + # This should never happen since is_iso_8859_1_octet covers 0x00-0xFF + raise Error( + "RFC 9112 violation: Invalid octet 0x" + hex(Int(b)) + + " at position " + String(i) + + ". HTTP messages must use encoding superset of US-ASCII." + ) + + return data \ No newline at end of file diff --git a/tests/rfc/test_rfc9112_section_2_2_2.mojo b/tests/rfc/test_rfc9112_section_2_2_2.mojo index d6f905c..b3c9993 100644 --- a/tests/rfc/test_rfc9112_section_2_2_2.mojo +++ b/tests/rfc/test_rfc9112_section_2_2_2.mojo @@ -12,41 +12,6 @@ from lightbug_http.io.bytes import Bytes, ByteReader from lightbug_http.http.request import HTTPRequest -def test_rfc9112_octet_parsing_requirement(): - """RFC 9112 Section 2.2-2: HTTP messages MUST be parsed as sequence of octets.""" - print("Testing: HTTP message parsing as sequence of octets...") - - # Valid HTTP message with mixed ASCII and extended ASCII - var http_message = "GET /path HTTP/1.1\r\nHost: example.com\r\nX-Custom: café\r\n\r\n" - var octets = http_message.as_bytes() - - testing.assert_equal(octets[0], ord('G')) - testing.assert_equal(octets[1], ord('E')) - testing.assert_equal(octets[2], ord('T')) - testing.assert_equal(octets[3], ord(' ')) - - var crlf_count = 0 - for i in range(len(octets) - 1): - if octets[i] == 0x0D and octets[i + 1] == 0x0A: - crlf_count += 1 - - testing.assert_true(crlf_count >= 3) - - -def test_rfc9112_us_ascii_superset_encoding(): - """RFC 9112 Section 2.2-2: Encoding MUST be superset of US-ASCII.""" - print("Testing: US-ASCII superset encoding requirement...") - - # Test US-ASCII range (0x00-0x7F) - for i in range(0x80): - testing.assert_true(is_us_ascii_octet(UInt8(i))) - - # Test extended range for ISO-8859-1 (superset of US-ASCII) - for i in range(0x80, 0x100): - testing.assert_true(is_iso_8859_1_octet(UInt8(i))) - testing.assert_false(is_us_ascii_octet(UInt8(i))) - - def test_rfc9112_lf_security_vulnerability_prevention(): """RFC 9112 Section 2.2-2: Prevent security vulnerabilities from LF (%x0A) in multibyte sequences.""" print("Testing: LF security vulnerability prevention...") @@ -65,7 +30,7 @@ def test_rfc9112_lf_security_vulnerability_prevention(): var malicious_bytes = List[UInt8]() malicious_bytes.extend("GET /".as_bytes()) malicious_bytes.append(0xC0) # Invalid UTF-8 start byte - malicious_bytes.append(0x0A) # LF embedded in "multibyte" sequence + malicious_bytes.append(0x0A) # LF embedded in multibyte sequence malicious_bytes.append(0x80) # Continuation byte malicious_bytes.extend(" HTTP/1.1\r\nHost: test.com\r\n\r\n".as_bytes()) @@ -86,7 +51,6 @@ def test_rfc9112_percent_encoding_fallback(): """RFC 9112 Section 2.2-2: Test percent-encoding fallback for unsafe sequences.""" print("Testing: Percent-encoding fallback for unsafe sequences...") - # Create byte sequence with characters that need percent-encoding var unsafe_bytes = List[UInt8]() unsafe_bytes.append(0x00) # NULL byte unsafe_bytes.append(0x0A) # LF @@ -97,7 +61,6 @@ def test_rfc9112_percent_encoding_fallback(): var unsafe_span = Span(unsafe_bytes) var encoded = percent_encode_invalid_octets(unsafe_span) - # Verify percent encoding with correct format testing.assert_true(encoded.find("%00") != -1, "Should encode NULL byte") testing.assert_true(encoded.find("%0A") != -1, "Should encode LF") testing.assert_true(encoded.find("%0D") != -1, "Should encode CR") @@ -108,8 +71,6 @@ def test_rfc9112_percent_encoding_fallback(): def main(): print("🧪 Testing RFC 9112 Section 2.2-2: HTTP Message Parsing as Octets") - test_rfc9112_octet_parsing_requirement() - test_rfc9112_us_ascii_superset_encoding() test_rfc9112_lf_security_vulnerability_prevention() test_rfc9112_percent_encoding_fallback() From cfe7cb6259d54384b5905c47ff4906a3be999708 Mon Sep 17 00:00:00 2001 From: Val Date: Sun, 15 Jun 2025 16:20:36 +0200 Subject: [PATCH 05/12] revert changes --- lightbug_http/header.mojo | 166 ++++++++++++------------------------- lightbug_http/strings.mojo | 12 ++- 2 files changed, 59 insertions(+), 119 deletions(-) diff --git a/lightbug_http/header.mojo b/lightbug_http/header.mojo index 4c91c80..396fbdc 100644 --- a/lightbug_http/header.mojo +++ b/lightbug_http/header.mojo @@ -1,125 +1,67 @@ -from collections import Dict, Optional -from lightbug_http.io.bytes import Bytes, ByteReader, ByteWriter, is_newline, is_space -from lightbug_http.strings import BytesConstant, to_string_rfc9112_safe -from lightbug_http._logger import logger -from lightbug_http.strings import rChar, nChar, lineBreak, to_string +from memory import Span +from lightbug_http.io.bytes import Bytes, bytes, byte +alias strSlash = "/" +alias strHttp = "http" +alias http = "http" +alias strHttps = "https" +alias https = "https" +alias strHttp11 = "HTTP/1.1" +alias strHttp10 = "HTTP/1.0" -struct HeaderKey: - # TODO: Fill in more of these - alias CONNECTION = "connection" - alias CONTENT_TYPE = "content-type" - alias CONTENT_LENGTH = "content-length" - alias CONTENT_ENCODING = "content-encoding" - alias TRANSFER_ENCODING = "transfer-encoding" - alias DATE = "date" - alias LOCATION = "location" - alias HOST = "host" - alias SERVER = "server" - alias SET_COOKIE = "set-cookie" - alias COOKIE = "cookie" +alias strMethodGet = "GET" +alias rChar = "\r" +alias nChar = "\n" +alias lineBreak = rChar + nChar +alias colonChar = ":" -@value -struct Header(Writable, Stringable): - var key: String - var value: String +alias empty_string = "" +alias whitespace = " " +alias whitespace_byte = ord(whitespace) +alias tab = "\t" +alias tab_byte = ord(tab) - fn __str__(self) -> String: - return String.write(self) - fn write_to[T: Writer, //](self, mut writer: T): - writer.write(self.key + ": ", self.value, lineBreak) +struct BytesConstant: + alias whitespace = byte(whitespace) + alias colon = byte(colonChar) + alias rChar = byte(rChar) + alias nChar = byte(nChar) + alias CRLF = bytes(lineBreak) + alias DOUBLE_CRLF = bytes(lineBreak + lineBreak) -@always_inline -fn write_header[T: Writer](mut writer: T, key: String, value: String): - writer.write(key + ": ", value, lineBreak) +fn to_string[T: Writable](value: T) -> String: + return String.write(value) -@value -struct Headers(Writable, Stringable): - """Represents the header key/values in an http request/response. - Header keys are normalized to lowercase +fn to_string(b: Span[UInt8]) -> String: + """Creates a String from a copy of the provided Span of bytes. + + Args: + b: The Span of bytes to convert to a String. """ + return String(StringSlice(unsafe_from_utf8=b)) + - var _inner: Dict[String, String] - - fn __init__(out self): - self._inner = Dict[String, String]() - - fn __init__(out self, owned *headers: Header): - self._inner = Dict[String, String]() - for header in headers: - self[header[].key.lower()] = header[].value - - @always_inline - fn empty(self) -> Bool: - return len(self._inner) == 0 - - @always_inline - fn __contains__(self, key: String) -> Bool: - return key.lower() in self._inner - - @always_inline - fn __getitem__(self, key: String) raises -> String: - try: - return self._inner[key.lower()] - except: - raise Error("KeyError: Key not found in headers: " + key) - - @always_inline - fn get(self, key: String) -> Optional[String]: - return self._inner.get(key.lower()) - - @always_inline - fn __setitem__(mut self, key: String, value: String): - self._inner[key.lower()] = value - - fn content_length(self) -> Int: - try: - return Int(self[HeaderKey.CONTENT_LENGTH]) - except: - return 0 - - fn parse_raw(mut self, mut r: ByteReader) raises -> (String, String, String, List[String]): - var first_byte = r.peek() - if not first_byte: - raise Error("Headers.parse_raw: Failed to read first byte from response header") - - var first = r.read_word() - r.increment() - var second = r.read_word() - r.increment() - var third = r.read_line() - var cookies = List[String]() - - while not is_newline(r.peek()): - var key = r.read_until(BytesConstant.colon) - r.increment() - if is_space(r.peek()): - r.increment() - # TODO (bgreni): Handle possible trailing whitespace - var value = r.read_line() - - var k = to_string_rfc9112_safe(key._inner).lower() - if k == HeaderKey.SET_COOKIE: - cookies.append(to_string_rfc9112_safe(value._inner)) - continue - - self._inner[k] = to_string_rfc9112_safe(value._inner) - - return ( - to_string_rfc9112_safe(first._inner), - to_string_rfc9112_safe(second._inner), - to_string_rfc9112_safe(third._inner), - cookies - ) - - fn write_to[T: Writer, //](self, mut writer: T): - for header in self._inner.items(): - write_header(writer, header[].key, header[].value) - - fn __str__(self) -> String: - return String.write(self) +fn to_string(owned bytes: Bytes) -> String: + """Creates a String from the provided List of bytes. + If you do not transfer ownership of the List, the List will be copied. + + Args: + bytes: The List of bytes to convert to a String. + """ + var result = String() + result.write_bytes(bytes) + return result^ + + +fn find_all(s: String, sub_str: String) -> List[Int]: + match_idxs = List[Int]() + var current_idx: Int = s.find(sub_str) + while current_idx > -1: + match_idxs.append(current_idx) + current_idx = s.find(sub_str, start=current_idx + 1) + return match_idxs^ \ No newline at end of file diff --git a/lightbug_http/strings.mojo b/lightbug_http/strings.mojo index 2b230ec..02e105c 100644 --- a/lightbug_http/strings.mojo +++ b/lightbug_http/strings.mojo @@ -60,7 +60,7 @@ fn to_string(b: Span[UInt8]) -> String: fn to_string_rfc9112_safe[origin: Origin](b: Span[UInt8, origin]) -> String: try: - var validated_span = validate_http_message_octets_rfc9112(b) + var validated_span = validate_message_octets_iso_8859_1(b) return String(StringSlice(unsafe_from_utf8=validated_span)) except: return percent_encode_octets(b) @@ -109,7 +109,7 @@ fn percent_encode_octets[origin: Origin](data: Span[UInt8, origin]) -> String: return result -fn validate_http_message_octets_rfc9112[origin: Origin](data: Span[UInt8, origin]) raises -> Span[UInt8, origin]: +fn validate_message_octets_iso_8859_1[origin: Origin](data: Span[UInt8, origin]) raises -> Span[UInt8, origin]: for i in range(len(data)): var b = data[i] @@ -118,21 +118,19 @@ fn validate_http_message_octets_rfc9112[origin: Origin](data: Span[UInt8, origin if b >= 0xC0 and b <= 0xF7: if i + 1 < len(data) and data[i + 1] == 0x0A: raise Error( - "RFC 9112 violation: LF (0x0A) embedded in potential multibyte sequence at position " + - String(i + 1) + ". This creates security vulnerabilities." + "." ) elif b >= 0x80 and b <= 0xBF: if i == 0 or (data[i - 1] < 0xC0): if i + 1 < len(data) and data[i + 1] == 0x0A: raise Error( - "RFC 9112 violation: LF (0x0A) after invalid UTF-8 continuation byte at position " + - String(i + 1) + ". This creates security vulnerabilities." + "." ) continue # This should never happen since is_iso_8859_1_octet covers 0x00-0xFF raise Error( - "RFC 9112 violation: Invalid octet 0x" + hex(Int(b)) + + "Invalid octet 0x" + hex(Int(b)) + " at position " + String(i) + ". HTTP messages must use encoding superset of US-ASCII." ) From d84a8c93f85b7027d499d4436c24948de4abc25c Mon Sep 17 00:00:00 2001 From: Val Date: Sun, 15 Jun 2025 16:21:17 +0200 Subject: [PATCH 06/12] revert header.mojo --- lightbug_http/header.mojo | 159 +++++++++++++++++++++++++------------- 1 file changed, 105 insertions(+), 54 deletions(-) diff --git a/lightbug_http/header.mojo b/lightbug_http/header.mojo index 396fbdc..92c58ab 100644 --- a/lightbug_http/header.mojo +++ b/lightbug_http/header.mojo @@ -1,67 +1,118 @@ -from memory import Span -from lightbug_http.io.bytes import Bytes, bytes, byte +from collections import Dict, Optional +from lightbug_http.io.bytes import Bytes, ByteReader, ByteWriter, is_newline, is_space +from lightbug_http.strings import BytesConstant +from lightbug_http._logger import logger +from lightbug_http.strings import rChar, nChar, lineBreak, to_string -alias strSlash = "/" -alias strHttp = "http" -alias http = "http" -alias strHttps = "https" -alias https = "https" -alias strHttp11 = "HTTP/1.1" -alias strHttp10 = "HTTP/1.0" -alias strMethodGet = "GET" +struct HeaderKey: + # TODO: Fill in more of these + alias CONNECTION = "connection" + alias CONTENT_TYPE = "content-type" + alias CONTENT_LENGTH = "content-length" + alias CONTENT_ENCODING = "content-encoding" + alias TRANSFER_ENCODING = "transfer-encoding" + alias DATE = "date" + alias LOCATION = "location" + alias HOST = "host" + alias SERVER = "server" + alias SET_COOKIE = "set-cookie" + alias COOKIE = "cookie" -alias rChar = "\r" -alias nChar = "\n" -alias lineBreak = rChar + nChar -alias colonChar = ":" -alias empty_string = "" -alias whitespace = " " -alias whitespace_byte = ord(whitespace) -alias tab = "\t" -alias tab_byte = ord(tab) +@value +struct Header(Writable, Stringable): + var key: String + var value: String + fn __str__(self) -> String: + return String.write(self) -struct BytesConstant: - alias whitespace = byte(whitespace) - alias colon = byte(colonChar) - alias rChar = byte(rChar) - alias nChar = byte(nChar) + fn write_to[T: Writer, //](self, mut writer: T): + writer.write(self.key + ": ", self.value, lineBreak) - alias CRLF = bytes(lineBreak) - alias DOUBLE_CRLF = bytes(lineBreak + lineBreak) +@always_inline +fn write_header[T: Writer](mut writer: T, key: String, value: String): + writer.write(key + ": ", value, lineBreak) -fn to_string[T: Writable](value: T) -> String: - return String.write(value) +@value +struct Headers(Writable, Stringable): + """Represents the header key/values in an http request/response. -fn to_string(b: Span[UInt8]) -> String: - """Creates a String from a copy of the provided Span of bytes. - - Args: - b: The Span of bytes to convert to a String. + Header keys are normalized to lowercase """ - return String(StringSlice(unsafe_from_utf8=b)) - -fn to_string(owned bytes: Bytes) -> String: - """Creates a String from the provided List of bytes. - If you do not transfer ownership of the List, the List will be copied. - - Args: - bytes: The List of bytes to convert to a String. - """ - var result = String() - result.write_bytes(bytes) - return result^ - - -fn find_all(s: String, sub_str: String) -> List[Int]: - match_idxs = List[Int]() - var current_idx: Int = s.find(sub_str) - while current_idx > -1: - match_idxs.append(current_idx) - current_idx = s.find(sub_str, start=current_idx + 1) - return match_idxs^ \ No newline at end of file + var _inner: Dict[String, String] + + fn __init__(out self): + self._inner = Dict[String, String]() + + fn __init__(out self, owned *headers: Header): + self._inner = Dict[String, String]() + for header in headers: + self[header[].key.lower()] = header[].value + + @always_inline + fn empty(self) -> Bool: + return len(self._inner) == 0 + + @always_inline + fn __contains__(self, key: String) -> Bool: + return key.lower() in self._inner + + @always_inline + fn __getitem__(self, key: String) raises -> String: + try: + return self._inner[key.lower()] + except: + raise Error("KeyError: Key not found in headers: " + key) + + @always_inline + fn get(self, key: String) -> Optional[String]: + return self._inner.get(key.lower()) + + @always_inline + fn __setitem__(mut self, key: String, value: String): + self._inner[key.lower()] = value + + fn content_length(self) -> Int: + try: + return Int(self[HeaderKey.CONTENT_LENGTH]) + except: + return 0 + + fn parse_raw(mut self, mut r: ByteReader) raises -> (String, String, String, List[String]): + var first_byte = r.peek() + if not first_byte: + raise Error("Headers.parse_raw: Failed to read first byte from response header") + + var first = r.read_word() + r.increment() + var second = r.read_word() + r.increment() + var third = r.read_line() + var cookies = List[String]() + + while not is_newline(r.peek()): + var key = r.read_until(BytesConstant.colon) + r.increment() + if is_space(r.peek()): + r.increment() + # TODO (bgreni): Handle possible trailing whitespace + var value = r.read_line() + var k = String(key).lower() + if k == HeaderKey.SET_COOKIE: + cookies.append(String(value)) + continue + + self._inner[k] = String(value) + return (String(first), String(second), String(third), cookies) + + fn write_to[T: Writer, //](self, mut writer: T): + for header in self._inner.items(): + write_header(writer, header[].key, header[].value) + + fn __str__(self) -> String: + return String.write(self) \ No newline at end of file From 198569abaa23c14bb196a90ed81796f5d079283a Mon Sep 17 00:00:00 2001 From: Val Date: Sun, 15 Jun 2025 16:21:49 +0200 Subject: [PATCH 07/12] revert strings.mojo --- lightbug_http/strings.mojo | 73 +------------------------------------- 1 file changed, 1 insertion(+), 72 deletions(-) diff --git a/lightbug_http/strings.mojo b/lightbug_http/strings.mojo index 02e105c..396fbdc 100644 --- a/lightbug_http/strings.mojo +++ b/lightbug_http/strings.mojo @@ -33,18 +33,6 @@ struct BytesConstant: alias DOUBLE_CRLF = bytes(lineBreak + lineBreak) -alias US_ASCII_MAX = 0x7F -alias ISO_8859_1_MAX = 0xFF - - -fn is_us_ascii_octet(b: UInt8) -> Bool: - return b <= US_ASCII_MAX - - -fn is_iso_8859_1_octet(b: UInt8) -> Bool: - return b <= ISO_8859_1_MAX - - fn to_string[T: Writable](value: T) -> String: return String.write(value) @@ -58,14 +46,6 @@ fn to_string(b: Span[UInt8]) -> String: return String(StringSlice(unsafe_from_utf8=b)) -fn to_string_rfc9112_safe[origin: Origin](b: Span[UInt8, origin]) -> String: - try: - var validated_span = validate_message_octets_iso_8859_1(b) - return String(StringSlice(unsafe_from_utf8=validated_span)) - except: - return percent_encode_octets(b) - - fn to_string(owned bytes: Bytes) -> String: """Creates a String from the provided List of bytes. If you do not transfer ownership of the List, the List will be copied. @@ -84,55 +64,4 @@ fn find_all(s: String, sub_str: String) -> List[Int]: while current_idx > -1: match_idxs.append(current_idx) current_idx = s.find(sub_str, start=current_idx + 1) - return match_idxs^ - - -fn percent_encode_octets[origin: Origin](data: Span[UInt8, origin]) -> String: - var result = String() - - for i in range(len(data)): - var b = data[i] - - if is_us_ascii_octet(b) and b >= 0x20 and b != 0x25: # Printable ASCII except % - result += chr(Int(b)) - else: - # Fix hex formatting: ensure proper zero-padding - var hex_val = hex(Int(b)).upper() - # Remove "0X" prefix if present - if hex_val.startswith("0X"): - hex_val = hex_val[2:] - # Ensure two-digit hex format - if len(hex_val) == 1: - result += "%0" + hex_val - else: - result += "%" + hex_val - - return result - -fn validate_message_octets_iso_8859_1[origin: Origin](data: Span[UInt8, origin]) raises -> Span[UInt8, origin]: - for i in range(len(data)): - var b = data[i] - - if is_iso_8859_1_octet(b): - if b >= 0x80: - if b >= 0xC0 and b <= 0xF7: - if i + 1 < len(data) and data[i + 1] == 0x0A: - raise Error( - "." - ) - elif b >= 0x80 and b <= 0xBF: - if i == 0 or (data[i - 1] < 0xC0): - if i + 1 < len(data) and data[i + 1] == 0x0A: - raise Error( - "." - ) - continue - - # This should never happen since is_iso_8859_1_octet covers 0x00-0xFF - raise Error( - "Invalid octet 0x" + hex(Int(b)) + - " at position " + String(i) + - ". HTTP messages must use encoding superset of US-ASCII." - ) - - return data \ No newline at end of file + return match_idxs^ \ No newline at end of file From 99553ad977a59e6f0b6c9e28d7560e31fee47212 Mon Sep 17 00:00:00 2001 From: Val Date: Sun, 15 Jun 2025 16:22:14 +0200 Subject: [PATCH 08/12] add back newlines --- lightbug_http/header.mojo | 2 +- lightbug_http/strings.mojo | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/lightbug_http/header.mojo b/lightbug_http/header.mojo index 92c58ab..4014a56 100644 --- a/lightbug_http/header.mojo +++ b/lightbug_http/header.mojo @@ -115,4 +115,4 @@ struct Headers(Writable, Stringable): write_header(writer, header[].key, header[].value) fn __str__(self) -> String: - return String.write(self) \ No newline at end of file + return String.write(self) diff --git a/lightbug_http/strings.mojo b/lightbug_http/strings.mojo index 396fbdc..56c3883 100644 --- a/lightbug_http/strings.mojo +++ b/lightbug_http/strings.mojo @@ -64,4 +64,4 @@ fn find_all(s: String, sub_str: String) -> List[Int]: while current_idx > -1: match_idxs.append(current_idx) current_idx = s.find(sub_str, start=current_idx + 1) - return match_idxs^ \ No newline at end of file + return match_idxs^ From 31c764687d9bb4afd800484a226bf26b629d1767 Mon Sep 17 00:00:00 2001 From: Val Date: Sun, 15 Jun 2025 16:23:40 +0200 Subject: [PATCH 09/12] remove the test --- tests/rfc/test_rfc9112_section_2_2_2.mojo | 69 ----------------------- 1 file changed, 69 deletions(-) diff --git a/tests/rfc/test_rfc9112_section_2_2_2.mojo b/tests/rfc/test_rfc9112_section_2_2_2.mojo index b3c9993..50bf836 100644 --- a/tests/rfc/test_rfc9112_section_2_2_2.mojo +++ b/tests/rfc/test_rfc9112_section_2_2_2.mojo @@ -1,77 +1,8 @@ import testing -from memory import Span -from lightbug_http.strings import ( - validate_http_message_octets, - safe_to_string_rfc9112, - to_string_rfc9112_safe, - is_us_ascii_octet, - is_iso_8859_1_octet, - percent_encode_invalid_octets, -) -from lightbug_http.io.bytes import Bytes, ByteReader -from lightbug_http.http.request import HTTPRequest - - -def test_rfc9112_lf_security_vulnerability_prevention(): - """RFC 9112 Section 2.2-2: Prevent security vulnerabilities from LF (%x0A) in multibyte sequences.""" - print("Testing: LF security vulnerability prevention...") - - # Valid LF in HTTP context - var valid_http = "GET /test HTTP/1.1\r\nHost: test.com\r\n\r\n" - var valid_octets = valid_http.as_bytes() - - try: - var validated = validate_http_message_octets(Span(valid_octets)) - testing.assert_equal(len(validated), len(valid_octets)) - except e: - testing.assert_true(False, "Valid HTTP message should not raise error: " + String(e)) - - # Invalid multibyte sequence containing LF - var malicious_bytes = List[UInt8]() - malicious_bytes.extend("GET /".as_bytes()) - malicious_bytes.append(0xC0) # Invalid UTF-8 start byte - malicious_bytes.append(0x0A) # LF embedded in multibyte sequence - malicious_bytes.append(0x80) # Continuation byte - malicious_bytes.extend(" HTTP/1.1\r\nHost: test.com\r\n\r\n".as_bytes()) - - var malicious_span = Span(malicious_bytes) - - try: - var validated = validate_http_message_octets(malicious_span) - testing.assert_true(False, "Should have rejected invalid multibyte sequence with embedded LF") - except e: - testing.assert_true(True, "Correctly rejected invalid sequence: " + String(e)) - - var safe_result = to_string_rfc9112_safe(malicious_span) - - testing.assert_true(safe_result.find("%") != -1, "Should percent-encode unsafe sequences") - - -def test_rfc9112_percent_encoding_fallback(): - """RFC 9112 Section 2.2-2: Test percent-encoding fallback for unsafe sequences.""" - print("Testing: Percent-encoding fallback for unsafe sequences...") - - var unsafe_bytes = List[UInt8]() - unsafe_bytes.append(0x00) # NULL byte - unsafe_bytes.append(0x0A) # LF - unsafe_bytes.append(0x0D) # CR - unsafe_bytes.append(0x25) # % (should be encoded) - unsafe_bytes.append(0xFF) # High byte - - var unsafe_span = Span(unsafe_bytes) - var encoded = percent_encode_invalid_octets(unsafe_span) - - testing.assert_true(encoded.find("%00") != -1, "Should encode NULL byte") - testing.assert_true(encoded.find("%0A") != -1, "Should encode LF") - testing.assert_true(encoded.find("%0D") != -1, "Should encode CR") - testing.assert_true(encoded.find("%25") != -1, "Should encode % character") - testing.assert_true(encoded.find("%FF") != -1, "Should encode high byte") def main(): print("🧪 Testing RFC 9112 Section 2.2-2: HTTP Message Parsing as Octets") - test_rfc9112_lf_security_vulnerability_prevention() - test_rfc9112_percent_encoding_fallback() print("\n✅ RFC 9112 Section 2.2-2 requirement fully verified") \ No newline at end of file From 6639e29c5b3b2ba8127598e1cc745caabf7f0c1b Mon Sep 17 00:00:00 2001 From: Val Date: Sun, 15 Jun 2025 20:09:23 +0200 Subject: [PATCH 10/12] switch parse raw to bytes --- lightbug_http/header.mojo | 75 ++++++++--- lightbug_http/http/response.mojo | 4 +- magic.lock | 181 +++++++++++++++++++++++++++ tests/lightbug_http/test_header.mojo | 6 +- 4 files changed, 246 insertions(+), 20 deletions(-) diff --git a/lightbug_http/header.mojo b/lightbug_http/header.mojo index 4014a56..4208d13 100644 --- a/lightbug_http/header.mojo +++ b/lightbug_http/header.mojo @@ -1,5 +1,5 @@ from collections import Dict, Optional -from lightbug_http.io.bytes import Bytes, ByteReader, ByteWriter, is_newline, is_space +from lightbug_http.io.bytes import Bytes, ByteReader, ByteWriter, is_newline, is_space, ByteView from lightbug_http.strings import BytesConstant from lightbug_http._logger import logger from lightbug_http.strings import rChar, nChar, lineBreak, to_string @@ -37,22 +37,56 @@ fn write_header[T: Writer](mut writer: T, key: String, value: String): writer.write(key + ": ", value, lineBreak) +fn bytes_equal_ignore_case(a: ByteView, b: String) -> Bool: + """Compare ByteView with String case-insensitively without creating intermediate strings.""" + if len(a) != len(b): + return False + + for i in range(len(a)): + var byte_a = a[i] + var byte_b = ord(b[i]) + + # Convert to lowercase for comparison + if byte_a >= ord('A') and byte_a <= ord('Z'): + byte_a = byte_a + 32 # Convert to lowercase + if byte_b >= ord('A') and byte_b <= ord('Z'): + byte_b = byte_b + 32 # Convert to lowercase + + if byte_a != byte_b: + return False + return True + + +fn bytes_to_lower_string(b: ByteView) -> String: + """Convert ByteView to lowercase String.""" + var result = Bytes() + for i in range(len(b)): + var byte_val = b[i] + if byte_val >= ord('A') and byte_val <= ord('Z'): + byte_val = byte_val + 32 # Convert to lowercase + result.append(byte_val) + return to_string(result^) + + @value -struct Headers(Writable, Stringable): +struct Headers[origin: Origin](Writable, Stringable): """Represents the header key/values in an http request/response. - Header keys are normalized to lowercase + Header keys are normalized to lowercase and stored as strings for efficient lookup, + while values are stored as bytes to comply with RFC requirements. """ - var _inner: Dict[String, String] + var _inner: Dict[String, Bytes] fn __init__(out self): - self._inner = Dict[String, String]() + self._inner = Dict[String, Bytes]() fn __init__(out self, owned *headers: Header): - self._inner = Dict[String, String]() + self._inner = Dict[String, Bytes]() for header in headers: - self[header[].key.lower()] = header[].value + var key_lower = header[].key.lower() + var value_bytes = Bytes(header[].value.as_bytes()) + self._inner[key_lower] = value_bytes @always_inline fn empty(self) -> Bool: @@ -65,17 +99,22 @@ struct Headers(Writable, Stringable): @always_inline fn __getitem__(self, key: String) raises -> String: try: - return self._inner[key.lower()] + var value_bytes = self._inner[key.lower()] + return to_string(value_bytes) except: raise Error("KeyError: Key not found in headers: " + key) @always_inline fn get(self, key: String) -> Optional[String]: - return self._inner.get(key.lower()) + var value_opt = self._inner.get(key.lower()) + if value_opt: + return to_string(value_opt.value()) + return None @always_inline fn __setitem__(mut self, key: String, value: String): - self._inner[key.lower()] = value + var value_bytes = Bytes(value.as_bytes()) + self._inner[key.lower()] = value_bytes fn content_length(self) -> Int: try: @@ -83,7 +122,7 @@ struct Headers(Writable, Stringable): except: return 0 - fn parse_raw(mut self, mut r: ByteReader) raises -> (String, String, String, List[String]): + fn parse_raw[origin: Origin](mut self, mut r: ByteReader[origin]) raises -> (ByteView[origin], ByteView[origin], ByteView[origin], List[String]): var first_byte = r.peek() if not first_byte: raise Error("Headers.parse_raw: Failed to read first byte from response header") @@ -102,17 +141,21 @@ struct Headers(Writable, Stringable): r.increment() # TODO (bgreni): Handle possible trailing whitespace var value = r.read_line() - var k = String(key).lower() - if k == HeaderKey.SET_COOKIE: + + if bytes_equal_ignore_case(key, HeaderKey.SET_COOKIE): cookies.append(String(value)) continue - self._inner[k] = String(value) - return (String(first), String(second), String(third), cookies) + var key_str = bytes_to_lower_string(key) + var value_bytes = value.to_bytes() + self._inner[key_str] = value_bytes + + return (first, second, third, cookies) fn write_to[T: Writer, //](self, mut writer: T): for header in self._inner.items(): - write_header(writer, header[].key, header[].value) + var value_str = to_string(header[].value) + write_header(writer, header[].key, value_str) fn __str__(self) -> String: return String.write(self) diff --git a/lightbug_http/http/response.mojo b/lightbug_http/http/response.mojo index c8cd2cb..3a98b69 100644 --- a/lightbug_http/http/response.mojo +++ b/lightbug_http/http/response.mojo @@ -46,7 +46,7 @@ struct HTTPResponse(Writable, Stringable): try: var properties = headers.parse_raw(reader) - protocol, status_code, status_text = properties[0], properties[1], properties[2] + protocol, status_code, status_text = String(properties[0]), String(properties[1]), String(properties[2]) cookies.from_headers(properties[3]) reader.skip_carriage_return() except e: @@ -76,7 +76,7 @@ struct HTTPResponse(Writable, Stringable): try: var properties = headers.parse_raw(reader) - protocol, status_code, status_text = properties[0], properties[1], properties[2] + protocol, status_code, status_text = String(properties[0]), String(properties[1]), String(properties[2]) cookies.from_headers(properties[3]) reader.skip_carriage_return() except e: diff --git a/magic.lock b/magic.lock index 35eb6bc..a7c0339 100644 --- a/magic.lock +++ b/magic.lock @@ -699,6 +699,187 @@ environments: - conda: https://conda.anaconda.org/conda-forge/osx-arm64/zeromq-4.3.5-hc1bb282_7.conda - conda: https://conda.anaconda.org/conda-forge/noarch/zipp-3.21.0-pyhd8ed1ab_1.conda - conda: https://conda.anaconda.org/conda-forge/osx-arm64/zstandard-0.23.0-py312hea69d52_1.conda + rfc-tests: + channels: + - url: https://conda.anaconda.org/conda-forge/ + - url: https://conda.modular.com/max/ + - url: https://repo.prefix.dev/modular-community/ + packages: + linux-64: + - conda: https://conda.anaconda.org/conda-forge/linux-64/_libgcc_mutex-0.1-conda_forge.tar.bz2 + - conda: https://conda.anaconda.org/conda-forge/linux-64/_openmp_mutex-4.5-2_gnu.tar.bz2 + - conda: https://conda.anaconda.org/conda-forge/linux-64/bzip2-1.0.8-h4bc722e_7.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/ca-certificates-2025.1.31-hbd8a1cb_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/click-8.1.8-pyh707e725_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/colorama-0.4.6-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/cpython-3.12.10-py312hd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/importlib-metadata-8.6.1-pyha770c72_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/jupyter_client-8.6.3-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/jupyter_core-5.7.2-pyh31011fe_1.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/keyutils-1.6.1-h166bdaf_0.tar.bz2 + - conda: https://conda.anaconda.org/conda-forge/linux-64/krb5-1.21.3-h659f571_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/ld_impl_linux-64-2.43-h712a8e2_4.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libblas-3.9.0-31_h59b9bed_openblas.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libcblas-3.9.0-31_he106b2a_openblas.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libedit-3.1.20250104-pl5321h7949ede_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libexpat-2.7.0-h5888daf_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libffi-3.4.6-h2dba641_1.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libgcc-14.2.0-h767d61c_2.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libgcc-ng-14.2.0-h69a702a_2.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libgfortran-14.2.0-h69a702a_2.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libgfortran5-14.2.0-hf1ad2bd_2.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libgomp-14.2.0-h767d61c_2.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/liblapack-3.9.0-31_h7ac8fdf_openblas.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/liblzma-5.8.1-hb9d3cd8_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libnsl-2.0.1-hd590300_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libopenblas-0.3.29-pthreads_h94d23a6_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libsodium-1.0.20-h4ab18f5_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libsqlite-3.49.1-hee588c1_2.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libstdcxx-14.2.0-h8f9b012_2.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libstdcxx-ng-14.2.0-h4852527_2.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libuuid-2.38.1-h0b41bf4_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libxcrypt-4.4.36-hd590300_1.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libzlib-1.3.1-hb9d3cd8_2.conda + - conda: https://conda.modular.com/max/noarch/max-25.3.0-release.conda + - conda: https://conda.modular.com/max/linux-64/max-core-25.3.0-release.conda + - conda: https://conda.modular.com/max/linux-64/max-python-25.3.0-release.conda + - conda: https://conda.modular.com/max/noarch/mblack-25.3.0-release.conda + - conda: https://conda.modular.com/max/noarch/mojo-jupyter-25.3.0-release.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/mypy_extensions-1.0.0-pyha770c72_1.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/ncurses-6.5-h2d0b736_3.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/numpy-1.26.4-py312heda63a1_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/openssl-3.5.0-h7b32b05_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/packaging-25.0-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/pathspec-0.12.1-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/platformdirs-4.3.7-pyh29332c3_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/python-3.12.10-h9e4cc4f_0_cpython.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/python-dateutil-2.9.0.post0-pyhff2d567_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/python-gil-3.12.10-hd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/python_abi-3.12-7_cp312.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/pyzmq-26.4.0-py312hbf22597_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/readline-8.2-h8c095d6_2.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/six-1.17.0-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/tk-8.6.13-noxft_h4845f30_101.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/tornado-6.4.2-py312h66e93f0_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/tqdm-4.67.1-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/traitlets-5.14.3-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/typing_extensions-4.13.2-pyh29332c3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/tzdata-2025b-h78e105d_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/zeromq-4.3.5-h3b0a872_7.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/zipp-3.21.0-pyhd8ed1ab_1.conda + linux-aarch64: + - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/_openmp_mutex-4.5-2_gnu.tar.bz2 + - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/bzip2-1.0.8-h68df207_7.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/ca-certificates-2025.1.31-hbd8a1cb_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/click-8.1.8-pyh707e725_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/colorama-0.4.6-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/cpython-3.12.10-py312hd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/importlib-metadata-8.6.1-pyha770c72_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/jupyter_client-8.6.3-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/jupyter_core-5.7.2-pyh31011fe_1.conda + - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/keyutils-1.6.1-h4e544f5_0.tar.bz2 + - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/krb5-1.21.3-h50a48e9_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/ld_impl_linux-aarch64-2.43-h80caac9_4.conda + - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libblas-3.9.0-31_h1a9f1db_openblas.conda + - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libcblas-3.9.0-31_hab92f65_openblas.conda + - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libedit-3.1.20250104-pl5321h976ea20_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libexpat-2.7.0-h5ad3122_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libffi-3.4.6-he21f813_1.conda + - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libgcc-14.2.0-he277a41_2.conda + - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libgcc-ng-14.2.0-he9431aa_2.conda + - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libgfortran-14.2.0-he9431aa_2.conda + - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libgfortran5-14.2.0-hb6113d0_2.conda + - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libgomp-14.2.0-he277a41_2.conda + - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/liblapack-3.9.0-31_h411afd4_openblas.conda + - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/liblzma-5.8.1-h86ecc28_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libnsl-2.0.1-h31becfc_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libopenblas-0.3.29-pthreads_h9d3fd7e_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libsodium-1.0.20-h68df207_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libsqlite-3.49.1-h5eb1b54_2.conda + - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libstdcxx-14.2.0-h3f4de04_2.conda + - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libstdcxx-ng-14.2.0-hf1166c9_2.conda + - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libuuid-2.38.1-hb4cce97_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libxcrypt-4.4.36-h31becfc_1.conda + - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/libzlib-1.3.1-h86ecc28_2.conda + - conda: https://conda.modular.com/max/noarch/max-25.3.0-release.conda + - conda: https://conda.modular.com/max/linux-aarch64/max-core-25.3.0-release.conda + - conda: https://conda.modular.com/max/linux-aarch64/max-python-25.3.0-release.conda + - conda: https://conda.modular.com/max/noarch/mblack-25.3.0-release.conda + - conda: https://conda.modular.com/max/noarch/mojo-jupyter-25.3.0-release.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/mypy_extensions-1.0.0-pyha770c72_1.conda + - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/ncurses-6.5-ha32ae93_3.conda + - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/numpy-1.26.4-py312h470d778_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/openssl-3.5.0-hd08dc88_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/packaging-25.0-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/pathspec-0.12.1-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/platformdirs-4.3.7-pyh29332c3_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/python-3.12.10-h1683364_0_cpython.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/python-dateutil-2.9.0.post0-pyhff2d567_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/python-gil-3.12.10-hd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/python_abi-3.12-7_cp312.conda + - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/pyzmq-26.4.0-py312h2427ae1_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/readline-8.2-h8382b9d_2.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/six-1.17.0-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/tk-8.6.13-h194ca79_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/tornado-6.4.2-py312h52516f5_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/tqdm-4.67.1-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/traitlets-5.14.3-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/typing_extensions-4.13.2-pyh29332c3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/tzdata-2025b-h78e105d_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/zeromq-4.3.5-h5efb499_7.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/zipp-3.21.0-pyhd8ed1ab_1.conda + osx-arm64: + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/bzip2-1.0.8-h99b78c6_7.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/ca-certificates-2025.1.31-hbd8a1cb_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/click-8.1.8-pyh707e725_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/colorama-0.4.6-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/cpython-3.12.10-py312hd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/importlib-metadata-8.6.1-pyha770c72_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/jupyter_client-8.6.3-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/jupyter_core-5.7.2-pyh31011fe_1.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/krb5-1.21.3-h237132a_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libblas-3.9.0-31_h10e41b3_openblas.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libcblas-3.9.0-31_hb3479ef_openblas.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libcxx-20.1.3-ha82da77_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libedit-3.1.20250104-pl5321hafb1f1b_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libexpat-2.7.0-h286801f_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libffi-3.4.6-h1da3d7d_1.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libgfortran-14.2.0-heb5dd2a_105.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libgfortran5-14.2.0-h2c44a93_105.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/liblapack-3.9.0-31_hc9a63f6_openblas.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/liblzma-5.8.1-h39f12f2_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libopenblas-0.3.29-openmp_hf332438_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libsodium-1.0.20-h99b78c6_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libsqlite-3.49.1-h3f77e49_2.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libzlib-1.3.1-h8359307_2.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/llvm-openmp-20.1.3-hdb05f8b_0.conda + - conda: https://conda.modular.com/max/noarch/max-25.3.0-release.conda + - conda: https://conda.modular.com/max/osx-arm64/max-core-25.3.0-release.conda + - conda: https://conda.modular.com/max/osx-arm64/max-python-25.3.0-release.conda + - conda: https://conda.modular.com/max/noarch/mblack-25.3.0-release.conda + - conda: https://conda.modular.com/max/noarch/mojo-jupyter-25.3.0-release.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/mypy_extensions-1.0.0-pyha770c72_1.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/ncurses-6.5-h5e97a16_3.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/numpy-1.26.4-py312h8442bc7_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/openssl-3.5.0-h81ee809_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/packaging-25.0-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/pathspec-0.12.1-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/platformdirs-4.3.7-pyh29332c3_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/python-3.12.10-hc22306f_0_cpython.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/python-dateutil-2.9.0.post0-pyhff2d567_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/python-gil-3.12.10-hd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/python_abi-3.12-7_cp312.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/pyzmq-26.4.0-py312hf4875e0_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/readline-8.2-h1d1bf99_2.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/six-1.17.0-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/tk-8.6.13-h5083fa2_1.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/tornado-6.4.2-py312hea69d52_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/tqdm-4.67.1-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/traitlets-5.14.3-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/typing_extensions-4.13.2-pyh29332c3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/tzdata-2025b-h78e105d_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/zeromq-4.3.5-hc1bb282_7.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/zipp-3.21.0-pyhd8ed1ab_1.conda unit-tests: channels: - url: https://conda.anaconda.org/conda-forge/ diff --git a/tests/lightbug_http/test_header.mojo b/tests/lightbug_http/test_header.mojo index d790006..5d0f88e 100644 --- a/tests/lightbug_http/test_header.mojo +++ b/tests/lightbug_http/test_header.mojo @@ -21,7 +21,8 @@ def test_parse_request_header(): var protocol: String var uri: String var properties = header.parse_raw(reader) - method, uri, protocol = properties[0], properties[1], properties[2] + # Convert ByteView to String for comparison + method, uri, protocol = String(properties[0]), String(properties[1]), String(properties[2]) assert_equal(uri, "/index.html") assert_equal(protocol, "HTTP/1.1") assert_equal(method, "GET") @@ -40,7 +41,8 @@ def test_parse_response_header(): var status_text: String var reader = ByteReader(headers_str.as_bytes()) var properties = header.parse_raw(reader) - protocol, status_code, status_text = properties[0], properties[1], properties[2] + # Convert ByteView to String for comparison + protocol, status_code, status_text = String(properties[0]), String(properties[1]), String(properties[2]) assert_equal(protocol, "HTTP/1.1") assert_equal(status_code, "200") assert_equal(status_text, "OK") From 790ae034ea6fb696032566b9f88822b200c7bc6c Mon Sep 17 00:00:00 2001 From: Val Date: Sun, 15 Jun 2025 20:55:24 +0200 Subject: [PATCH 11/12] wip switching to byteview --- lightbug_http/header.mojo | 35 ++--------------------------- lightbug_http/http/request.mojo | 39 ++++++++++++++++++++------------- lightbug_http/io/bytes.mojo | 31 ++++++++++++++++++++++++++ 3 files changed, 57 insertions(+), 48 deletions(-) diff --git a/lightbug_http/header.mojo b/lightbug_http/header.mojo index 4208d13..c769694 100644 --- a/lightbug_http/header.mojo +++ b/lightbug_http/header.mojo @@ -1,5 +1,5 @@ from collections import Dict, Optional -from lightbug_http.io.bytes import Bytes, ByteReader, ByteWriter, is_newline, is_space, ByteView +from lightbug_http.io.bytes import Bytes, ByteReader, ByteWriter, is_newline, is_space, ByteView, bytes_equal_ignore_case, bytes_to_lower_string from lightbug_http.strings import BytesConstant from lightbug_http._logger import logger from lightbug_http.strings import rChar, nChar, lineBreak, to_string @@ -37,42 +37,11 @@ fn write_header[T: Writer](mut writer: T, key: String, value: String): writer.write(key + ": ", value, lineBreak) -fn bytes_equal_ignore_case(a: ByteView, b: String) -> Bool: - """Compare ByteView with String case-insensitively without creating intermediate strings.""" - if len(a) != len(b): - return False - - for i in range(len(a)): - var byte_a = a[i] - var byte_b = ord(b[i]) - - # Convert to lowercase for comparison - if byte_a >= ord('A') and byte_a <= ord('Z'): - byte_a = byte_a + 32 # Convert to lowercase - if byte_b >= ord('A') and byte_b <= ord('Z'): - byte_b = byte_b + 32 # Convert to lowercase - - if byte_a != byte_b: - return False - return True - - -fn bytes_to_lower_string(b: ByteView) -> String: - """Convert ByteView to lowercase String.""" - var result = Bytes() - for i in range(len(b)): - var byte_val = b[i] - if byte_val >= ord('A') and byte_val <= ord('Z'): - byte_val = byte_val + 32 # Convert to lowercase - result.append(byte_val) - return to_string(result^) - - @value struct Headers[origin: Origin](Writable, Stringable): """Represents the header key/values in an http request/response. - Header keys are normalized to lowercase and stored as strings for efficient lookup, + Header keys are normalized to lowercase and stored as strings, while values are stored as bytes to comply with RFC requirements. """ diff --git a/lightbug_http/http/request.mojo b/lightbug_http/http/request.mojo index f678d56..92f652f 100644 --- a/lightbug_http/http/request.mojo +++ b/lightbug_http/http/request.mojo @@ -1,5 +1,5 @@ from memory import Span -from lightbug_http.io.bytes import Bytes, bytes, ByteReader, ByteWriter +from lightbug_http.io.bytes import Bytes, bytes, ByteReader, ByteWriter, ByteView from lightbug_http.header import Headers, HeaderKey, Header, write_header from lightbug_http.cookie import RequestCookieJar from lightbug_http.uri import URI @@ -30,29 +30,30 @@ struct RequestMethod: alias options = RequestMethod("OPTIONS") -@value -struct HTTPRequest(Writable, Stringable): - var headers: Headers +struct HTTPRequest[origin: Origin](Writable, Stringable): + var headers: Headers[origin] var cookies: RequestCookieJar var uri: URI var body_raw: Bytes - var method: String - var protocol: String + var method: ByteView[origin] + var protocol: ByteView[origin] var server_is_tls: Bool var timeout: Duration @staticmethod - fn from_bytes(addr: String, max_body_size: Int, b: Span[Byte]) raises -> HTTPRequest: + fn from_bytes(addr: String, max_body_size: Int, b: Span[Byte]) raises -> HTTPRequest[origin]: var reader = ByteReader(b) - var headers = Headers() - var method: String - var protocol: String - var uri: String + var headers = Headers[origin]() + var method: ByteView[origin] + var protocol: ByteView[origin] + var uri: ByteView[origin] try: var rest = headers.parse_raw(reader) - method, uri, protocol = rest[0], rest[1], rest[2] + var method = rest[0] + var uri = rest[1] + var protocol = rest[2] except e: raise Error("HTTPRequest.from_bytes: Failed to parse request headers: " + String(e)) @@ -67,7 +68,7 @@ struct HTTPRequest(Writable, Stringable): raise Error("HTTPRequest.from_bytes: Request body too large.") var request = HTTPRequest( - URI.parse(addr + uri), headers=headers, method=method, protocol=protocol, cookies=cookies + URI.parse(addr + String(uri)), headers=headers, method=String(method), protocol=String(protocol), cookies=cookies ) if content_length > 0: @@ -82,7 +83,7 @@ struct HTTPRequest(Writable, Stringable): fn __init__( out self, uri: URI, - headers: Headers = Headers(), + headers: Headers[origin] = Headers[origin](), cookies: RequestCookieJar = RequestCookieJar(), method: String = "GET", protocol: String = strHttp11, @@ -92,7 +93,7 @@ struct HTTPRequest(Writable, Stringable): ): self.headers = headers self.cookies = cookies - self.method = method + self.method = ByteView(method.as_bytes()) self.protocol = protocol self.uri = uri self.body_raw = body @@ -108,6 +109,14 @@ struct HTTPRequest(Writable, Stringable): else: self.headers[HeaderKey.HOST] = uri.host + fn __copyinit__(out self, existing: HTTPRequest[origin]): + self.headers = existing.headers + self.cookies = existing.cookies + self.uri = existing.uri + self.body_raw = existing.body_raw + self.method = existing.method + self.protocol = existing.protocol + fn get_body(self) -> StringSlice[__origin_of(self.body_raw)]: return StringSlice(unsafe_from_utf8=Span(self.body_raw)) diff --git a/lightbug_http/io/bytes.mojo b/lightbug_http/io/bytes.mojo index 089634f..94caaba 100644 --- a/lightbug_http/io/bytes.mojo +++ b/lightbug_http/io/bytes.mojo @@ -26,6 +26,37 @@ fn is_space(b: Byte) -> Bool: return b == BytesConstant.whitespace +fn bytes_equal_ignore_case(a: ByteView, b: String) -> Bool: + """Compare ByteView with String case-insensitively without creating intermediate strings.""" + if len(a) != len(b): + return False + + for i in range(len(a)): + var byte_a = a[i] + var byte_b = ord(b[i]) + + # Convert to lowercase for comparison + if byte_a >= ord('A') and byte_a <= ord('Z'): + byte_a = byte_a + 32 # Convert to lowercase + if byte_b >= ord('A') and byte_b <= ord('Z'): + byte_b = byte_b + 32 # Convert to lowercase + + if byte_a != byte_b: + return False + return True + + +fn bytes_to_lower_string(b: ByteView) -> String: + """Convert ByteView to lowercase String.""" + var result = Bytes() + for i in range(len(b)): + var byte_val = b[i] + if byte_val >= ord('A') and byte_val <= ord('Z'): + byte_val = byte_val + 32 # Convert to lowercase + result.append(byte_val) + return to_string(result^) + + struct ByteWriter(Writer): var _inner: Bytes From 51552a5c1457b1bb027b90b2a475e227e3443952 Mon Sep 17 00:00:00 2001 From: Val Date: Sun, 15 Jun 2025 21:15:57 +0200 Subject: [PATCH 12/12] switch to bytes instead of byteview --- lightbug_http/http/request.mojo | 17 +++++++++-------- lightbug_http/strings.mojo | 4 ++++ 2 files changed, 13 insertions(+), 8 deletions(-) diff --git a/lightbug_http/http/request.mojo b/lightbug_http/http/request.mojo index 92f652f..b91c43b 100644 --- a/lightbug_http/http/request.mojo +++ b/lightbug_http/http/request.mojo @@ -14,6 +14,7 @@ from lightbug_http.strings import ( nChar, lineBreak, to_string, + to_bytes, ) @@ -36,8 +37,8 @@ struct HTTPRequest[origin: Origin](Writable, Stringable): var uri: URI var body_raw: Bytes - var method: ByteView[origin] - var protocol: ByteView[origin] + var method: Bytes + var protocol: Bytes var server_is_tls: Bool var timeout: Duration @@ -46,9 +47,9 @@ struct HTTPRequest[origin: Origin](Writable, Stringable): fn from_bytes(addr: String, max_body_size: Int, b: Span[Byte]) raises -> HTTPRequest[origin]: var reader = ByteReader(b) var headers = Headers[origin]() - var method: ByteView[origin] - var protocol: ByteView[origin] - var uri: ByteView[origin] + var method: Bytes + var protocol: Bytes + var uri: Bytes try: var rest = headers.parse_raw(reader) var method = rest[0] @@ -68,7 +69,7 @@ struct HTTPRequest[origin: Origin](Writable, Stringable): raise Error("HTTPRequest.from_bytes: Request body too large.") var request = HTTPRequest( - URI.parse(addr + String(uri)), headers=headers, method=String(method), protocol=String(protocol), cookies=cookies + URI.parse(addr + to_string(uri)), headers=headers, method=to_string(method), protocol=to_string(protocol), cookies=cookies ) if content_length > 0: @@ -93,8 +94,8 @@ struct HTTPRequest[origin: Origin](Writable, Stringable): ): self.headers = headers self.cookies = cookies - self.method = ByteView(method.as_bytes()) - self.protocol = protocol + self.method = to_bytes(method) + self.protocol = to_bytes(protocol) self.uri = uri self.body_raw = body self.server_is_tls = server_is_tls diff --git a/lightbug_http/strings.mojo b/lightbug_http/strings.mojo index 56c3883..5826b03 100644 --- a/lightbug_http/strings.mojo +++ b/lightbug_http/strings.mojo @@ -58,6 +58,10 @@ fn to_string(owned bytes: Bytes) -> String: return result^ +fn to_bytes(s: String) -> Bytes: + return Bytes(s.as_bytes()) + + fn find_all(s: String, sub_str: String) -> List[Int]: match_idxs = List[Int]() var current_idx: Int = s.find(sub_str)