Skip to content

Commit 2e7e5c1

Browse files
committed
unescaped latin-1 permitted in URI, but no ctrls
* reject ASCII control characters
* reject ASCII <>{}`^|\
* permit latin-1, including NBSP and SHY (no-break space, soft hyphen)
* permit " (quotation mark)
1 parent 903792f commit 2e7e5c1

File tree

7 files changed

+67
-0
lines changed

7 files changed

+67
-0
lines changed

gunicorn/http/message.py

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,35 @@
2929
VERSION_RE = re.compile(r"HTTP/(\d)\.(\d)")
3030
RFC9110_5_5_INVALID_AND_DANGEROUS = re.compile(r"[\0\r\n]")
3131

32+
RFC3986_2_URI_SPECIALS = (
33+
# gen-delims
34+
":/?#[]@"
35+
# sub-delims
36+
"!$&'()*+,;="
37+
# punctuation of unreserved (its ALPHA / DIGIT part is added in URI_CHARACTERS_RE)
38+
"-._~"
39+
# for pct-encoded
40+
"%"
41+
# notably absent from this list (must be pct-encoded):
42+
# \N{SPACE}
43+
# <> and {}
44+
# ` a.k.a \N{GRAVE ACCENT}
45+
# ^ a.k.a \N{CIRCUMFLEX ACCENT}
46+
# | a.k.a \N{VERTICAL LINE}
47+
# backslash a.k.a \N{REVERSE SOLIDUS}
48+
)
49+
GUNICORN_NONSTANDARD_URI_CHARACTERS = (
50+
"\N{QUOTATION MARK}"
51+
# used in tests/requests/valid/027.http (utf8 decoded as latin-1)
52+
# "\N{LATIN CAPITAL LETTER A WITH TILDE}"
53+
# "\N{NO-BREAK SPACE}"
54+
# includes the above - all latin-1 characters 0xA0..0xFF (0x80..0x9F, the C1 controls, stay rejected)
55+
# also includes "\N{SOFT HYPHEN}"
56+
+ bytes(range(0xA0, 0xff + 1)).decode("latin-1")
57+
)
58+
GUNICORN_URI_SPECIALS = RFC3986_2_URI_SPECIALS + GUNICORN_NONSTANDARD_URI_CHARACTERS
59+
URI_CHARACTERS_RE = re.compile(r"[%s0-9a-zA-Z]+" % (re.escape(GUNICORN_URI_SPECIALS)))
60+
3261

3362
class Message:
3463
def __init__(self, cfg, unreader, peer_addr):
@@ -425,6 +454,7 @@ def parse_request_line(self, line_bytes):
425454
if self.cfg.casefold_http_method:
426455
self.method = self.method.upper()
427456

457+
# https://datatracker.ietf.org/doc/html/rfc9112#section-3.2
428458
# URI
429459
self.uri = bits[1]
430460

@@ -438,6 +468,9 @@ def parse_request_line(self, line_bytes):
438468
# => manually reject one always invalid URI: empty
439469
if len(self.uri) == 0:
440470
raise InvalidRequestLine(bytes_to_str(line_bytes))
471+
# => reject any URI containing characters outside the RFC 3986 set (plus gunicorn extras: quotation mark, latin-1 0xA0-0xFF)
472+
if not URI_CHARACTERS_RE.fullmatch(self.uri):
473+
raise InvalidRequestLine(bytes_to_str(line_bytes))
441474

442475
try:
443476
parts = split_request_uri(self.uri)
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
GET /one\0/two HTTP/1.1\r\n
2+
Content-Length: 3\r\n
3+
\r\n
4+
WOW

tests/requests/invalid/nonascii_05.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
from gunicorn.http.errors import InvalidRequestLine
2+
3+
request = InvalidRequestLine

tests/requests/valid/041.http

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
GET scheme+ext://user+ext:password!@[::1]:8000/path?query#frag HTTP/1.1\r\n
2+
Host: localhost\r\n
3+
CONTENT-LENGTH: 3\r\n
4+
\r\n
5+
odd

tests/requests/valid/041.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
request = {
2+
"method": "GET",
3+
"uri": uri("scheme+ext://user+ext:password!@[::1]:8000/path?query#frag"),
4+
"version": (1, 1),
5+
"headers": [
6+
("HOST", "localhost"),
7+
("CONTENT-LENGTH", "3"),
8+
],
9+
"body": b'odd'
10+
}

tests/requests/valid/042.http

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
OPTIONS * HTTP/1.1\r\n
2+
Content-Length: 0\r\n
3+
\r\n

tests/requests/valid/042.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
request = {
2+
"method": "OPTIONS",
3+
"uri": uri("*"),
4+
"version": (1, 1),
5+
"headers": [
6+
("CONTENT-LENGTH", "0"),
7+
],
8+
"body": b''
9+
}

0 commit comments

Comments
 (0)