Reject over-long URLs in parse_url.

httptools/parser/url_parser.pyx:
  * Adds the compile-time constant MAX_URL_LENGTH = (1 << 16) - 1
    (65535).
  * parse_url() now compares the length of the acquired buffer against
    MAX_URL_LENGTH and raises HttpParserInvalidURLError when it is larger.
    The check runs before http_parser_parse_url() is called.
  * Rationale, as stated in the added code comment: http_parser stores URL
    field offsets/lengths as uint16_t, so longer URLs would be silently
    truncated (MagicStack/httptools issue #142).

tests/test_parser.py:
  * Adds test_parser_url_too_long, which parses
    b'http://h/' + b'a' * 65535 (65544 bytes) and expects
    HttpParserInvalidURLError with a message matching 'url is too long'.

NOTE(review): line breaks, blank context lines and indentation below were
reconstructed from the hunk line counts -- verify the context lines
against the target files before applying.
NOTE(review): the context line `buf_data = py_buf.buf` may have lost a
cast during extraction -- confirm against url_parser.pyx.

diff --git a/httptools/parser/url_parser.pyx b/httptools/parser/url_parser.pyx
index 49908f3..e855b5f 100644
--- a/httptools/parser/url_parser.pyx
+++ b/httptools/parser/url_parser.pyx
@@ -12,6 +12,8 @@ from . cimport url_cparser as uparser
 
 __all__ = ('parse_url',)
 
+DEF MAX_URL_LENGTH = (1 << 16) - 1
+
 @cython.freelist(250)
 cdef class URL:
     cdef readonly bytes schema
@@ -63,6 +65,14 @@ def parse_url(url):
 
     PyObject_GetBuffer(url, &py_buf, PyBUF_SIMPLE)
     try:
+        if py_buf.len > MAX_URL_LENGTH:
+            # http_parser stores URL field offsets/lengths as uint16_t,
+            # so URLs longer than this will cause silent truncation.
+            # See https://github.com/MagicStack/httptools/issues/142
+            raise HttpParserInvalidURLError(
+                "url is too long: url length of {} bytes exceeds the "
+                "maximum of {} bytes".format(py_buf.len, MAX_URL_LENGTH))
+
         buf_data = py_buf.buf
         res = uparser.http_parser_parse_url(buf_data, py_buf.len, 0, parsed)
 
diff --git a/tests/test_parser.py b/tests/test_parser.py
index 86584c3..96dc6f0 100644
--- a/tests/test_parser.py
+++ b/tests/test_parser.py
@@ -686,3 +686,9 @@ def test_parser_url_9(self):
     def test_parser_url_10(self):
         with self.assertRaisesRegex(TypeError, 'a bytes-like object'):
             self.parse('dsf://aaa')
+
+    def test_parser_url_too_long(self):
+        url = b'http://h/' + b'a' * 65535
+        with self.assertRaisesRegex(httptools.HttpParserInvalidURLError,
+                                    'url is too long'):
+            self.parse(url)