Skip to content

Commit 7285644

Browse files
adamchainz authored and sarahboyce committed
[5.0.x] Fixed CVE-2024-38875 -- Mitigated potential DoS in urlize and urlizetrunc template filters.
Thank you to Elias Myllymäki for the report. Co-authored-by: Sarah Boyce <42296566+sarahboyce@users.noreply.github.com>
1 parent 8303400 commit 7285644

File tree

4 files changed

+86
-24
lines changed

4 files changed

+86
-24
lines changed

django/utils/html.py

+66-24
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99

1010
from django.utils.deprecation import RemovedInDjango60Warning
1111
from django.utils.encoding import punycode
12-
from django.utils.functional import Promise, keep_lazy, keep_lazy_text
12+
from django.utils.functional import Promise, cached_property, keep_lazy, keep_lazy_text
1313
from django.utils.http import RFC3986_GENDELIMS, RFC3986_SUBDELIMS
1414
from django.utils.regex_helper import _lazy_re_compile
1515
from django.utils.safestring import SafeData, SafeString, mark_safe
@@ -255,6 +255,16 @@ def unquote_quote(segment):
255255
return urlunsplit((scheme, netloc, path, query, fragment))
256256

257257

258+
class CountsDict(dict):
    """
    Dictionary that lazily counts occurrences of substrings in a fixed word.

    Looking up a key that is not stored yet computes ``word.count(key)``,
    stores the result under that key, and returns it. Stored values are
    ordinary dict entries, so callers may adjust them afterwards.
    """

    def __init__(self, *args, word, **kwargs):
        # Forward keyword items as keywords: unpacking them positionally
        # would hand only the key names to dict() and raise.
        super().__init__(*args, **kwargs)
        self.word = word

    def __missing__(self, key):
        # Called by dict.__getitem__ only when `key` is absent; cache the
        # count so later lookups do not rescan the word.
        count = self.word.count(key)
        self[key] = count
        return count
266+
267+
258268
class Urlizer:
259269
"""
260270
Convert any URLs in text into clickable links.
@@ -360,40 +370,72 @@ def trim_url(self, x, *, limit):
360370
return x
361371
return "%s…" % x[: max(0, limit - 1)]
362372

373+
@cached_property
def wrapping_punctuation_openings(self):
    """
    Return the opening characters of ``wrapping_punctuation`` joined into a
    single string (duplicate openings collapse to one).
    """
    openings = dict(self.wrapping_punctuation)
    return "".join(openings)
376+
377+
@cached_property
def trailing_punctuation_chars_no_semicolon(self):
    """Return ``trailing_punctuation_chars`` with every ";" removed."""
    chars = self.trailing_punctuation_chars
    return "".join(chars.split(";"))
380+
381+
@cached_property
def trailing_punctuation_chars_has_semicolon(self):
    """Return whether ";" is one of the trailing punctuation characters."""
    chars = self.trailing_punctuation_chars
    return chars.find(";") != -1
384+
363385
def trim_punctuation(self, word):
    """
    Trim trailing and wrapping punctuation from `word`. Return the items of
    the new state.

    The result is a ``(lead, middle, trail)`` tuple of strings: the opening
    wrapping punctuation removed from the start, the remaining text, and
    the punctuation removed from the end. Concatenating the three always
    gives back `word`.
    """
    # Strip all opening wrapping punctuation.
    middle = word.lstrip(self.wrapping_punctuation_openings)
    lead = word[: len(word) - len(middle)]
    trail = ""

    # Continue trimming until middle remains unchanged.
    trimmed_something = True
    # Each character is counted once on first lookup and the cached value
    # is adjusted by hand below, instead of recounting `middle` every pass.
    counts = CountsDict(word=middle)
    while trimmed_something and middle:
        trimmed_something = False
        # Trim wrapping punctuation.
        for opening, closing in self.wrapping_punctuation:
            if counts[opening] < counts[closing]:
                rstripped = middle.rstrip(closing)
                if rstripped != middle:
                    # Remove the whole surplus of closings in one slice.
                    # NOTE(review): the slice takes the last `strip`
                    # characters even if some of them are not `closing`.
                    strip = counts[closing] - counts[opening]
                    # Prepend to the trail collected so far; assigning
                    # would drop punctuation trimmed on an earlier pass
                    # (e.g. the "." in "foo).").
                    trail = middle[-strip:] + trail
                    middle = middle[:-strip]
                    trimmed_something = True
                    counts[closing] -= strip

        # Trim trailing punctuation other than ";", which needs the
        # entity-aware handling below.
        rstripped = middle.rstrip(self.trailing_punctuation_chars_no_semicolon)
        if rstripped != middle:
            trail = middle[len(rstripped) :] + trail
            middle = rstripped
            trimmed_something = True

        if self.trailing_punctuation_chars_has_semicolon and middle.endswith(";"):
            # Only strip if not part of an HTML entity.
            amp = middle.rfind("&")
            if amp == -1:
                can_strip = True
            else:
                # Text from the last "&" to the end. It is safe to strip
                # when unescaping leaves it unchanged (not an entity) or
                # the unescaped text still ends with ";" (the final
                # semicolon was not consumed by an entity).
                potential_entity = middle[amp:]
                escaped = html.unescape(potential_entity)
                can_strip = (escaped == potential_entity) or escaped.endswith(";")

            if can_strip:
                rstripped = middle.rstrip(";")
                amount_stripped = len(middle) - len(rstripped)
                if amp > -1 and amount_stripped > 1:
                    # Leave a trailing semicolon as might be an entity.
                    trail = middle[len(rstripped) + 1 :] + trail
                    middle = rstripped + ";"
                else:
                    trail = middle[len(rstripped) :] + trail
                    middle = rstripped
                trimmed_something = True

    return lead, middle, trail
398440

399441
@staticmethod

docs/releases/4.2.14.txt

+6
Original file line numberDiff line numberDiff line change
@@ -7,3 +7,9 @@ Django 4.2.14 release notes
77
Django 4.2.14 fixes two security issues with severity "moderate" and two
88
security issues with severity "low" in 4.2.13.
99

10+
CVE-2024-38875: Potential denial-of-service vulnerability in ``django.utils.html.urlize()``
11+
===========================================================================================
12+
13+
:tfilter:`urlize` and :tfilter:`urlizetrunc` were subject to a potential
14+
denial-of-service attack via certain inputs with a very large number of
15+
brackets.

docs/releases/5.0.7.txt

+7
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,13 @@ Django 5.0.7 release notes
77
Django 5.0.7 fixes two security issues with severity "moderate", two security
88
issues with severity "low", and several bugs in 5.0.6.
99

10+
CVE-2024-38875: Potential denial-of-service vulnerability in ``django.utils.html.urlize()``
11+
===========================================================================================
12+
13+
:tfilter:`urlize` and :tfilter:`urlizetrunc` were subject to a potential
14+
denial-of-service attack via certain inputs with a very large number of
15+
brackets.
16+
1017
Bugfixes
1118
========
1219

tests/utils_tests/test_html.py

+7
Original file line numberDiff line numberDiff line change
@@ -352,6 +352,13 @@ def test_urlize_unchanged_inputs(self):
352352
"foo@.example.com",
353353
"foo@localhost",
354354
"foo@localhost.",
355+
# trim_punctuation catastrophic tests
356+
"(" * 100_000 + ":" + ")" * 100_000,
357+
"(" * 100_000 + "&:" + ")" * 100_000,
358+
"([" * 100_000 + ":" + "])" * 100_000,
359+
"[(" * 100_000 + ":" + ")]" * 100_000,
360+
"([[" * 100_000 + ":" + "]])" * 100_000,
361+
"&:" + ";" * 100_000,
355362
)
356363
for value in tests:
357364
with self.subTest(value=value):

0 commit comments

Comments
 (0)