Skip to content

Commit e4e9e21

Browse files
Greg Guthe (g-k)
authored and committed
fix bug 1621692
1 parent 0d88dd8 commit e4e9e21

File tree

2 files changed

+37
-3
lines changed

2 files changed

+37
-3
lines changed

bleach/html5lib_shim.py

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -533,7 +533,18 @@ def next_possible_entity(text):
533533

534534

535535
class BleachHTMLSerializer(HTMLSerializer):
536-
"""HTMLSerializer that undoes & -> &amp; in attributes"""
536+
"""HTMLSerializer that undoes & -> &amp; in attributes and sets
537+
escape_rcdata to True
538+
"""
539+
540+
# per the HTMLSerializer.__init__ docstring:
541+
#
542+
# Whether to escape characters that need to be
543+
# escaped within normal elements within rcdata elements such as
544+
# style.
545+
#
546+
escape_rcdata = True
547+
537548
def escape_base_amp(self, stoken):
538549
"""Escapes just bare & in HTML attribute values"""
539550
# First, undo escaping of &. We need to do this because html5lib's

tests/test_clean.py

Lines changed: 25 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
from bleach import clean
66
from bleach.html5lib_shim import Filter
77
from bleach.sanitizer import Cleaner
8-
8+
from bleach._vendor.html5lib.constants import rcdataElements
99

1010
def test_clean_idempotent():
1111
"""Make sure that applying the filter twice doesn't change anything."""
@@ -787,7 +787,7 @@ def test_nonexistent_namespace():
787787
(
788788
raw_tag,
789789
"<noscript><%s></noscript><img src=x onerror=alert(1) />" % raw_tag,
790-
"<noscript><%s></noscript>&lt;img src=x onerror=alert(1) /&gt;" % raw_tag,
790+
"<noscript>&lt;%s&gt;</noscript>&lt;img src=x onerror=alert(1) /&gt;" % raw_tag,
791791
)
792792
for raw_tag in _raw_tags
793793
],
@@ -797,6 +797,29 @@ def test_noscript_rawtag_(raw_tag, data, expected):
797797
assert clean(data, tags=["noscript", raw_tag]) == expected
798798

799799

800+
@pytest.mark.parametrize(
801+
"namespace_tag, rc_data_element_tag, data, expected",
802+
[
803+
(
804+
namespace_tag,
805+
rc_data_element_tag,
806+
"<%s><%s><img src=x onerror=alert(1)>" % (namespace_tag, rc_data_element_tag),
807+
"<%s><%s>&lt;img src=x onerror=alert(1)&gt;</%s></%s>" % (namespace_tag, rc_data_element_tag, rc_data_element_tag, namespace_tag),
808+
)
809+
for namespace_tag in ["math", "svg"]
810+
# https://dev.w3.org/html5/html-author/#rcdata-elements
811+
# https://html.spec.whatwg.org/index.html#parsing-html-fragments
812+
# in html5lib: 'style', 'script', 'xmp', 'iframe', 'noembed', 'noframes', and 'noscript'
813+
for rc_data_element_tag in rcdataElements
814+
],
815+
)
816+
def test_namespace_rc_data_element_strip_false(namespace_tag, rc_data_element_tag, data, expected):
817+
# refs: bug 1621692 / GHSA-m6xf-fq7q-8743
818+
#
819+
# browsers will pull the img out of the namespace and rc data tag resulting in XSS
820+
assert clean(data, tags=[namespace_tag, rc_data_element_tag], strip=False) == expected
821+
822+
800823
def get_ids_and_tests():
801824
"""Retrieves regression tests from data/ directory
802825

0 commit comments

Comments
 (0)