Skip to content

Commit d5e166b

Browse files
committed
replace html5lib with bleach
The sanitizer portion of html5lib is slated for deprecation, and the html5lib project recommends replacing that functionality with bleach.
1 parent 8a5053e commit d5e166b

File tree

4 files changed

+8
-17
lines changed

4 files changed

+8
-17
lines changed

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -26,7 +26,7 @@ classifiers = [
2626
]
2727
dynamic = ["version",]
2828
dependencies = [
29-
'html5lib>=1.0.1',
29+
'bleach',
3030
'regex>1.0; implementation_name != "pypy"',
3131
]
3232
requires-python = '>=3.8'

tests/test_textile.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -117,7 +117,7 @@ def test_sanitize():
117117
assert result == expect
118118

119119
test = """<p style="width: expression(alert('evil'));">a paragraph of evil text</p>"""
120-
result = '<p style="">a paragraph of evil text</p>'
120+
result = '<p>a paragraph of evil text</p>'
121121
expect = textile.Textile().parse(test, sanitize=True)
122122
assert result == expect
123123

textile/core.py

Lines changed: 6 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -21,7 +21,9 @@
2121
from urllib.parse import urlparse, urlsplit, urlunsplit, quote, unquote
2222
from collections import OrderedDict
2323

24-
from textile.tools import sanitizer, imagesize
24+
from bleach import clean
25+
26+
from textile.tools import imagesize
2527
from textile.regex_strings import (align_re_s, cls_re_s, pnct_re_s,
2628
regex_snippets, syms_re_s, table_span_re_s)
2729
from textile.utils import (decode_high, encode_high, encode_html, generate_tag,
@@ -236,10 +238,10 @@ def parse(self, text, rel=None, sanitize=False):
236238

237239
if self.block_tags:
238240
if self.lite:
239-
self.blocktag_whitelist = ['bq', 'p']
241+
self.blocktag_allowlist = ['bq', 'p', 'br']
240242
text = self.block(text)
241243
else:
242-
self.blocktag_whitelist = ['bq', 'p', 'bc', 'notextile',
244+
self.blocktag_allowlist = ['bq', 'p', 'br', 'bc', 'notextile',
243245
'pre', 'h[1-6]', 'fn{0}+'.format(
244246
regex_snippets['digit']), '###']
245247
text = self.block(text)
@@ -263,7 +265,7 @@ def parse(self, text, rel=None, sanitize=False):
263265
text = text.replace('{0}:glyph:'.format(self.uid), '')
264266

265267
if sanitize:
266-
text = sanitizer.sanitize(text)
268+
text = clean(text, tags=self.blocktag_allowlist)
267269

268270
text = self.retrieveTags(text)
269271
text = self.retrieveURLs(text)

textile/tools/sanitizer.py

Lines changed: 0 additions & 11 deletions
This file was deleted.

0 commit comments

Comments
 (0)