我能够使用基于这种方法的过滤器去除标签的内容:https ://bleach.readthedocs.io/en/latest/clean.html?highlight=strip#html5lib-filters-filters 。它确实<style></style>
在输出中留下了一个空白,但这是无害的。
from bleach.sanitizer import Cleaner
from bleach.html5lib_shim import Filter
class StyleTagFilter(Filter):
"""
https://bleach.readthedocs.io/en/latest/clean.html?highlight=strip#html5lib-filters-filters
"""
def __iter__(self):
in_style_tag = False
for token in Filter.__iter__(self):
if token["type"] == "StartTag" and token["name"] == "style":
in_style_tag = True
elif token["type"] == "EndTag":
in_style_tag = False
elif in_style_tag:
# If we are in a style tag, strip the contents
token["data"] = ""
yield token
# You must include "style" in the tags list
cleaner = Cleaner(tags=["div", "style"], strip=True, filters=[StyleTagFilter])
cleaned = cleaner.clean("<div><style>.some_style { font-weight: bold; }</style>Some text</div>")
assert cleaned == "<div><style></style>Some text</div>"