import six
import html5lib
from import eq_
import bleach
from import in_
def test_empty():
    """Check that cleaning the empty string yields the empty string."""
    cleaned = bleach.clean('')
    eq_('', cleaned)
def test_nbsp():
    """Check that ``&nbsp;`` references come back as U+00A0 characters."""
    if six.PY3:
        expected = '\xa0test string\xa0'
    else:
        # Without this branch the Python 3 value above was always overwritten.
        # six.u() builds the unicode text from the escaped form on Python 2.
        expected = six.u('\\xa0test string\\xa0')

    # Spell the non-breaking spaces as ASCII entity references so the input
    # does not depend on how this source file is encoded.
    eq_(expected, bleach.clean('&nbsp;test string&nbsp;'))
def test_comments_only():
    """Check comment handling when the input is nothing but a comment."""
    comment = '<!-- this is a comment -->'
    open_comment = '<!-- this is an open comment'

    # Default call: both the complete and the unterminated comment are
    # expected to clean to the empty string.
    eq_('', bleach.clean(comment))
    eq_('', bleach.clean(open_comment))

    # With strip_comments=False the comment is expected back; the
    # unterminated one is expected with the closing "-->" appended.
    eq_(comment, bleach.clean(comment, strip_comments=False))
    eq_('{0!s}-->'.format(open_comment),
        bleach.clean(open_comment, strip_comments=False))
def test_with_comments():
    """Check a comment followed by text, with and without comment stripping."""
    markup = '<!-- comment -->Just text'

    stripped = bleach.clean(markup)
    eq_('Just text', stripped)

    kept = bleach.clean(markup, strip_comments=False)
    eq_(markup, kept)
def test_no_html():
    """Check that text without markup is returned as-is."""
    plain = 'no html string'
    eq_(plain, bleach.clean(plain))
def test_allowed_html():
    """Check that ``<strong>`` and ``<em>`` markup is returned unchanged."""
    samples = (
        'an <strong>allowed</strong> tag',
        'another <em>good</em> tag',
    )
    for markup in samples:
        eq_(markup, bleach.clean(markup))
def test_bad_html():
    """Check that an unclosed ``<em>`` comes back with its closing tag."""
    unclosed = 'a <em>fixed tag'
    repaired = bleach.clean(unclosed)
    eq_('a <em>fixed tag</em>', repaired)
def test_function_arguments():
    """Check ``clean`` with caller-supplied ``tags`` and ``attributes``."""
    allowed_tags = ['span', 'br']
    allowed_attrs = {'span': ['style']}
    dirty = 'a <br/><span style="color:red">test</span>'

    cleaned = bleach.clean(dirty, tags=allowed_tags, attributes=allowed_attrs)
    eq_('a <br><span style="">test</span>', cleaned)
def test_named_arguments():
    """Check ``clean`` on an anchor with and without ``rel`` being allowed."""
    allowed_attrs = {'a': ['rel', 'href']}
    # The same anchor written with both attribute orders.
    href_first = '<a href="" rel="alternate"></a>'
    rel_first = '<a rel="alternate" href=""></a>'
    variants = (href_first, rel_first)

    # Default attributes: ``rel`` is expected to be dropped.
    eq_('<a href=""></a>', bleach.clean(href_first))
    # NOTE(review): in_ presumably asserts membership of the result in
    # ``variants`` -- confirm against the helper's definition.
    in_(variants, bleach.clean(href_first, attributes=allowed_attrs))
def test_disallowed_html():
    """Check that ``<script>`` and ``<style>`` tags come back escaped."""
    # The expected strings are entity-escaped: asserting that these tags
    # pass through as live markup would contradict what clean() is for.
    eq_('a &lt;script&gt;safe()&lt;/script&gt; test',
        bleach.clean('a <script>safe()</script> test'))
    eq_('a &lt;style&gt;body{}&lt;/style&gt; test',
        bleach.clean('a <style>body{}</style> test'))
def test_bad_href():
    """Check that an ``href`` attribute on ``<em>`` is dropped."""
    dirty = '<em href="fail">no link</em>'
    eq_('<em>no link</em>', bleach.clean(dirty))
def test_bare_entities():
    """Check that bare ``&`` and ``<`` characters come back escaped."""
    eq_('an &amp; entity', bleach.clean('an & entity'))
    eq_('an &lt; entity', bleach.clean('an < entity'))
    eq_('tag &lt; <em>and</em> entity',
        bleach.clean('tag < <em>and</em> entity'))
    # An entity that is already escaped must not be escaped a second time.
    eq_('&amp;', bleach.clean('&amp;'))
def test_escaped_entities():
    """Check that already-escaped markup is returned unchanged."""
    # The input must actually contain escaped entities; a literal <em> tag
    # would only repeat the allowed-tag checks made elsewhere in this file.
    s = '&lt;em&gt;strong&lt;/em&gt;'
    eq_(s, bleach.clean(s))
def test_serializer():
    """Check serialized output for ``<table>`` and ``<p>`` fragments."""
    empty_table = '<table></table>'
    eq_(empty_table, bleach.clean(empty_table, tags=['table']))

    linkified = bleach.linkify('<table>test</table>')
    eq_('test<table></table>', linkified)

    paragraph = '<p>test</p>'
    eq_(paragraph, bleach.clean(paragraph, tags=['p']))
def test_no_href_links():
    """Check that ``linkify`` leaves an anchor without ``href`` unchanged."""
    anchor = '<a name="anchor">x</a>'
    linkified = bleach.linkify(anchor)
    eq_(anchor, linkified)
def test_weird_strings():
    """Check that the fragment ``</3`` cleans to the empty string."""
    cleaned = bleach.clean('</3')
    eq_(cleaned, '')
def test_xml_render():
    """Check that rendering an empty parsed fragment gives ``''``."""
    fragment = html5lib.HTMLParser().parseFragment('')
    rendered = bleach._render(fragment)
    eq_(rendered, '')
def test_stripping():
    """Check ``strip=True``: tags outside the allowed set are removed."""
    eq_('a test <em>with</em> <b>html</b> tags',
        bleach.clean('a test <em>with</em> <b>html</b> tags', strip=True))
    # Removing the <img> leaves the space on each side of it, hence the two
    # consecutive spaces in the expected text.
    eq_('a test <em>with</em>  <b>html</b> tags',
        bleach.clean('a test <em>with</em> <img src=""> '
                     '<b>html</b> tags', strip=True))

    # Only <p> is allowed: the anchor is dropped, its text is kept.
    s = '<p><a href="">link text</a></p>'
    eq_('<p>link text</p>', bleach.clean(s, tags=['p'], strip=True))
    s = '<p><span>multiply <span>nested <span>text</span></span></span></p>'
    eq_('<p>multiply nested text</p>', bleach.clean(s, tags=['p'], strip=True))

    # The closing tags complete the previously unterminated literal and
    # mirror the structure of the expected output below.
    s = ('<p><a href=""><img src="">'
         '</a></p>')
    eq_('<p><a href=""></a></p>',
        bleach.clean(s, tags=['p', 'a'], strip=True))
def test_allowed_styles():
    """Check filtering of ``style`` declarations via the ``styles`` argument."""
    allowed_attrs = ['style']
    allowed_styles = ['color']
    empty_style = '<b style=""></b>'
    blue = '<b style="color: blue;"></b>'

    # No ``styles`` given: the declaration is expected to be emptied.
    without_styles = bleach.clean('<b style="top:0"></b>',
                                  attributes=allowed_attrs)
    eq_(empty_style, without_styles)

    # ``color`` allowed: it is expected to be kept, ``top`` to be removed.
    color_only = bleach.clean(blue, attributes=allowed_attrs,
                              styles=allowed_styles)
    eq_(blue, color_only)

    mixed = bleach.clean('<b style="top: 0; color: blue;"></b>',
                         attributes=allowed_attrs, styles=allowed_styles)
    eq_(blue, mixed)
def test_idempotent():
    """Check that a second pass of ``clean`` or ``linkify`` changes nothing."""
    dirty = '<span>invalid & </span> < extra<em>'

    cleaned_once = bleach.clean(dirty)
    cleaned_twice = bleach.clean(cleaned_once)
    eq_(cleaned_once, cleaned_twice)

    linked_once = bleach.linkify(dirty)
    linked_twice = bleach.linkify(linked_once)
    eq_(linked_once, linked_twice)
def test_rel_already_there():
    """Make sure rel attribute is updated not replaced"""
    # NOTE(review): the continuation lines of these three literals were
    # missing, leaving the parentheses unbalanced. The link text and closing
    # tags below are a reconstruction -- confirm against project history.
    linked = ('Click <a href="" rel="tooltip">'
              'here</a>.')
    # The two accepted outputs differ only in attribute order.
    link_good = (('Click <a href="" rel="tooltip nofollow">'
                  'here</a>.'),
                 ('Click <a rel="tooltip nofollow" href="">'
                  'here</a>.'))

    in_(link_good, bleach.linkify(linked))
    # Linkifying an already-updated link must not change it again.
    in_(link_good, bleach.linkify(link_good[0]))
def test_lowercase_html():
    """Check that upper-case tag and attribute names come back lower-cased."""
    shouting = '<EM CLASS="FOO">BAR</EM>'
    # Only the names change case; the attribute value and text stay as given.
    expected = '<em class="FOO">BAR</em>'
    eq_(expected, bleach.clean(shouting, attributes=['class']))
def test_wildcard_attributes():
    """Check that ``id``, listed under the ``'*'`` key, is kept on both tags."""
    ATTR = {
        '*': ['id'],
        'img': ['src'],
    }
    TAG = ['img', 'em']
    dirty = ('both <em id="foo" style="color: black">can</em> have '
             '<img id="bar" src="foo"/>')
    # The two accepted outputs differ only in the order of the <img>
    # attributes; ``style`` is absent from both.
    clean = ('both <em id="foo">can</em> have <img src="foo" id="bar">',
             'both <em id="foo">can</em> have <img id="bar" src="foo">')
    in_(clean, bleach.clean(dirty, tags=TAG, attributes=ATTR))
def test_sarcasm():
    """Jokes should crash.<sarcasm/>"""
    dirty = 'Yeah right <sarcasm/>'
    # <sarcasm> is not an allowed tag, so it is expected back entity-escaped
    # rather than as live markup.
    clean = 'Yeah right &lt;sarcasm/&gt;'
    eq_(clean, bleach.clean(dirty))