Repository URL to install this package:
|
Version:
0.5.7 ▾
|
# coding=utf-8
import datetime
from decimal import Decimal
import re
import six
import unicodedata
# NOTE: the patterns are raw strings so that ``\w``, ``\s`` and ``\.`` reach
# the regex engine untouched instead of being parsed as (invalid) string
# escape sequences, which newer Python versions warn about.

# Matches every character that is not a word character, whitespace or hyphen.
_re_pattern = re.compile(r'[^\w\s-]', flags=re.U)
# Same as ``_re_pattern`` but additionally leaves dots unmatched.
_re_pattern_allow_dots = re.compile(r'[^\.\w\s-]', flags=re.U)
# Matches a run of hyphens and/or whitespace characters.
_re_spaces = re.compile(r'[-\s]+', flags=re.U)
# Types checked by is_protected_type(): the integer types reported by six,
# NoneType, float, Decimal and the datetime/date/time classes.
_PROTECTED_TYPES = six.integer_types + (
    type(None),
    float,
    Decimal,
    datetime.datetime,
    datetime.date,
    datetime.time,
)
def is_protected_type(obj):
    """Return True if ``obj`` is an instance of one of the protected types.

    The protected types are the members of ``_PROTECTED_TYPES``. Such
    objects are handed back unchanged by force_text() when it is called
    with ``strings_only=True``.
    """
    return isinstance(obj, _PROTECTED_TYPES)
def force_text(s, encoding='utf-8', strings_only=False, errors='strict'):
    """
    Coerce ``s`` to a text string (``six.text_type``).

    Similar to smart_text, except that lazy instances are resolved to
    strings, rather than kept as lazy objects.

    If strings_only is True, don't convert (some) non-string-like objects:
    anything accepted by is_protected_type() is returned unchanged.

    ``encoding`` and ``errors`` are passed on to the decoding step whenever
    byte strings have to be decoded.

    Raises ValueError (carrying ``s`` plus the original error arguments)
    when decoding fails and ``s`` is not an exception instance.
    """
    # Handle the common case first for performance reasons.
    if issubclass(type(s), six.text_type):
        return s
    if strings_only and is_protected_type(s):
        return s
    try:
        if not issubclass(type(s), six.string_types):
            if six.PY3:
                if isinstance(s, bytes):
                    s = six.text_type(s, encoding, errors)
                else:
                    s = six.text_type(s)
            elif hasattr(s, '__unicode__'):
                s = six.text_type(s)
            else:
                s = six.text_type(bytes(s), encoding, errors)
        else:
            # Note: We use .decode() here, instead of six.text_type(s, encoding,
            # errors), so that if s is a SafeBytes, it ends up being a
            # SafeText at the end.
            s = s.decode(encoding, errors)
    except UnicodeDecodeError as e:
        if not isinstance(s, Exception):
            raise ValueError(s, *e.args)
        # If we get to here, the caller has passed in an Exception
        # subclass populated with non-ASCII bytestring data without a
        # working unicode method. Try to handle this without raising a
        # further exception by individually forcing the exception args
        # to unicode.
        # Iterate ``s.args`` explicitly: exception objects are only
        # iterable on Python 2, so looping over ``s`` itself would raise
        # TypeError on Python 3.
        s = ' '.join(force_text(arg, encoding, strings_only, errors)
                     for arg in s.args)
    return s
def slugify(value, allow_dots=False, allow_unicode=False):
    """
    Turn ``value`` into a slug.

    The value is forced to text and Unicode-normalised, every character
    that is not a word character, whitespace or hyphen is dropped (dots
    are kept as well when ``allow_dots`` is true), surrounding whitespace
    is stripped, the result is lowercased, and each run of whitespace
    and/or hyphens becomes a single hyphen.

    With ``allow_unicode`` the text is NFKC-normalised and non-ASCII
    characters are retained; otherwise it is NFKD-normalised and anything
    that cannot be encoded as ASCII is discarded.

    Adapted from Django 1.9
    """
    unwanted = _re_pattern_allow_dots if allow_dots else _re_pattern
    text = force_text(value)
    if allow_unicode:
        text = unicodedata.normalize('NFKC', text)
    else:
        decomposed = unicodedata.normalize('NFKD', text)
        text = decomposed.encode('ascii', 'ignore').decode('ascii')
    # Both modes share the same clean-up once normalisation is done.
    cleaned = unwanted.sub('', text).strip().lower()
    return _re_spaces.sub('-', cleaned)