# Repository URL to install this package:
# Various microbenchmarks comparing unicode and byte string performance
# Please keep this file both 2.x and 3.x compatible!
import timeit
import itertools
import operator
import re
import sys
import datetime
import optparse
# Version string of this benchmark script; printed in the startup banner.
VERSION = '2.0'
def p(*args):
    """Write the arguments to stdout on one line.

    Each argument is converted with ``str``, the pieces are joined with a
    single space, and a newline is appended.  Used instead of ``print`` so
    the same code runs on 2.x and 3.x.
    """
    line = ' '.join(map(str, args))
    sys.stdout.write(line + '\n')
# Choose the converters that turn a native str literal into a byte string
# (BYTES) or a unicode string (UNICODE) on the running interpreter.
if sys.version_info < (3,):
    # 2.x: a native str is already a byte string; decode to get unicode.
    BYTES = bytes_from_str = lambda x: x
    UNICODE = unicode_from_str = lambda x: x.decode('ascii')
else:
    # 3.x: a native str is already unicode; encode to get bytes.
    BYTES = bytes_from_str = lambda x: x.encode('ascii')
    UNICODE = unicode_from_str = lambda x: x
class UnsupportedType(TypeError):
    """TypeError subclass defined by this benchmark script.

    NOTE(review): nothing in the visible part of the file raises it;
    presumably it flags a benchmark that does not apply to the string type
    under test -- confirm against the rest of the file.
    """
# Startup banner: script version, interpreter version, current time.
p('stringbench v' + str(VERSION))
p(sys.version)
p(datetime.datetime.now())

# REPEAT is not read in the visible part of the file; presumably it is the
# number of timing repetitions -- confirm.  Alternative values are kept as
# commented-out toggles.
#REPEAT = 1
REPEAT = 3
#REPEAT = 7

# This script refuses to be imported as a module.
if not __name__ == "__main__":
    raise SystemExit("Must run as main program")
# Command-line switches.  Each one is a boolean flag stored under ``dest``;
# a flag that is not given on the command line is left as None by optparse.
parser = optparse.OptionParser()
parser.add_option(
    "-R", "--skip-re",
    action="store_true", dest="skip_re",
    help="skip regular expression tests")
parser.add_option(
    "-8", "--8-bit",
    action="store_true", dest="bytes_only",
    help="only do 8-bit string benchmarks")
parser.add_option(
    "-u", "--unicode",
    action="store_true", dest="unicode_only",
    help="only do Unicode string benchmarks")
# Pre-built index lists.  The benchmark loops below iterate over _RANGE_1000
# and _RANGE_100 to repeat an operation 1000 or 100 times; _RANGE_10 is not
# used in the visible part of the file.
_RANGE_1000 = list(range(1000))
_RANGE_100 = list(range(100))
_RANGE_10 = list(range(10))
# Names of the functions already decorated with bench(); bench() uses this
# to reject a second function with the same name.
dups = {}
def bench(s, group, repeat_count):
    """Return a decorator that tags a function as a benchmark.

    s            -- label for the benchmark, stored as ``f.comment``
    group        -- group label, stored as ``f.group``
    repeat_count -- stored as ``f.repeat_count``; the visible benchmarks pass
                    the number of iterations their loop performs

    The decorator also sets ``f.is_bench = True`` and records the function
    name in the module-level ``dups`` dict.  It raises AssertionError if a
    function with the same name was decorated before, and returns the
    function itself (not a wrapper).
    """
    def register(func):
        name = func.__name__
        if name in dups:
            raise AssertionError("Multiple functions with same name: %r" %
                                 (name,))
        dups[name] = 1
        func.is_bench = True
        func.comment = s
        func.group = group
        func.repeat_count = repeat_count
        return func
    return register
def uses_re(f):
    """Decorator: mark benchmark function *f* as needing the ``re`` module.

    Sets ``f.uses_re = True`` and returns *f*.  The return is essential:
    a decorator that returns None rebinds the decorated name to None, so
    the benchmark function would be discarded.
    """
    f.uses_re = True
    return f
####### 'in' comparisons
@bench('"A" in "A"*1000', "early match, single character", 1000)
def in_test_quick_match_single_character(STR):
    """Evaluate ``needle in haystack`` 1000 times; the match is at index 0.

    STR is the callable applied to native str literals to build the operands.
    """
    needle = STR("A")
    haystack = STR("A" * 1000)
    for _ in _RANGE_1000:
        needle in haystack
@bench('"B" in "A"*1000', "no match, single character", 1000)
def in_test_no_match_single_character(STR):
    """Evaluate ``needle in haystack`` 1000 times; the needle never occurs.

    STR is the callable applied to native str literals to build the operands.
    """
    needle = STR("B")
    haystack = STR("A" * 1000)
    for _ in _RANGE_1000:
        needle in haystack
@bench('"AB" in "AB"*1000', "early match, two characters", 1000)
def in_test_quick_match_two_characters(STR):
    """Evaluate ``needle in haystack`` 1000 times; two-character needle
    matching at index 0.

    STR is the callable applied to native str literals to build the operands.
    """
    needle = STR("AB")
    haystack = STR("AB" * 1000)
    for _ in _RANGE_1000:
        needle in haystack
@bench('"BC" in "AB"*1000', "no match, two characters", 1000)
def in_test_no_match_two_character(STR):
    """Evaluate ``needle in haystack`` 1000 times; the two-character needle
    never occurs.

    STR is the callable applied to native str literals to build the operands.
    """
    needle = STR("BC")
    haystack = STR("AB" * 1000)
    for _ in _RANGE_1000:
        needle in haystack
@bench('"BC" in ("AB"*300+"C")', "late match, two characters", 1000)
def in_test_slow_match_two_characters(STR):
    """Evaluate ``needle in haystack`` 1000 times; the only match is the
    last two characters of the haystack.

    STR is the callable applied to native str literals to build the operands.
    """
    needle = STR("BC")
    haystack = STR("AB" * 300+"C")
    for _ in _RANGE_1000:
        needle in haystack
@bench('s="ABC"*33; (s+"E") in ((s+"D")*300+s+"E")',
       "late match, 100 characters", 100)
def in_test_slow_match_100_characters(STR):
    """Evaluate ``needle in haystack`` 100 times with a 100-character needle
    whose only match is at the very end of the haystack.

    STR is the callable applied to native str literals to build the pieces.
    """
    stem = STR("ABC"*33)
    filler = STR("D")
    marker = STR("E")
    needle = stem+marker
    # 300 near-misses (stem + "D") followed by the single real match.
    haystack = (stem+filler)*300 + stem+marker
    for _ in _RANGE_100:
        needle in haystack
# Try with regex
@uses_re
@bench('s="ABC"*33; re.compile(s+"E").search((s+"D")*300+s+"E")',
       "late match, 100 characters", 100)
def re_test_slow_match_100_characters(STR):
    """Run a compiled-regex search 100 times with a 100-character pattern
    whose only match is at the very end of the subject string.

    STR is the callable applied to native str literals to build the pieces.
    The benchmark label names ``s+"E"`` because that is the pattern this
    function compiles; it is the regex counterpart of the "late match,
    100 characters" ``in`` benchmark.
    """
    stem = STR("ABC"*33)
    filler = STR("D")
    marker = STR("E")
    # 300 near-misses (stem + "D") followed by the single real match.
    subject = (stem+filler)*300 + stem+marker
    pattern_text = stem+marker
    pat = re.compile(pattern_text)
    search = pat.search  # bound once so the loop pays no attribute lookup
    for _ in _RANGE_100:
        search(subject)
#### same tests as 'in' but use 'find'
@bench('("A"*1000).find("A")', "early match, single character", 1000)
def find_test_quick_match_single_character(STR):
    """Call ``haystack.find(needle)`` 1000 times; the match is at index 0.

    STR is the callable applied to native str literals to build the operands.
    """
    needle = STR("A")
    haystack = STR("A" * 1000)
    find = haystack.find  # bound once so the loop pays no attribute lookup
    for _ in _RANGE_1000:
        find(needle)
@bench('("A"*1000).find("B")', "no match, single character", 1000)
def find_test_no_match_single_character(STR):
    """Call ``haystack.find(needle)`` 1000 times; the needle never occurs.

    STR is the callable applied to native str literals to build the operands.
    """
    needle = STR("B")
    haystack = STR("A" * 1000)
    find = haystack.find  # bound once so the loop pays no attribute lookup
    for _ in _RANGE_1000:
        find(needle)
@bench('("AB"*1000).find("AB")', "early match, two characters", 1000)
def find_test_quick_match_two_characters(STR):
    """Call ``haystack.find(needle)`` 1000 times; two-character needle
    matching at index 0.

    STR is the callable applied to native str literals to build the operands.
    """
    needle = STR("AB")
    haystack = STR("AB" * 1000)
    find = haystack.find  # bound once so the loop pays no attribute lookup
    for _ in _RANGE_1000:
        find(needle)
@bench('("AB"*1000).find("BC")', "no match, two characters", 1000)
def find_test_no_match_two_character(STR):
    """Call ``haystack.find(needle)`` 1000 times; the needle "BC" never
    occurs, although its first character does.

    STR is the callable applied to native str literals to build the operands.
    """
    needle = STR("BC")
    haystack = STR("AB" * 1000)
    find = haystack.find  # bound once so the loop pays no attribute lookup
    for _ in _RANGE_1000:
        find(needle)
@bench('("AB"*1000).find("CA")', "no match, two characters", 1000)
def find_test_no_match_two_character_bis(STR):
    """Call ``haystack.find(needle)`` 1000 times; the needle "CA" never
    occurs, although its last character does.

    STR is the callable applied to native str literals to build the operands.
    """
    needle = STR("CA")
    haystack = STR("AB" * 1000)
    find = haystack.find  # bound once so the loop pays no attribute lookup
    for _ in _RANGE_1000:
        find(needle)
@bench('("AB"*300+"C").find("BC")', "late match, two characters", 1000)
def find_test_slow_match_two_characters(STR):
    """Call ``haystack.find(needle)`` 1000 times; the only match is the
    last two characters of the haystack.

    STR is the callable applied to native str literals to build the operands.
    """
    needle = STR("BC")
    haystack = STR("AB" * 300+"C")
    find = haystack.find  # bound once so the loop pays no attribute lookup
    for _ in _RANGE_1000:
        find(needle)
@bench('("AB"*300+"CA").find("CA")', "late match, two characters", 1000)
def find_test_slow_match_two_characters_bis(STR):
    """Call ``haystack.find(needle)`` 1000 times; the only match ("CA") is
    the last two characters of the haystack.

    STR is the callable applied to native str literals to build the operands.
    """
    needle = STR("CA")
    haystack = STR("AB" * 300+"CA")
    find = haystack.find  # bound once so the loop pays no attribute lookup
    for _ in _RANGE_1000:
        find(needle)
@bench('s="ABC"*33; ((s+"D")*500+s+"E").find(s+"E")',
       "late match, 100 characters", 100)
def find_test_slow_match_100_characters(STR):
    """Call ``haystack.find(needle)`` 100 times with a 100-character needle
    whose only match is at the very end of the haystack.

    STR is the callable applied to native str literals to build the pieces.
    """
    stem = STR("ABC"*33)
    filler = STR("D")
    marker = STR("E")
    needle = stem+marker
    # 500 near-misses (stem + "D") followed by the single real match.
    haystack = (stem+filler)*500 + stem+marker
    find = haystack.find  # bound once so the loop pays no attribute lookup
    for _ in _RANGE_100:
        find(needle)
@bench('s="ABC"*33; ((s+"D")*500+"E"+s).find("E"+s)',
       "late match, 100 characters", 100)
def find_test_slow_match_100_characters_bis(STR):
    """Call ``haystack.find(needle)`` 100 times with a 100-character needle
    ("E" followed by the stem) whose only match is at the end of the
    haystack.

    STR is the callable applied to native str literals to build the pieces.
    """
    stem = STR("ABC"*33)
    filler = STR("D")
    marker = STR("E")
    needle = marker+stem
    # "E" occurs exactly once, after the 500 (stem + "D") repetitions.
    haystack = (stem+filler)*500 + marker+stem
    find = haystack.find  # bound once so the loop pays no attribute lookup
    for _ in _RANGE_100:
        find(needle)
#### Same tests for 'rfind'
@bench('("A"*1000).rfind("A")', "early match, single character", 1000)
def rfind_test_quick_match_single_character(STR):
    """Call ``haystack.rfind(needle)`` 1000 times; the needle matches at the
    last position of the haystack.

    STR is the callable applied to native str literals to build the operands.
    """
    needle = STR("A")
    haystack = STR("A" * 1000)
    rfind = haystack.rfind  # bound once so the loop pays no attribute lookup
    for _ in _RANGE_1000:
        rfind(needle)
@bench('("A"*1000).rfind("B")', "no match, single character", 1000)
def rfind_test_no_match_single_character(STR):
    """Call ``haystack.rfind(needle)`` 1000 times; the needle never occurs.

    STR is the callable applied to native str literals to build the operands.
    """
    needle = STR("B")
    haystack = STR("A" * 1000)
    rfind = haystack.rfind  # bound once so the loop pays no attribute lookup
    for _ in _RANGE_1000:
        rfind(needle)
@bench('("AB"*1000).rfind("AB")', "early match, two characters", 1000)
def rfind_test_quick_match_two_characters(STR):
    """Call ``haystack.rfind(needle)`` 1000 times; the needle matches the
    last two characters of the haystack.

    STR is the callable applied to native str literals to build the operands.
    """
    needle = STR("AB")
    haystack = STR("AB" * 1000)
    rfind = haystack.rfind  # bound once so the loop pays no attribute lookup
    for _ in _RANGE_1000:
        rfind(needle)
@bench('("AB"*1000).rfind("BC")', "no match, two characters", 1000)
def rfind_test_no_match_two_character(STR):
    """Call ``haystack.rfind(needle)`` 1000 times; the needle "BC" never
    occurs, although its first character does.

    STR is the callable applied to native str literals to build the operands.
    """
    needle = STR("BC")
    haystack = STR("AB" * 1000)
    rfind = haystack.rfind  # bound once so the loop pays no attribute lookup
    for _ in _RANGE_1000:
        rfind(needle)
@bench('("AB"*1000).rfind("CA")', "no match, two characters", 1000)
def rfind_test_no_match_two_character_bis(STR):
    """Call ``haystack.rfind(needle)`` 1000 times; the needle "CA" never
    occurs, although its last character does.

    STR is the callable applied to native str literals to build the operands.
    """
    needle = STR("CA")
    haystack = STR("AB" * 1000)
    rfind = haystack.rfind  # bound once so the loop pays no attribute lookup
    for _ in _RANGE_1000:
        rfind(needle)
@bench('("C"+"AB"*300).rfind("CA")', "late match, two characters", 1000)
def rfind_test_slow_match_two_characters(STR):
    """Call ``haystack.rfind(needle)`` 1000 times; the only match is at
    index 0, the far end for a right-to-left search.

    STR is the callable applied to native str literals to build the operands.
    """
    needle = STR("CA")
    haystack = STR("C" + "AB" * 300)
    rfind = haystack.rfind  # bound once so the loop pays no attribute lookup
    for _ in _RANGE_1000:
        rfind(needle)
@bench('("BC"+"AB"*300).rfind("BC")', "late match, two characters", 1000)
def rfind_test_slow_match_two_characters_bis(STR):
    """Call ``haystack.rfind(needle)`` 1000 times; the only match ("BC") is
    at index 0, the far end for a right-to-left search.

    STR is the callable applied to native str literals to build the operands.
    """
    needle = STR("BC")
    haystack = STR("BC" + "AB" * 300)
    rfind = haystack.rfind  # bound once so the loop pays no attribute lookup
    for _ in _RANGE_1000:
        rfind(needle)
@bench('s="ABC"*33; ("E"+s+("D"+s)*500).rfind("E"+s)',
       "late match, 100 characters", 100)
def rfind_test_slow_match_100_characters(STR):
    """Call ``haystack.rfind(needle)`` 100 times with a 100-character needle
    ("E" followed by the stem) whose only match is at index 0.

    STR is the callable applied to native str literals to build the pieces.
    """
    stem = STR("ABC"*33)
    filler = STR("D")
    marker = STR("E")
    needle = marker+stem
    # The single real match comes first, then 500 near-misses ("D" + stem).
    haystack = marker+stem + (filler+stem)*500
    rfind = haystack.rfind  # bound once so the loop pays no attribute lookup
    for _ in _RANGE_100:
        rfind(needle)
@bench('s="ABC"*33; (s+"E"+("D"+s)*500).rfind(s+"E")',
       "late match, 100 characters", 100)
def rfind_test_slow_match_100_characters_bis(STR):
    """Call ``haystack.rfind(needle)`` 100 times with a 100-character needle
    (the stem followed by "E") whose only match is at index 0.

    STR is the callable applied to native str literals to build the pieces.
    """
    stem = STR("ABC"*33)
    filler = STR("D")
    marker = STR("E")
    needle = stem+marker
    # "E" occurs exactly once, right after the leading stem.
    haystack = stem+marker + (filler+stem)*500
    rfind = haystack.rfind  # bound once so the loop pays no attribute lookup
    for _ in _RANGE_100:
        rfind(needle)
#### Now with index.
# Skip the ones which fail because that would include exception overhead.
@bench('("A"*1000).index("A")', "early match, single character", 1000)
def index_test_quick_match_single_character(STR):
    """Call ``haystack.index(needle)`` 1000 times; the match is at index 0.

    STR is the callable applied to native str literals to build the operands.
    """
    needle = STR("A")
    haystack = STR("A" * 1000)
    index = haystack.index  # bound once so the loop pays no attribute lookup
    for _ in _RANGE_1000:
        index(needle)
@bench('("AB"*1000).index("AB")', "early match, two characters", 1000)
def index_test_quick_match_two_characters(STR):
    """Call ``haystack.index(needle)`` 1000 times; two-character needle
    matching at index 0.

    STR is the callable applied to native str literals to build the operands.
    """
    needle = STR("AB")
    haystack = STR("AB" * 1000)
    index = haystack.index  # bound once so the loop pays no attribute lookup
    for _ in _RANGE_1000:
        index(needle)
@bench('("AB"*300+"C").index("BC")', "late match, two characters", 1000)
def index_test_slow_match_two_characters(STR):
    """Call ``haystack.index(needle)`` 1000 times; the only match is the
    last two characters of the haystack.

    STR is the callable applied to native str literals to build the operands.
    """
    needle = STR("BC")
    haystack = STR("AB" * 300+"C")
    index = haystack.index  # bound once so the loop pays no attribute lookup
    for _ in _RANGE_1000:
        index(needle)
@bench('s="ABC"*33; ((s+"D")*500+s+"E").index(s+"E")',
       "late match, 100 characters", 100)
def index_test_slow_match_100_characters(STR):
    """Call ``haystack.index(needle)`` 100 times with a 100-character needle
    whose only match is at the very end of the haystack.

    STR is the callable applied to native str literals to build the pieces.
    """
    stem = STR("ABC"*33)
    filler = STR("D")
    marker = STR("E")
    needle = stem+marker
    # 500 near-misses (stem + "D") followed by the single real match.
    haystack = (stem+filler)*500 + stem+marker
    index = haystack.index  # bound once so the loop pays no attribute lookup
    for _ in _RANGE_100:
        index(needle)
# Loading ...