Learn more  » Push, build, and install  RubyGems npm packages Python packages Maven artifacts PHP packages Go Modules Bower components Debian packages RPM packages NuGet packages

agriconnect / python3.8-examples   deb

Repository URL to install this package:

/ usr / share / doc / python3.8 / examples / stringbench / stringbench.py


# Various microbenchmarks comparing unicode and byte string performance
# Please keep this file both 2.x and 3.x compatible!

import timeit
import itertools
import operator
import re
import sys
import datetime
import optparse

VERSION = '2.0'

def p(*args):
    """Write the str() of each argument, space-separated, plus a newline to stdout."""
    line = ' '.join(map(str, args))
    sys.stdout.write(line + '\n')

# Converters from a native str literal to the byte-string / unicode-string
# type of the running interpreter.  The literals are ASCII-only, so the
# 'ascii' codec is used whenever a conversion is needed.
if sys.version_info < (3,):
    # Python 2: str is already the 8-bit type; unicode needs a decode.
    bytes_from_str = lambda x: x
    unicode_from_str = lambda x: x.decode('ascii')
else:
    # Python 3: str is already unicode; bytes needs an encode.
    bytes_from_str = lambda x: x.encode('ascii')
    unicode_from_str = lambda x: x
BYTES = bytes_from_str
UNICODE = unicode_from_str

class UnsupportedType(TypeError):
    """TypeError subclass with no extra behavior.

    NOTE(review): presumably raised when a benchmark cannot run for the
    requested string type -- no raise site is visible here; confirm.
    """


# Banner: benchmark version, interpreter version and current time.
# These statements execute before the __main__ guard below, so the banner
# is printed even when the module is imported and then rejected.
p('stringbench v%s' % VERSION)
p(sys.version)
p(datetime.datetime.now())

# The later assignment wins, so the effective value is 3; the first
# assignment and the commented-out line are alternative settings.
# NOTE(review): REPEAT is not consumed in this part of the file --
# presumably the number of timing repetitions; confirm at the use site.
REPEAT = 1
REPEAT = 3
#REPEAT = 7

# This file is a script only: importing it aborts with SystemExit.
if __name__ != "__main__":
    raise SystemExit("Must run as main program")

# Command-line options.  All three are boolean flags (store_true) with no
# explicit default, so an absent flag leaves its attribute as None.
parser = optparse.OptionParser()
parser.add_option(
    "-R", "--skip-re",
    action="store_true", dest="skip_re",
    help="skip regular expression tests")
parser.add_option(
    "-8", "--8-bit",
    action="store_true", dest="bytes_only",
    help="only do 8-bit string benchmarks")
parser.add_option(
    "-u", "--unicode",
    action="store_true", dest="unicode_only",
    help="only do Unicode string benchmarks")


# Pre-built index lists used as loop drivers by the benchmarks, so building
# the range is not part of the work done inside each benchmark function.
# The shorter lists are independent copies of the prefix of the longest.
_RANGE_1000 = list(range(1000))
_RANGE_100 = _RANGE_1000[:100]
_RANGE_10 = _RANGE_1000[:10]

# Names of every function registered through bench(); used to reject
# accidental redefinition of a benchmark under the same name.
dups = {}
def bench(s, group, repeat_count):
    """Return a decorator that tags a function as a benchmark.

    The decorated function gets the attributes ``comment`` (s), ``group``,
    ``repeat_count`` and ``is_bench = True``, and is returned unchanged
    otherwise.  Raises AssertionError if a function with the same
    ``__name__`` was registered before.
    """
    def register(func):
        name = func.__name__
        if name in dups:
            raise AssertionError("Multiple functions with same name: %r" %
                                 (name,))
        dups[name] = 1
        func.is_bench = True
        func.comment = s
        func.group = group
        func.repeat_count = repeat_count
        return func
    return register

def uses_re(f):
    """Decorator marking *f* as a benchmark that uses the ``re`` module.

    Sets ``f.uses_re = True`` and returns *f*.  The return matters: a
    decorator's result is what gets bound to the function's name, so
    without it the decorated benchmark would be replaced by None.
    """
    f.uses_re = True
    return f

####### 'in' comparisons

@bench('"A" in "A"*1000', "early match, single character", 1000)
def in_test_quick_match_single_character(STR):
    # 1000 `in` checks where the one-character needle matches at the very
    # start of the haystack.  STR converts each literal to the string type
    # under test.
    needle = STR("A")
    haystack = STR("A" * 1000)
    for _ in _RANGE_1000:
        needle in haystack

@bench('"B" in "A"*1000', "no match, single character", 1000)
def in_test_no_match_single_character(STR):
    # 1000 `in` checks where the one-character needle never occurs in the
    # haystack.
    needle = STR("B")
    haystack = STR("A" * 1000)
    for _ in _RANGE_1000:
        needle in haystack


@bench('"AB" in "AB"*1000', "early match, two characters", 1000)
def in_test_quick_match_two_characters(STR):
    # 1000 `in` checks where the two-character needle matches at the very
    # start of the haystack.
    needle = STR("AB")
    haystack = STR("AB" * 1000)
    for _ in _RANGE_1000:
        needle in haystack

@bench('"BC" in "AB"*1000', "no match, two characters", 1000)
def in_test_no_match_two_character(STR):
    # 1000 `in` checks where the two-character needle never occurs, although
    # its first character appears throughout the haystack.
    needle = STR("BC")
    haystack = STR("AB" * 1000)
    for _ in _RANGE_1000:
        needle in haystack

@bench('"BC" in ("AB"*300+"C")', "late match, two characters", 1000)
def in_test_slow_match_two_characters(STR):
    # 1000 `in` checks where the two-character needle occurs only as the
    # last two characters of the haystack.
    needle = STR("BC")
    haystack = STR("AB" * 300+"C")
    for _ in _RANGE_1000:
        needle in haystack

@bench('s="ABC"*33; (s+"E") in ((s+"D")*300+s+"E")',
       "late match, 100 characters", 100)
def in_test_slow_match_100_characters(STR):
    # 100 `in` checks with a 100-character needle (99-character stem plus
    # "E").  The haystack is 300 near-misses (stem plus "D") followed by the
    # needle, so the only match is at the very end.
    stem = STR("ABC"*33)
    miss_tail = STR("D")
    hit_tail = STR("E")
    needle = stem+hit_tail
    haystack = (stem+miss_tail)*300 + stem+hit_tail
    for _ in _RANGE_100:
        needle in haystack

# Try with regex
@uses_re
@bench('s="ABC"*33; re.compile(s+"E").search((s+"D")*300+s+"E")',
       "late match, 100 characters", 100)
def re_test_slow_match_100_characters(STR):
    # Regex version of the 100-character late-match search: 100 calls of a
    # precompiled pattern's search() on a haystack made of 300 near-misses
    # (stem plus "D") followed by the pattern text (stem plus "E").
    # The description string above names s+"E" because that is the pattern
    # actually compiled here; it is the one that matches only at the end.
    m = STR("ABC"*33)
    d = STR("D")
    e = STR("E")
    s1 = (m+d)*300 + m+e
    s2 = m+e
    # Compile once and bind the bound method to a local so the loop body
    # is just the search call.
    pat = re.compile(s2)
    search = pat.search
    for x in _RANGE_100:
        search(s1)


#### same tests as 'in' but use 'find'

@bench('("A"*1000).find("A")', "early match, single character", 1000)
def find_test_quick_match_single_character(STR):
    # 1000 find() calls where the one-character needle matches at index 0.
    # The bound method is fetched once, outside the loop.
    needle = STR("A")
    haystack = STR("A" * 1000)
    find = haystack.find
    for _ in _RANGE_1000:
        find(needle)

@bench('("A"*1000).find("B")', "no match, single character", 1000)
def find_test_no_match_single_character(STR):
    # 1000 find() calls where the one-character needle never occurs.
    needle = STR("B")
    haystack = STR("A" * 1000)
    find = haystack.find
    for _ in _RANGE_1000:
        find(needle)


@bench('("AB"*1000).find("AB")', "early match, two characters", 1000)
def find_test_quick_match_two_characters(STR):
    # 1000 find() calls where the two-character needle matches at index 0.
    needle = STR("AB")
    haystack = STR("AB" * 1000)
    find = haystack.find
    for _ in _RANGE_1000:
        find(needle)

@bench('("AB"*1000).find("BC")', "no match, two characters", 1000)
def find_test_no_match_two_character(STR):
    # 1000 find() calls where the needle never occurs; its first character
    # ("B") is present in the haystack, its second ("C") is not.
    needle = STR("BC")
    haystack = STR("AB" * 1000)
    find = haystack.find
    for _ in _RANGE_1000:
        find(needle)

@bench('("AB"*1000).find("CA")', "no match, two characters", 1000)
def find_test_no_match_two_character_bis(STR):
    # Variant of the two-character no-match case: here the needle's first
    # character ("C") is the one absent from the haystack.
    needle = STR("CA")
    haystack = STR("AB" * 1000)
    find = haystack.find
    for _ in _RANGE_1000:
        find(needle)

@bench('("AB"*300+"C").find("BC")', "late match, two characters", 1000)
def find_test_slow_match_two_characters(STR):
    # 1000 find() calls where the needle occurs only as the last two
    # characters of the haystack.
    needle = STR("BC")
    haystack = STR("AB" * 300+"C")
    find = haystack.find
    for _ in _RANGE_1000:
        find(needle)

@bench('("AB"*300+"CA").find("CA")', "late match, two characters", 1000)
def find_test_slow_match_two_characters_bis(STR):
    # Variant of the two-character late-match case: the needle "CA" occurs
    # only at the end, and its first character is absent everywhere else.
    needle = STR("CA")
    haystack = STR("AB" * 300+"CA")
    find = haystack.find
    for _ in _RANGE_1000:
        find(needle)

@bench('s="ABC"*33; ((s+"D")*500+s+"E").find(s+"E")',
       "late match, 100 characters", 100)
def find_test_slow_match_100_characters(STR):
    # 100 find() calls with a 100-character needle (stem plus "E").  The
    # haystack is 500 near-misses (stem plus "D") followed by the needle, so
    # the only match is at the very end.
    stem = STR("ABC"*33)
    miss_tail = STR("D")
    hit_tail = STR("E")
    needle = stem+hit_tail
    haystack = (stem+miss_tail)*500 + stem+hit_tail
    find = haystack.find
    for _ in _RANGE_100:
        find(needle)

@bench('s="ABC"*33; ((s+"D")*500+"E"+s).find("E"+s)',
       "late match, 100 characters", 100)
def find_test_slow_match_100_characters_bis(STR):
    # Variant of the 100-character late-match case: the distinguishing "E"
    # is the needle's first character instead of its last.  The needle still
    # occurs only at the very end of the haystack.
    stem = STR("ABC"*33)
    filler = STR("D")
    marker = STR("E")
    needle = marker+stem
    haystack = (stem+filler)*500 + marker+stem
    find = haystack.find
    for _ in _RANGE_100:
        find(needle)


#### Same tests for 'rfind'

@bench('("A"*1000).rfind("A")', "early match, single character", 1000)
def rfind_test_quick_match_single_character(STR):
    # 1000 rfind() calls where the one-character needle matches at the last
    # position of the haystack.
    needle = STR("A")
    haystack = STR("A" * 1000)
    rfind = haystack.rfind
    for _ in _RANGE_1000:
        rfind(needle)

@bench('("A"*1000).rfind("B")', "no match, single character", 1000)
def rfind_test_no_match_single_character(STR):
    # 1000 rfind() calls where the one-character needle never occurs.
    needle = STR("B")
    haystack = STR("A" * 1000)
    rfind = haystack.rfind
    for _ in _RANGE_1000:
        rfind(needle)


@bench('("AB"*1000).rfind("AB")', "early match, two characters", 1000)
def rfind_test_quick_match_two_characters(STR):
    # 1000 rfind() calls where the two-character needle matches at the very
    # end of the haystack.
    needle = STR("AB")
    haystack = STR("AB" * 1000)
    rfind = haystack.rfind
    for _ in _RANGE_1000:
        rfind(needle)

@bench('("AB"*1000).rfind("BC")', "no match, two characters", 1000)
def rfind_test_no_match_two_character(STR):
    # 1000 rfind() calls where the needle never occurs; its last character
    # ("C") is absent from the haystack.
    needle = STR("BC")
    haystack = STR("AB" * 1000)
    rfind = haystack.rfind
    for _ in _RANGE_1000:
        rfind(needle)

@bench('("AB"*1000).rfind("CA")', "no match, two characters", 1000)
def rfind_test_no_match_two_character_bis(STR):
    # Variant of the two-character no-match case: here the needle's first
    # character ("C") is the one absent from the haystack.
    needle = STR("CA")
    haystack = STR("AB" * 1000)
    rfind = haystack.rfind
    for _ in _RANGE_1000:
        rfind(needle)

@bench('("C"+"AB"*300).rfind("CA")', "late match, two characters", 1000)
def rfind_test_slow_match_two_characters(STR):
    # 1000 rfind() calls where the needle occurs only as the first two
    # characters of the haystack.
    needle = STR("CA")
    haystack = STR("C" + "AB" * 300)
    rfind = haystack.rfind
    for _ in _RANGE_1000:
        rfind(needle)

@bench('("BC"+"AB"*300).rfind("BC")', "late match, two characters", 1000)
def rfind_test_slow_match_two_characters_bis(STR):
    # Variant of the two-character late-match case: the needle "BC" occurs
    # only at the start, and its last character is absent everywhere else.
    needle = STR("BC")
    haystack = STR("BC" + "AB" * 300)
    rfind = haystack.rfind
    for _ in _RANGE_1000:
        rfind(needle)

@bench('s="ABC"*33; ("E"+s+("D"+s)*500).rfind("E"+s)',
       "late match, 100 characters", 100)
def rfind_test_slow_match_100_characters(STR):
    # 100 rfind() calls with a 100-character needle ("E" plus stem).  The
    # haystack is the needle followed by 500 near-misses ("D" plus stem), so
    # the only match is at the very start.
    stem = STR("ABC"*33)
    miss_head = STR("D")
    hit_head = STR("E")
    needle = hit_head+stem
    haystack = hit_head+stem + (miss_head+stem)*500
    rfind = haystack.rfind
    for _ in _RANGE_100:
        rfind(needle)

@bench('s="ABC"*33; (s+"E"+("D"+s)*500).rfind(s+"E")',
       "late match, 100 characters", 100)
def rfind_test_slow_match_100_characters_bis(STR):
    # Variant of the 100-character late-match case: the distinguishing "E"
    # is the needle's last character instead of its first.  The needle still
    # occurs only at the very start of the haystack.
    stem = STR("ABC"*33)
    filler = STR("D")
    marker = STR("E")
    needle = stem+marker
    haystack = stem+marker + (filler+stem)*500
    rfind = haystack.rfind
    for _ in _RANGE_100:
        rfind(needle)


#### Now with index.
# Skip the ones which fail because that would include exception overhead.

@bench('("A"*1000).index("A")', "early match, single character", 1000)
def index_test_quick_match_single_character(STR):
    # 1000 index() calls where the one-character needle matches at index 0.
    needle = STR("A")
    haystack = STR("A" * 1000)
    index = haystack.index
    for _ in _RANGE_1000:
        index(needle)

@bench('("AB"*1000).index("AB")', "early match, two characters", 1000)
def index_test_quick_match_two_characters(STR):
    # 1000 index() calls where the two-character needle matches at index 0.
    needle = STR("AB")
    haystack = STR("AB" * 1000)
    index = haystack.index
    for _ in _RANGE_1000:
        index(needle)

@bench('("AB"*300+"C").index("BC")', "late match, two characters", 1000)
def index_test_slow_match_two_characters(STR):
    # 1000 index() calls where the needle occurs only as the last two
    # characters of the haystack.
    needle = STR("BC")
    haystack = STR("AB" * 300+"C")
    index = haystack.index
    for _ in _RANGE_1000:
        index(needle)

@bench('s="ABC"*33; ((s+"D")*500+s+"E").index(s+"E")',
       "late match, 100 characters", 100)
def index_test_slow_match_100_characters(STR):
    # 100 index() calls with a 100-character needle (stem plus "E").  The
    # haystack is 500 near-misses (stem plus "D") followed by the needle, so
    # the only match is at the very end.
    stem = STR("ABC"*33)
    miss_tail = STR("D")
    hit_tail = STR("E")
    needle = stem+hit_tail
    haystack = (stem+miss_tail)*500 + stem+hit_tail
    index = haystack.index
    for _ in _RANGE_100:
        index(needle)

Loading ...