Counting words - which implementation is faster?



examples/dictionary/count_words_speed.py
from collections import defaultdict
from collections import Counter
import timeit

def generate_list_of_words(number, repeat):
    """Build the benchmark input: ``number`` distinct words, ``repeat`` copies each.

    The words are the decimal strings of ``0 .. number - 1``. All copies of
    one word are adjacent, so the result has ``number * repeat`` entries.
    """
    return [str(value) for value in range(number) for _ in range(repeat)]

def plain_counter(words):
    """Count occurrences with a plain dict and an explicit membership test.

    Returns a dict mapping every distinct word in ``words`` to the number
    of times it occurs.
    """
    tally = {}
    for item in words:
        # First sighting: create the entry so the increment below always works.
        if item not in tally:
            tally[item] = 0
        tally[item] += 1
    return tally

def counter_with_exceptions(words):
    """Count occurrences with a plain dict, relying on ``KeyError`` for new words.

    Returns a dict mapping every distinct word in ``words`` to the number
    of times it occurs.
    """
    tally = {}
    for item in words:
        try:
            tally[item] += 1
        except KeyError:
            # The word has not been seen before: start its count at one.
            tally[item] = 1
    return tally


def counter_with_counter(words):
    """Count occurrences by incrementing a ``collections.Counter`` word by word.

    Returns the ``Counter`` mapping every distinct word in ``words`` to the
    number of times it occurs.
    """
    tally = Counter()
    for item in words:
        # A Counter reports 0 for a missing key, so no initialisation is needed.
        tally[item] += 1
    return tally

def counter_with_default_dict(words):
    """Count occurrences by incrementing a ``defaultdict(int)`` word by word.

    Returns the ``defaultdict`` mapping every distinct word in ``words`` to
    the number of times it occurs.
    """
    tally = defaultdict(int)
    for item in words:
        # A missing key is created as int() == 0 before the increment is applied.
        tally[item] += 1
    return tally


def main():
    """Time every counting implementation and print the results.

    For each repetition factor the input holds 1000 words, made of
    ``1000 // repeat`` distinct words that each occur ``repeat`` times.
    Every implementation is run 10000 times on that input via ``timeit``,
    and the total elapsed time in seconds is printed next to its name.
    """
    total_words = 1000
    runs = 10000
    implementations = [
        'plain_counter',
        'counter_with_counter',
        'counter_with_default_dict',
        'counter_with_exceptions',
    ]

    for repeat in [1, 10, 20, 50]:
        different = total_words // repeat
        print(f'repeat {repeat}  different {different}')
        for name in implementations:
            elapsed = timeit.timeit(
                f'{name}(words)',
                # The word list is built in setup so its cost is not measured.
                setup=f'words = generate_list_of_words({different}, {repeat})',
                number=runs,
                # Look the functions up in this module's namespace instead of
                # using "from __main__ import ...", which only works when the
                # file is executed directly as a script.
                globals=globals(),
            )
            print("{:26} {}".format(name, elapsed))
        print()

# Run the benchmark only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()

repeat 1  different 1000
plain_counter              0.6091844770126045
counter_with_counter       1.232734862016514
counter_with_default_dict  0.7378899219911546
counter_with_exceptions    1.4480015779845417

repeat 10  different 100
plain_counter              0.4949962190585211
counter_with_counter       0.7886336819501594
counter_with_default_dict  0.4284116430208087
counter_with_exceptions    0.4748374510090798

repeat 20  different 50
plain_counter              0.4847069630632177
counter_with_counter       0.7627606929745525
counter_with_default_dict  0.4116779019823298
counter_with_exceptions    0.407719356007874

repeat 50  different 20
plain_counter              0.4709314970532432
counter_with_counter       0.7357207209570333
counter_with_default_dict  0.3903243549866602
counter_with_exceptions    0.36094399297144264