Solution: Process N files in parallel



examples/multiprocess/create_text_files.py
import sys
import string
import random


def main():
    """Create numbered text files filled with random rows.

    Expects two command-line arguments, NUMBER_OF_FILES and NUMBER_OF_ROWS,
    and writes the files ``1.txt`` .. ``<NUMBER_OF_FILES>.txt`` into the
    current working directory. Each row is a random string of 0 to 80
    characters drawn from ASCII letters, digits and the space character.

    Raises:
        SystemExit: with a usage message when the argument count is wrong.
        ValueError: when either argument is not an integer.
    """
    if len(sys.argv) != 3:
        # sys.exit() instead of the bare exit(): the latter is a helper the
        # site module adds for interactive sessions and is not guaranteed to
        # exist (e.g. under "python -S").
        sys.exit(f"Usage: {sys.argv[0]} NUMBER_OF_FILES   NUMBER_OF_ROWS")
    number_of_files = int(sys.argv[1])
    number_of_rows = int(sys.argv[2])
    characters = string.ascii_letters + ' ' + string.digits
    for file_id in range(1, number_of_files + 1):
        filename = f"{file_id}.txt"
        with open(filename, "w") as fh:
            for _ in range(number_of_rows):
                # The upper bound of randrange() is exclusive: lengths 0..80.
                length = random.randrange(0, 81)
                row = ''.join(random.choices(characters, k=length))
                fh.write(row + "\n")

# Generate the files only when this file is run as a script, not on import.
if __name__ == "__main__":
    main()

examples/multiprocess/count_digits.py
import sys
import string

def count_digits(filename):
    """Count how often each ASCII digit occurs in a text file.

    Args:
        filename: path of the text file to scan.

    Returns:
        A dict with two keys: "filename" (the path as given) and "count",
        a dict mapping each of the characters '0'..'9' to its number of
        occurrences in the file (0 when the digit never appears).
    """
    tally = dict.fromkeys(string.digits, 0)
    with open(filename) as fh:
        for line in fh:
            # One str.count() per digit instead of inspecting every character.
            for digit in string.digits:
                tally[digit] += line.count(digit)

    return {"filename": filename, "count": tally}


def print_table(results):
    """Print a table of digit counts: one row per file plus a TOTAL row.

    The first column holds the file name (left-aligned); it is followed by
    ten right-aligned columns, one per digit 0..9. The last row sums every
    digit column over all files.

    Args:
        results: list of dicts shaped like the return value of
            count_digits(): a "filename" string and a "count" dict keyed
            by the characters '0'..'9'. It is iterated more than once, so
            it must be a list (or other re-iterable sequence), not a
            one-shot iterator.
    """
    dw = 6
    total_label = "TOTAL"

    # Seed the column width with the TOTAL label so the last row stays
    # aligned even when every filename is shorter than the label.
    width = len(total_label)
    for res in results:
        width = max(width, len(res["filename"]))

    def print_row(label, cells):
        # Left-aligned label, one separating space, right-aligned numbers.
        print(f"{label:{width}} " + "".join(f"{cell:{dw}}" for cell in cells))

    # Header: blank label column followed by the digits 0..9.
    print_row("", range(10))

    for res in results:
        print_row(res["filename"], (res["count"][digit] for digit in string.digits))

    total = {
        digit: sum(res["count"][digit] for res in results)
        for digit in string.digits
    }
    print_row(total_label, (total[digit] for digit in string.digits))


def main():
    """Count the digits in every file named on the command line, one by one.

    Each file is processed sequentially with count_digits() and the
    collected results are printed with print_table().

    Raises:
        SystemExit: with a usage message when no filename is given.
    """
    if len(sys.argv) < 2:
        # sys.exit() instead of the bare exit(): the latter is a helper the
        # site module adds for interactive sessions and is not guaranteed to
        # exist (e.g. under "python -S").
        sys.exit(f"Usage: {sys.argv[0]} FILENAMEs")

    files = sys.argv[1:]
    results = [count_digits(filename) for filename in files]
    print_table(results)


# Run main() only when executed as a script; the guard keeps it from
# running when this file is imported as a module.
if __name__ == "__main__":
    main()

examples/multiprocess/count_digits_map.py
import sys
import count_digits

def main():
    """Count digits in the given files using the built-in map().

    Applies count_digits.count_digits to every filename from the command
    line and prints the results with count_digits.print_table.

    Raises:
        SystemExit: with a usage message when no filename is given.
    """
    if len(sys.argv) < 2:
        # sys.exit() instead of the bare exit(): the latter is a helper the
        # site module adds for interactive sessions and is not guaranteed to
        # exist (e.g. under "python -S").
        sys.exit(f"Usage: {sys.argv[0]} FILENAMEs")

    files = sys.argv[1:]
    results = map(count_digits.count_digits, files)
    # map() is lazy and single-pass, while print_table() walks the results
    # several times, so materialise them into a list first.
    count_digits.print_table(list(results))

# Script entry point: run main() only when executed directly.
if __name__ == "__main__":
    main()

examples/multiprocess/count_digits_multiprocessing_map.py
import sys
import count_digits
import multiprocessing as mp

def main():
    """Count digits in the given files using a pool of worker processes.

    Command line: POOL FILENAMEs, where POOL is the number of worker
    processes. The files are distributed over the pool with Pool.map() and
    the results are printed with count_digits.print_table.

    Raises:
        SystemExit: with a usage message when fewer than two arguments
            (pool size and at least one filename) are given.
        ValueError: when POOL is not an integer.
    """
    if len(sys.argv) < 3:
        # sys.exit() instead of the bare exit(): the latter is a helper the
        # site module adds for interactive sessions and is not guaranteed to
        # exist (e.g. under "python -S").
        sys.exit(f"Usage: {sys.argv[0]} POOL FILENAMEs")

    size = int(sys.argv[1])
    files = sys.argv[2:]

    with mp.Pool(size) as pool:
        # Pool.map() blocks until every file is processed and returns a
        # list, so no extra list() conversion is needed afterwards.
        results = pool.map(count_digits.count_digits, files)
    count_digits.print_table(results)

# The guard prevents main() from running when the file is imported rather
# than executed directly.
if __name__ == "__main__":
    main()