Solution: Count Amino Acids


Generate random DNA sequence


examples/dictionary/generate_dna.py
import sys
import random

def generate_dna(count):
    """Return a random DNA sequence made of ``count`` bases.

    Every base is picked independently with ``random.choice`` from the
    four letters A, C, T and G.  A ``count`` of zero or less gives an
    empty string, because ``range`` is then empty.
    """
    return ''.join(random.choice('ACTG') for _ in range(count))


def main():
    """Print a random DNA sequence whose length is the single CLI argument.

    Exits with the message "Need a number" when the argument is missing,
    when there are too many arguments, or when it is not an integer.
    """
    if len(sys.argv) != 2:
        # sys.exit is the supported way to stop a script; the bare exit()
        # helper is only installed by the site module for interactive use.
        sys.exit("Need a number")
    try:
        count = int(sys.argv[1])
    except ValueError:
        # A non-numeric argument gets the usage message, not a traceback.
        sys.exit("Need a number")
    print(generate_dna(count))


if __name__ == "__main__":
    main()

examples/dictionary/count_amino_acids.py
dna = 'CACCCATGAGATGTCTTAACGCTGCTTTCATTATAGCCG'

# Partial DNA codon -> amino acid table (far from all 64 codons).
# The keys are DNA codons, so they use T and never the RNA base U.
# TGA and TAG are stop codons, not amino acids.
aa_by_codon = {
    'ACG': 'Threonine',
    'CAC': 'Histidine',
    'CAT': 'Histidine',
    'CCA': 'Proline',
    'CCG': 'Proline',
    'GAT': 'Aspartic acid',
    'GTC': 'Valine',
    'TGA': 'Stop',
    'TTA': 'Leucine',
    'CTG': 'Leucine',
    'CTT': 'Leucine',
    'TCA': 'Serine',
    'TAG': 'Stop',
    #...
}


def count_amino_acids(sequence, table):
    """Count how often each amino acid is coded for in ``sequence``.

    ``sequence`` is read in non-overlapping codons of three bases starting
    at index 0; one or two trailing bases that do not form a full codon
    are ignored.  ``table`` maps a codon to an amino acid name.  A codon
    that is missing from ``table`` is counted under the name '?' instead
    of raising ``KeyError``.

    Returns a dictionary mapping each name to its number of occurrences.
    """
    totals = {}
    # Stopping at len - 2 keeps an incomplete last codon out of the loop.
    for i in range(0, len(sequence) - 2, 3):
        codon = sequence[i:i + 3]
        aa = table.get(codon, '?')
        totals[aa] = totals.get(aa, 0) + 1
    return totals


count = count_amino_acids(dna, aa_by_codon)

for aa in sorted(count):
    print("{}  {}".format(aa, count[aa]))

examples/dictionary/amino_acid_counter.py
# Amino acid (three-letter code) -> list of DNA codons that code for it.
codon_table = {
    'Phe' : ['TTT', 'TTC'],
    'Leu' : ['TTA', 'TTG', 'CTT', 'CTC', 'CTA', 'CTG'],
    'Ile' : ['ATT', 'ATC', 'ATA'],
    'Met' : ['ATG'],
    'Val' : ['GTT', 'GTC', 'GTA', 'GTG'],
    'Ser' : ['TCT', 'TCC', 'TCA', 'TCG', 'AGT', 'AGC'],
    'Pro' : ['CCT', 'CCC', 'CCA', 'CCG'],
    'Thr' : ['ACT', 'ACC', 'ACA', 'ACG'],
    'Ala' : ['GCT', 'GCC', 'GCA', 'GCG'],
    'Tyr' : ['TAT', 'TAC'],
    'His' : ['CAT', 'CAC'],
    'Gln' : ['CAA', 'CAG'],
    'Asn' : ['AAT', 'AAC'],
    'Lys' : ['AAA', 'AAG'],
    'Asp' : ['GAT', 'GAC'],
    'Glu' : ['GAA', 'GAG'],
    'Cys' : ['TGT', 'TGC'],
    'Trp' : ['TGG'],
    'Arg' : ['CGT', 'CGC', 'CGA', 'CGG', 'AGA', 'AGG'],
    'Gly' : ['GGT', 'GGC', 'GGA', 'GGG'],
    'STOP' : ['TAA', 'TAG', 'TGA']
}


def _invert(table):
    """Turn an {amino acid: [codons]} table into a {codon: amino acid} one."""
    by_codon = {}
    for aa, codons in table.items():
        for codon in codons:
            by_codon[codon] = aa
    return by_codon


# Inverted once so that each codon costs one dictionary lookup instead of
# a scan through every amino acid's codon list.
codon_to_aa = _invert(codon_table)


def translate(seq):
    """Translate ``seq`` codon by codon and count the resulting amino acids.

    ``seq`` is read in non-overlapping groups of three characters from
    index 0.  It is matched against ``codon_table`` as given, so the
    caller is expected to upper-case it first.

    Returns a ``(protein_sequence, counter, leftover)`` tuple:

    * ``protein_sequence`` -- list of amino acid names, in sequence order
    * ``counter`` -- dictionary mapping each name to its occurrence count
    * ``leftover`` -- the one or two trailing characters that do not form
      a full codon, or '' when the length is a multiple of three

    A complete codon that is not in ``codon_table`` (for example one
    containing N) is skipped and appears in none of the results.
    """
    protein_sequence = []
    counter = {}
    leftover = ''
    for start in range(0, len(seq), 3):
        codon = seq[start:start + 3]
        if len(codon) < 3:
            # Only the final slice can be short, so nothing follows it.
            leftover = codon
            break
        aa = codon_to_aa.get(codon)
        if aa is None:
            continue
        counter[aa] = counter.get(aa, 0) + 1
        protein_sequence.append(aa)
    return protein_sequence, counter, leftover


def main():
    """Read a DNA sequence from the user and print its amino acid statistics."""
    seq = input('Type your DNA sequence here: ').upper()
    protein_sequence, counter, leftover = translate(seq)

    if leftover:
        print('The remaining bases: {} are not coding for an amino acid'.format(leftover))

    print(''.join(protein_sequence))

    # counter is only non-empty when protein_sequence is, so the division
    # below cannot hit a zero length.
    for aa in sorted(counter):
        print('{} {} - {:>5.2f} %'.format(aa, counter[aa], counter[aa]/len(protein_sequence)*100))


if __name__ == '__main__':
    main()