Calculate Genome metrics



examples/pandas/genome_calculation.py
import pandas as pd
import numpy as np
import datetime
import sys

# Input workbook: the single command-line argument when exactly one is given,
# otherwise the default file name.
filename = sys.argv[1] if len(sys.argv) == 2 else 'raw_data.xlsx'

def calculate_averages(row):
    """Return the base-2 logarithm of the ratio of two group means.

    The first group is the values at positions 0-2 of ``row`` and the second
    group is the values at positions 3-5; any later positions are ignored.

    Args:
        row: Object supporting positional ``.iloc`` slicing whose slices
            provide ``.mean()`` (for example a row handed over by
            ``DataFrame.apply(..., axis=1)``).

    Returns:
        ``np.log2`` of (mean of the first group / mean of the second group).
    """
    first_mean, second_mean = (
        row.iloc[start:start + 3].mean() for start in (0, 3)
    )
    return np.log2(first_mean / second_mean)

# Load the workbook, using the 'genome name' column as the row index, and
# report how long the load took.
start_time = datetime.datetime.now()
df = pd.read_excel(filename, index_col='genome name')
load_time = datetime.datetime.now()
print(load_time - start_time)

print(df.head())

# One log2 ratio per row, computed by calculate_averages.
calculated_value = df.apply(calculate_averages, axis='columns')

# Keep only the rows whose ratio is strictly greater than the cut-off.
threshold = 0.2
keep_rows = calculated_value > threshold
filtered_df = df.loc[keep_rows]

print(filtered_df.head())

# Elapsed time since the load finished; this includes the printing above.
calculate_time = datetime.datetime.now()
print(calculate_time - load_time)