Pandas read selected rows in chunks



examples/pandas/read_file_in_chunks_select_rows.py
import sys
import pandas as pd

filename = "survey_results_public.csv"
if len(sys.argv) == 2:
    filename = sys.argv[1]

# Load only data from a specific country.

country_name = 'Israel'
chunks = []

for chunk in pd.read_csv(filename, chunksize=10000):
    print(chunk.size)
    part = chunk[ chunk['Country'] == country_name ]
    print(part.size)
    print('--')
    chunks.append(part)

df = pd.concat(chunks)

print(df.count())
print(df.size)

examples/pandas/read_file_in_chunks_select_rows_append.py
import sys
import pandas as pd

filename = "survey_results_public.csv"
if len(sys.argv) == 2:
    filename = sys.argv[1]

# Load only data from a specific country.

country_name = 'Israel'
df = None
for chunk in pd.read_csv(filename, chunksize=10000):
    part = chunk[ chunk['Country'] == country_name ]
    if df is None:
        df = part.copy(deep = True)
    else:
        df = df.append(part.copy(deep = True), ignore_index = True)


print(df.count())
print(df.size)