"""Load the two-column table in ``48yrs/cores.dat`` and print a quick report.

The input is read as a header-less, whitespace-delimited file whose two
columns are named ``year`` and ``cores``.  When run as a script the module:

1. prints the rows whose ``cores`` value is missing and the per-column
   missing-value counts (computed on the raw table, before anything is
   dropped, so the numbers are meaningful);
2. drops incomplete rows and converts ``year`` to float;
3. prints the cleaned frame, its dtypes, its shape and ``DataFrame.info()``.
"""

import pandas as pd
import numpy as np

# Default input path, relative to the current working directory.
filename = "48yrs/cores.dat"

COLUMN_NAMES = ["year", "cores"]
SEPARATOR_LINE = "-------------"


def load_cores(path):
    """Read the header-less, whitespace-delimited table at *path*.

    Args:
        path: File path or file-like object accepted by ``pandas.read_csv``.

    Returns:
        A DataFrame with the columns ``year`` and ``cores``.

    Raises:
        ValueError: If the file does not contain exactly two columns
            (raised by the column-name assignment).
    """
    # Raw string: "\s" is an invalid escape sequence in a normal literal.
    table = pd.read_csv(path, header=None, sep=r"\s+")
    table.columns = COLUMN_NAMES
    return table


def report_missing(table):
    """Print rows with a missing ``cores`` value and per-column NaN counts."""
    cores_missing = table["cores"].isna()
    print(table.loc[cores_missing, :])

    print(table["year"].isna().sum())
    print(table["cores"].isna().sum())


def clean_cores(table):
    """Return a copy of *table* without incomplete rows and with float ``year``."""
    return table.dropna().astype({"year": float})


def describe(table):
    """Print the frame, its dtypes, its shape and its ``info()`` summary."""
    print(table)
    print(SEPARATOR_LINE)
    print(table.dtypes)
    print(SEPARATOR_LINE)
    print(table.shape)
    print(SEPARATOR_LINE)
    # info() writes to stdout itself and returns None, so it is not wrapped
    # in print() (that would emit a stray "None" line).
    table.info()


def main(path=filename):
    """Run the full load / report / clean / describe pipeline.

    Args:
        path: Input file path or file-like object; defaults to ``filename``.

    Returns:
        The cleaned DataFrame.
    """
    raw = load_cores(path)
    # Report missing values BEFORE dropping them; after dropna() the report
    # would always be empty.
    report_missing(raw)
    cleaned = clean_cores(raw)
    describe(cleaned)
    return cleaned


if __name__ == "__main__":
    df = main()