"""Plot, per project, the fraction of files flagged by the ML-import columns.

Reads ``imports_data.csv``, computes for every project the share of rows
whose ``is_ml`` / ``is_ml_strict`` flag is true, and saves a box plot of
those shares to ``../src/figures/imports.pdf``.
"""
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt


def get(project, series: pd.Series) -> int:
    """Return the entry of *series* labelled ``(project, True)``, or 0.

    Args:
        project: First-level index label to look up.
        series: Series indexed by ``(project, flag)`` pairs, as produced by
            ``groupby([...]).size()`` in this script.

    Returns:
        The stored count as a plain ``int``; 0 when the ``(project, True)``
        label is absent (i.e. no row of that project has the flag set).
    """
    try:
        count = series[(project, True)]
    except KeyError:
        # A project without any flagged row has no (project, True) group.
        return 0
    return int(count)


def _ml_ratios(data: pd.DataFrame) -> pd.DataFrame:
    """Build the long-format ``project`` / ``type`` / ``value`` frame to plot.

    For each project (in order of first appearance in *data*) two rows are
    emitted: ``type='all'`` from the ``is_ml`` column and
    ``type='wo_pandas_numpy_scipy'`` from the ``is_ml_strict`` column.
    ``value`` is the flagged-row count divided by the project's row count.
    """
    total_files = data.groupby('project').size()
    ml = data.groupby(['project', 'is_ml']).size()
    # NOTE(review): judging by the label below, is_ml_strict presumably
    # ignores pandas/numpy/scipy imports -- confirm against the data producer.
    ml_strict = data.groupby(['project', 'is_ml_strict']).size()

    # Collect plain records and build the frame once: DataFrame.append was
    # removed in pandas 2.0 and copied the whole frame on every call.
    records = []
    for project in data['project'].unique():
        tot_files = total_files[project]
        records.append({
            'project': project,
            'type': 'all',
            'value': get(project, ml) / tot_files,
        })
        records.append({
            'project': project,
            'type': 'wo_pandas_numpy_scipy',
            'value': get(project, ml_strict) / tot_files,
        })
    return pd.DataFrame(records, columns=['project', 'type', 'value'])


def main() -> None:
    """Read the CSV, draw the box plot and write it to the figures folder."""
    data = pd.read_csv('imports_data.csv')
    help_df = _ml_ratios(data)

    colors = ['#cab2d6', '#6a3d9a']
    sns.set_palette(sns.color_palette(colors))
    grid = sns.catplot(x='type', y='value', kind='box', data=help_df)
    grid.set(
        title='Percentuale di file con import di ML',
        xlabel='Librerie ML',
        ylabel='',
    )
    plt.tight_layout()
    plt.savefig('../src/figures/imports.pdf')


if __name__ == '__main__':
    main()