"""Plot how many sampled issues fall into each classification.

Reads ``sampling_all.csv``, derives a primary label from the
``Classification`` column and writes two count plots as PDF files:
one comparing ML vs. non-ML issues, one broken down by label for the
ML issues only.
"""
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# Labels that are dropped before any counting takes place.
_EXCLUDED_LABELS = ['?', 'chinese', 'page not found']


def _load_labelled_issues(csv_path):
    """Load the CSV and add the ``label`` and ``on_pipe`` columns.

    ``label`` is the text before the first ';' of ``Classification``,
    stripped of surrounding whitespace. ``on_pipe`` is 'No ML' when the
    label is 'no pipeline' and 'ML' otherwise.
    """
    data = pd.read_csv(csv_path)
    data['label'] = data['Classification'].apply(
        lambda text: text.split(';')[0].strip())
    # Copy the filtered frame so the column assignment below works on an
    # independent DataFrame instead of a slice of the original one.
    data = data[~data['label'].isin(_EXCLUDED_LABELS)].copy()
    data['on_pipe'] = data['label'].apply(
        lambda label: 'No ML' if label == 'no pipeline' else 'ML')
    return data


def _annotate_vertical_bars(ax):
    """Write each bar's count just above the bar of a vertical count plot."""
    for p in ax.patches:
        ax.text(
            p.get_x() + p.get_width() * 0.43,
            p.get_height() + 3,
            # Bar heights are floats; format them as whole counts.
            format(p.get_height(), '.0f'),
            color='black',
            rotation='horizontal',
            size='large')


def _annotate_horizontal_bars(ax):
    """Write each bar's count to the right of the bar of a horizontal plot."""
    for p in ax.patches:
        ax.text(
            p.get_width() + 0.25,
            p.get_y() + p.get_height() / 2,
            # Bar widths are floats; format them as whole counts.
            format(p.get_width(), '.0f'),
            color='black',
            rotation='horizontal',
            size='large')


def _plot_type_counts(data, out_path):
    """Save a count plot of the ``on_pipe`` column to ``out_path``."""
    g = sns.catplot(x="on_pipe", kind="count", data=data)
    g.set(title='Istanze delle issues in base al tipo')
    g.set(xlabel='tipo')
    _annotate_vertical_bars(g.facet_axis(0, 0))
    plt.tight_layout()
    plt.savefig(out_path)
    plt.close()


def _plot_phase_counts(data, out_path):
    """Save a per-label count plot, excluding 'no pipeline', to ``out_path``."""
    ml_only = data[data['label'] != 'no pipeline']
    g = sns.catplot(y='label', kind='count', data=ml_only, color='green')
    g.set(title='Istanze delle issues in base alla fase')
    g.set(ylabel='fase')
    _annotate_horizontal_bars(g.facet_axis(0, 0))
    plt.tight_layout()
    plt.savefig(out_path)
    plt.close()


def main():
    """Generate both figures from ``sampling_all.csv``."""
    data = _load_labelled_issues('sampling_all.csv')
    _plot_type_counts(data, '../src/figures/count-type.pdf')
    # The original script called exit() at this point, which left the
    # second figure unreachable; both figures are now produced.
    _plot_phase_counts(data, '../src/figures/count-phases.pdf')


if __name__ == '__main__':
    main()