master-thesis/util/barplot-issues-labelled.py
2021-08-20 12:22:04 +02:00

48 lines
1.4 KiB
Python
Executable File

import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
# Labels that mark issues which could not be classified and are left out of
# every plot.
EXCLUDED_LABELS = ['?', 'chinese', 'page not found']


def label_issues(data):
    """Return a labelled copy of *data* ready for plotting.

    The ``Classification`` column holds ';'-separated values; the first one,
    stripped of surrounding whitespace, becomes the ``label`` column.  Rows
    whose label is in ``EXCLUDED_LABELS`` are removed, as are rows without
    any classification.  An ``on_pipe`` column is added: ``'No ML'`` for
    the ``'no pipeline'`` label and ``'ML'`` for everything else.

    The input frame is not modified.
    """
    # A missing classification cannot be split into a label, so drop it
    # instead of failing on a NaN value.
    classified = data.dropna(subset=['Classification'])
    labels = (
        classified['Classification']
        .astype(str)
        .str.split(';')
        .str[0]
        .str.strip()
    )
    keep = ~labels.isin(EXCLUDED_LABELS)

    # Explicit copy: new columns are added below, and assigning into a
    # filtered view is what triggers pandas' SettingWithCopyWarning.
    labelled = classified.loc[keep].copy()
    labelled['label'] = labels[keep]
    labelled['on_pipe'] = (
        labelled['label'].eq('no pipeline').map({True: 'No ML', False: 'ML'})
    )
    return labelled


def annotate_vertical_bars(ax):
    """Write each vertical bar's height just above the bar."""
    for bar in ax.patches:
        ax.text(
            # 0.43 of the bar width puts the text roughly at the centre.
            bar.get_x() + bar.get_width() * 0.43,
            # Lift the text 3 data units above the top of the bar.
            bar.get_height() + 3,
            bar.get_height(),
            color='black', rotation='horizontal', size='large')


def annotate_horizontal_bars(ax):
    """Write each horizontal bar's length just past the end of the bar."""
    for bar in ax.patches:
        ax.text(
            bar.get_width() + 0.25,
            bar.get_y() + bar.get_height() / 2,
            bar.get_width(),
            color='black', rotation='horizontal', size='large')


def main():
    """Read ``sampling_all.csv`` and save the two issue-count bar plots.

    Writes ``count-type.pdf`` (issues counted by ``on_pipe``) and
    ``count-phases.pdf`` (issues counted by label, without 'no pipeline')
    into ``../src/figures/``.
    """
    data = label_issues(pd.read_csv('sampling_all.csv'))

    # Figure 1: number of issues per on_pipe value ('ML' / 'No ML').
    grid = sns.catplot(x="on_pipe", kind="count", data=data)
    grid.set(title='Istanze delle issues in base al tipo')
    grid.set(xlabel='tipo')
    annotate_vertical_bars(grid.facet_axis(0, 0))
    plt.tight_layout()
    plt.savefig('../src/figures/count-type.pdf')
    # An early exit() used to sit here, which made everything below
    # unreachable; it is removed so the second figure is generated as well.
    plt.close()

    # Figure 2: number of issues per label, excluding 'no pipeline'.
    phases = data[data['label'] != 'no pipeline']
    grid = sns.catplot(y='label', kind='count', data=phases, color='green')
    grid.set(title='Istanze delle issues in base alla fase')
    grid.set(ylabel='fase')
    annotate_horizontal_bars(grid.facet_axis(0, 0))
    plt.tight_layout()
    plt.savefig('../src/figures/count-phases.pdf')
    plt.close()


if __name__ == '__main__':
    main()