master-thesis/util/tests.py

21 lines
560 B
Python
Executable File

import pandas as pd
from scipy.stats import ranksums
from cliffsDelta import cliffsDelta
def evaluate(feature: str):
    """Print two comparisons of ``feature`` between the ML and non-ML rows.

    Reads the module-level ``ml_data`` and ``no_ml_data`` frames, so both
    must exist before this is called (they are assigned in the
    ``__main__`` section of this script).

    Output, in order: a banner line with the feature name, the result of
    ``scipy.stats.ranksums`` on the two columns, then the value returned
    by ``cliffsDelta`` for the same two columns.

    Args:
        feature: Name of the column to select from both frames.
    """
    # Banner goes out first so it is visible even if a lookup below fails.
    print(f'====={feature}=====')

    ml_values = ml_data[feature]
    no_ml_values = no_ml_data[feature]

    rank_sum_result = ranksums(ml_values, no_ml_values)
    print(rank_sum_result)

    effect_size = cliffsDelta(ml_values, no_ml_values)
    print(effect_size)
if __name__ == '__main__':
    # Load the CSV and partition its rows on the 'is_ml' column.
    data = pd.read_csv('commit_analysis.csv')
    is_ml_mask = data['is_ml']
    # These two names are read as globals by evaluate(); do not rename them.
    ml_data = data[is_ml_mask]
    no_ml_data = data[~is_ml_mask]

    # Report every metric, in this fixed order.
    for column in (
        'file_entropy',
        'line_entropy',
        'n_comments',
        'words_mean',
        'day_to_fix',
    ):
        evaluate(column)