"""Compare rows flagged ``is_ml`` against the remaining rows of a CSV.

For each selected column, print the result of SciPy's rank-sum test and the
output of ``cliffsDelta`` for the two groups.
"""

import pandas as pd
from scipy.stats import ranksums
from cliffsDelta import cliffsDelta


def evaluate(feature: str):
    """Print the two-group comparison for one column.

    Reads the module-level ``ml_data`` and ``no_ml_data`` frames, which are
    only assigned when this file is run as a script; calling this function
    before they exist raises ``NameError``.

    Args:
        feature: Name of the column to compare between the two frames.
    """
    print(f'====={feature}=====')

    # Select the column once per group and reuse it for both statistics.
    ml_values = ml_data[feature]
    no_ml_values = no_ml_data[feature]

    rank_sum_result = ranksums(ml_values, no_ml_values)
    print(rank_sum_result)

    delta_result = cliffsDelta(ml_values, no_ml_values)
    print(delta_result)


if __name__ == '__main__':
    data = pd.read_csv('commit_analysis.csv')

    # Split the rows on the ``is_ml`` column; ``evaluate`` reads these
    # two frames as globals.
    ml_data = data[data['is_ml']]
    no_ml_data = data[~data['is_ml']]

    # Columns are reported in this fixed order.
    for column in (
        'file_entropy',
        'line_entropy',
        'n_comments',
        'words_mean',
        'day_to_fix',
    ):
        evaluate(column)