from __future__ import division

# Default effect-size thresholds from (Hess and Kromrey, 2004).
_DEFAULT_THRESHOLDS = {'small': 0.147, 'medium': 0.33, 'large': 0.474}


def cliffsDelta(lst1, lst2, **dull):
    """Compute Cliff's delta between two samples and label its magnitude.

    Delta is ``(#(x > y) - #(x < y)) / (len(lst1) * len(lst2))`` taken over
    every pair ``x in lst1``, ``y in lst2``; it lies in ``[-1, 1]``.

    :param lst1: first sample, a sized iterable of mutually comparable values.
    :param lst2: second sample, a sized iterable of mutually comparable values.
    :param dull: optional ``small`` / ``medium`` / ``large`` threshold
        overrides. Any threshold not supplied falls back to the
        Hess and Kromrey (2004) default.
    :return: tuple ``(delta, size)`` where ``size`` is one of
        ``'negligible'``, ``'small'``, ``'medium'``, ``'large'``.
    :raises ZeroDivisionError: if either sample is empty.
    """
    # Merge overrides on top of the defaults so a partial override
    # (e.g. only ``small=...``) still has every key lookup_size needs.
    thresholds = dict(_DEFAULT_THRESHOLDS, **dull)

    m, n = len(lst1), len(lst2)
    lst2 = sorted(lst2)
    j = more = less = 0
    # Both samples are walked in ascending order, so ``j`` only ever moves
    # forward through lst2.
    for repeats, x in runs(sorted(lst1)):
        # Advance past every lst2 value strictly below x.
        while j < n and lst2[j] < x:
            j += 1
        more += j * repeats  # x is greater than the first j values of lst2
        # Skip the ties; they count for neither side.
        while j < n and lst2[j] == x:
            j += 1
        less += (n - j) * repeats  # x is smaller than the remaining values
    d = (more - less) / (m * n)
    size = lookup_size(d, thresholds)
    return d, size


def lookup_size(delta: float, dull: dict) -> str:
    """Map a delta value to a qualitative effect-size label.

    :param delta: effect size; only its absolute value is used.
    :param dull: dictionary with ``'small'``, ``'medium'`` and ``'large'``
        thresholds.
    :return: ``'negligible'``, ``'small'``, ``'medium'`` or ``'large'``.
    :raises ValueError: if ``delta`` is NaN, which cannot be ranked.
    """
    magnitude = abs(delta)
    # NaN is the only value that is not equal to itself; without this guard
    # every comparison below would be False and no label would apply.
    if magnitude != magnitude:
        raise ValueError("delta must not be NaN")
    if magnitude < dull['small']:
        return 'negligible'
    if magnitude < dull['medium']:
        return 'small'
    if magnitude < dull['large']:
        return 'medium'
    return 'large'


def runs(lst):
    """Yield ``(count, value)`` for each run of consecutive equal values.

    ``value`` is the last element of the run. An empty input yields nothing.

    :param lst: iterable whose consecutive repeats are to be grouped.
    """
    iterator = iter(lst)
    try:
        previous = next(iterator)
    except StopIteration:
        # Nothing to group.
        return
    count = 1
    for current in iterator:
        if current != previous:
            yield count, previous
            count = 0
        count += 1
        previous = current
    # Flush the final run.
    yield count, previous