50 lines
1.3 KiB
Python
Executable File
50 lines
1.3 KiB
Python
Executable File
from __future__ import division
|
|
|
|
|
|
def cliffsDelta(lst1, lst2, **dull):
    """Compute Cliff's delta effect size between two samples.

    Delta is ``(#pairs with x > y - #pairs with x < y) / (len(lst1) * len(lst2))``
    taken over every pair ``(x, y)`` with ``x`` from ``lst1`` and ``y`` from
    ``lst2``.

    :param lst1: first sample; a non-empty sized collection of mutually
        comparable values.
    :param lst2: second sample; same requirements as ``lst1``.
    :param dull: optional threshold overrides given as keywords ``small``,
        ``medium`` and ``large``. Thresholds that are not supplied keep
        their default value.
    :return: tuple ``(delta, size)`` where ``size`` is the label produced by
        ``lookup_size`` for ``delta``.
    :raises ValueError: if either sample is empty (delta is undefined).
    """
    # effect sizes from (Hess and Kromrey, 2004)
    thresholds = {'small': 0.147, 'medium': 0.33, 'large': 0.474}
    # Overlay caller-supplied values so a partial override (e.g. only
    # small=...) does not drop the remaining thresholds.
    thresholds.update(dull)

    m, n = len(lst1), len(lst2)
    if m == 0 or n == 0:
        raise ValueError("cliffsDelta requires two non-empty samples")

    lst2 = sorted(lst2)
    j = more = less = 0
    # Walk both sorted samples once; j only ever moves forward through lst2.
    for repeats, x in runs(sorted(lst1)):
        # Skip past every lst2 value strictly below x.
        while j <= (n - 1) and lst2[j] < x:
            j += 1
        # j values of lst2 are smaller than x, for each copy of x.
        more += j * repeats
        # Skip ties; they count for neither side.
        while j <= (n - 1) and lst2[j] == x:
            j += 1
        # The remaining n - j values of lst2 are larger than x.
        less += (n - j) * repeats

    d = (more - less) / (m * n)
    size = lookup_size(d, thresholds)
    return d, size
|
|
|
|
|
|
def lookup_size(delta: float, dull: dict) -> str:
    """
    Map an effect size onto a qualitative magnitude label.

    Only the absolute value of ``delta`` is considered. Bands are half-open,
    so a value exactly on a threshold belongs to the higher band.

    :type delta: float
    :type dull: dict, a dictionary of small, medium, large thresholds.
    :return: 'negligible', 'small', 'medium' or 'large'. Falls through to
        ``None`` when no band matches (for example when ``delta`` is NaN).
    """
    magnitude = abs(delta)

    if magnitude < dull['small']:
        return 'negligible'

    # Each middle band is named after the threshold that opens it.
    for opens, closes in (('small', 'medium'), ('medium', 'large')):
        if dull[opens] <= magnitude < dull[closes]:
            return opens

    return 'large' if magnitude >= dull['large'] else None
|
|
|
|
|
|
def runs(lst):
    """Iterator, chunks repeated values.

    Consecutive equal items are collapsed into ``(count, value)`` pairs, in
    the order they appear. Input is not sorted here; callers that want one
    pair per distinct value must pass sorted data.

    :param lst: any iterable of values comparable with ``!=``.
    :return: generator of ``(run_length, value)`` tuples; yields nothing for
        an empty input.
    """
    items = iter(lst)
    try:
        current = next(items)
    except StopIteration:
        # Empty input: there are no runs to report.
        return

    count = 1
    for value in items:
        if value != current:
            # The run ended; emit it and start counting the new value.
            yield count, current
            current, count = value, 1
        else:
            count += 1
    # Emit the final (still open) run.
    yield count, current
|