# -*- coding: utf-8 -*-
"""
Created on Mon Dec 10 09:18:14 2012
Author: Josef Perktold
"""
import numpy as np
from numpy.testing import assert_almost_equal, assert_equal, assert_allclose
from statsmodels.stats.inter_rater import (fleiss_kappa, cohens_kappa,
to_table, aggregate_raters)
from statsmodels.tools.testing import Holder
table0 = np.asarray('''\
1 0 0 0 0 14 1.000
2 0 2 6 4 2 0.253
3 0 0 3 5 6 0.308
4 0 3 9 2 0 0.440
5 2 2 8 1 1 0.330
6 7 7 0 0 0 0.462
7 3 2 6 3 0 0.242
8 2 5 3 2 2 0.176
9 6 5 2 1 0 0.286
10 0 2 2 3 7 0.286'''.split(), float).reshape(10,-1)
table1 = table0[:, 1:-1]
table10 = [[0, 4, 1],
[0, 8, 0],
[0, 1, 5]]
#Fleiss 1971, Fleiss has only the transformed table
diagnoses = np.array( [[4, 4, 4, 4, 4, 4],
[2, 2, 2, 5, 5, 5],
[2, 3, 3, 3, 3, 5],
[5, 5, 5, 5, 5, 5],
[2, 2, 2, 4, 4, 4],
[1, 1, 3, 3, 3, 3],
[3, 3, 3, 3, 5, 5],
[1, 1, 3, 3, 3, 4],
[1, 1, 4, 4, 4, 4],
[5, 5, 5, 5, 5, 5],
[1, 4, 4, 4, 4, 4],
[1, 2, 4, 4, 4, 4],
[2, 2, 2, 3, 3, 3],
[1, 4, 4, 4, 4, 4],
[2, 2, 4, 4, 4, 5],
[3, 3, 3, 3, 3, 5],
[1, 1, 1, 4, 5, 5],
[1, 1, 1, 1, 1, 2],
[2, 2, 4, 4, 4, 4],
[1, 3, 3, 5, 5, 5],
[5, 5, 5, 5, 5, 5],
[2, 4, 4, 4, 4, 4],
[2, 2, 4, 5, 5, 5],
[1, 1, 4, 4, 4, 4],
[1, 4, 4, 4, 4, 5],
[2, 2, 2, 2, 2, 4],
[1, 1, 1, 1, 5, 5],
[2, 2, 4, 4, 4, 4],
[1, 3, 3, 3, 3, 3],
[5, 5, 5, 5, 5, 5]])
diagnoses_rownames = ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12', '13', '14', '15', '16', '17', '18', '19', '20', '21', '22', '23', '24', '25', '26', '27', '28', '29', '30', ]
diagnoses_colnames = ['rater1', 'rater2', 'rater3', 'rater4', 'rater5', 'rater6', ]
def test_fleiss_kappa():
#currently only example from Wikipedia page
kappa_wp = 0.210
assert_almost_equal(fleiss_kappa(table1), kappa_wp, decimal=3)
def test_fleis_randolph():
# reference numbers from online calculator
# http://justusrandolph.net/kappa/#dInfo
table = [[7, 0], [7, 0]]
assert_equal(fleiss_kappa(table, method='unif'), 1)
table = [[6.99, 0.01], [6.99, 0.01]]
# % Overall Agreement 0.996671
# Fixed Marginal Kappa: -0.166667
# Free Marginal Kappa: 0.993343
assert_allclose(fleiss_kappa(table), -0.166667, atol=6e-6)
assert_allclose(fleiss_kappa(table, method='unif'), 0.993343, atol=6e-6)
table = [[7, 1], [3, 5]]
# % Overall Agreement 0.607143
# Fixed Marginal Kappa: 0.161905
# Free Marginal Kappa: 0.214286
assert_allclose(fleiss_kappa(table, method='fleiss'), 0.161905, atol=6e-6)
assert_allclose(fleiss_kappa(table, method='randolph'), 0.214286, atol=6e-6)
table = [[7, 0], [0, 7]]
# % Overall Agreement 1.000000
# Fixed Marginal Kappa: 1.000000
# Free Marginal Kappa: 1.000000
assert_allclose(fleiss_kappa(table), 1)
assert_allclose(fleiss_kappa(table, method='uniform'), 1)
table = [[6, 1, 0], [0, 7, 0]]
# % Overall Agreement 0.857143
# Fixed Marginal Kappa: 0.708333
# Free Marginal Kappa: 0.785714
assert_allclose(fleiss_kappa(table), 0.708333, atol=6e-6)
assert_allclose(fleiss_kappa(table, method='rand'), 0.785714, atol=6e-6)
class CheckCohens(object):
def test_results(self):
res = self.res
res2 = self.res2
res_ = [res.kappa, res.std_kappa, res.kappa_low, res.kappa_upp, res.std_kappa0,
res.z_value, res.pvalue_one_sided, res.pvalue_two_sided]
assert_almost_equal(res_, res2, decimal=4)
assert_equal(str(res), self.res_string)
class TestUnweightedCohens(CheckCohens):
# comparison to printout of a SAS example
@classmethod
def setup_class(cls):
#temporary: res instance is at last position
cls.res = cohens_kappa(table10)
res10_sas = [0.4842, 0.1380, 0.2137, 0.7547]
res10_sash0 = [0.1484, 3.2626, 0.0006, 0.0011] #for test H0:kappa=0
cls.res2 = res10_sas + res10_sash0 #concatenate
cls.res_string = '''\
Simple Kappa Coefficient
--------------------------------
Kappa 0.4842
ASE 0.1380
95% Lower Conf Limit 0.2137
95% Upper Conf Limit 0.7547
Test of H0: Simple Kappa = 0
ASE under H0 0.1484
Z 3.2626
One-sided Pr > Z 0.0006
Two-sided Pr > |Z| 0.0011''' + '\n'
def test_option(self):
kappa = cohens_kappa(table10, return_results=False)
assert_almost_equal(kappa, self.res2[0], decimal=4)
class TestWeightedCohens(CheckCohens):
#comparison to printout of a SAS example
@classmethod
def setup_class(cls):
#temporary: res instance is at last position
cls.res = cohens_kappa(table10, weights=[0, 1, 2])
res10w_sas = [0.4701, 0.1457, 0.1845, 0.7558]
res10w_sash0 = [0.1426, 3.2971, 0.0005, 0.0010] #for test H0:kappa=0
cls.res2 = res10w_sas + res10w_sash0 #concatenate
cls.res_string = '''\
Weighted Kappa Coefficient
--------------------------------
Kappa 0.4701
ASE 0.1457
95% Lower Conf Limit 0.1845
95% Upper Conf Limit 0.7558
Test of H0: Weighted Kappa = 0
ASE under H0 0.1426
Z 3.2971
One-sided Pr > Z 0.0005
Two-sided Pr > |Z| 0.0010''' + '\n'
def test_option(self):
kappa = cohens_kappa(table10, weights=[0, 1, 2], return_results=False)
assert_almost_equal(kappa, self.res2[0], decimal=4)
def test_cohenskappa_weights():
#some tests for equivalent results with different options
np.random.seed(9743678)
table = np.random.randint(0, 10, size=(5,5)) + 5*np.eye(5)
#example aggregation, 2 groups of levels
mat = np.array([[1,1,1, 0,0],[0,0,0,1,1]])
table_agg = np.dot(np.dot(mat, table), mat.T)
res1 = cohens_kappa(table, weights=np.arange(5) > 2, wt='linear')
res2 = cohens_kappa(table_agg, weights=np.arange(2), wt='linear')
assert_almost_equal(res1.kappa, res2.kappa, decimal=14)
assert_almost_equal(res1.var_kappa, res2.var_kappa, decimal=14)
#equivalence toeplitz with linear for special cases
res1 = cohens_kappa(table, weights=2*np.arange(5), wt='linear')
res2 = cohens_kappa(table, weights=2*np.arange(5), wt='toeplitz')
res3 = cohens_kappa(table, weights=res1.weights[0], wt='toeplitz')
#2-Dim weights
res4 = cohens_kappa(table, weights=res1.weights)
assert_almost_equal(res1.kappa, res2.kappa, decimal=14)
assert_almost_equal(res1.var_kappa, res2.var_kappa, decimal=14)
assert_almost_equal(res1.kappa, res3.kappa, decimal=14)
assert_almost_equal(res1.var_kappa, res3.var_kappa, decimal=14)
assert_almost_equal(res1.kappa, res4.kappa, decimal=14)
assert_almost_equal(res1.var_kappa, res4.var_kappa, decimal=14)
#equivalence toeplitz with quadratic for special cases
res1 = cohens_kappa(table, weights=5*np.arange(5)**2, wt='toeplitz')
res2 = cohens_kappa(table, weights=5*np.arange(5), wt='quadratic')
assert_almost_equal(res1.kappa, res2.kappa, decimal=14)
assert_almost_equal(res1.var_kappa, res2.var_kappa, decimal=14)
anxiety = np.array([
3, 3, 3, 4, 5, 5, 2, 3, 5, 2, 2, 6, 1, 5, 2, 2, 1, 2, 4, 3, 3, 6, 4,
6, 2, 4, 2, 4, 3, 3, 2, 3, 3, 3, 2, 2, 1, 3, 3, 4, 2, 1, 4, 4, 3, 2,
1, 6, 1, 1, 1, 2, 3, 3, 1, 1, 3, 3, 2, 2
]).reshape(20,3, order='F')
anxiety_rownames = ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12', '13', '14', '15', '16', '17', '18', '19', '20', ]
anxiety_colnames = ['rater1', 'rater2', 'rater3', ]
def test_cohens_kappa_irr():
ck_w3 = Holder()
ck_w4 = Holder()
#>r = kappa2(anxiety[,1:2], c(0,0,0,1,1,1))
#> cat_items(r, pref="ck_w3.")
ck_w3.method = "Cohen's Kappa for 2 Raters (Weights: 0,0,0,1,1,1)"
ck_w3.irr_name = 'Kappa'
ck_w3.value = 0.1891892
ck_w3.stat_name = 'z'
ck_w3.statistic = 0.5079002
ck_w3.p_value = 0.6115233
#> r = kappa2(anxiety[,1:2], c(0,0,1,1,2,2))
#> cat_items(r, pref="ck_w4.")
ck_w4.method = "Cohen's Kappa for 2 Raters (Weights: 0,0,1,1,2,2)"
ck_w4.irr_name = 'Kappa'
ck_w4.value = 0.2820513
ck_w4.stat_name = 'z'
ck_w4.statistic = 1.257410
ck_w4.p_value = 0.2086053
ck_w1 = Holder()
ck_w2 = Holder()
ck_w3 = Holder()
ck_w4 = Holder()
#> r = kappa2(anxiety[,2:3])
#> cat_items(r, pref="ck_w1.")
ck_w1.method = "Cohen's Kappa for 2 Raters (Weights: unweighted)"
ck_w1.irr_name = 'Kappa'
ck_w1.value = -0.006289308
ck_w1.stat_name = 'z'
ck_w1.statistic = -0.0604067
ck_w1.p_value = 0.9518317
#> r = kappa2(anxiety[,2:3], "equal")
#> cat_items(r, pref="ck_w2.")
ck_w2.method = "Cohen's Kappa for 2 Raters (Weights: equal)"
ck_w2.irr_name = 'Kappa'
ck_w2.value = 0.1459075
ck_w2.stat_name = 'z'
ck_w2.statistic = 1.282472
ck_w2.p_value = 0.1996772
#> r = kappa2(anxiety[,2:3], "squared")
#> cat_items(r, pref="ck_w3.")
ck_w3.method = "Cohen's Kappa for 2 Raters (Weights: squared)"
ck_w3.irr_name = 'Kappa'
ck_w3.value = 0.2520325
ck_w3.stat_name = 'z'
ck_w3.statistic = 1.437451
ck_w3.p_value = 0.1505898
#> r = kappa2(anxiety[,2:3], c(0,0,1,1,2))
#> cat_items(r, pref="ck_w4.")
ck_w4.method = "Cohen's Kappa for 2 Raters (Weights: 0,0,1,1,2)"
ck_w4.irr_name = 'Kappa'
ck_w4.value = 0.2391304
ck_w4.stat_name = 'z'
ck_w4.statistic = 1.223734
ck_w4.p_value = 0.2210526
all_cases = [(ck_w1, None, None),
(ck_w2, None, 'linear'),
(ck_w2, np.arange(5), None),
(ck_w2, np.arange(5), 'toeplitz'),
(ck_w3, None, 'quadratic'),
(ck_w3, np.arange(5)**2, 'toeplitz'),
(ck_w3, 4*np.arange(5)**2, 'toeplitz'),
(ck_w4, [0,0,1,1,2], 'toeplitz')]
#Note R:irr drops the missing category level 4 and uses the reduced matrix
r = np.histogramdd(anxiety[:,1:], ([1, 2, 3, 4, 6, 7], [1, 2, 3, 4, 6, 7]))
for res2, w, wt in all_cases:
msg = repr(w) + repr(wt)
res1 = cohens_kappa(r[0], weights=w, wt=wt)
assert_almost_equal(res1.kappa, res2.value, decimal=6, err_msg=msg)
assert_almost_equal(res1.z_value, res2.statistic, decimal=5, err_msg=msg)
assert_almost_equal(res1.pvalue_two_sided, res2.p_value, decimal=6, err_msg=msg)
def test_fleiss_kappa_irr():
fleiss = Holder()
#> r = kappam.fleiss(diagnoses)
#> cat_items(r, pref="fleiss.")
fleiss.method = "Fleiss' Kappa for m Raters"
fleiss.irr_name = 'Kappa'
fleiss.value = 0.4302445
fleiss.stat_name = 'z'
fleiss.statistic = 17.65183
fleiss.p_value = 0
data_ = aggregate_raters(diagnoses)[0]
res1_kappa = fleiss_kappa(data_)
assert_almost_equal(res1_kappa, fleiss.value, decimal=7)
def test_to_table():
data = diagnoses
res1 = to_table(data[:,:2]-1, 5)
res0 = np.asarray([[(data[:,:2]-1 == [i,j]).all(1).sum()
for j in range(5)]
for i in range(5)] )
assert_equal(res1[0], res0)
res2 = to_table(data[:,:2])
assert_equal(res2[0], res0)
bins = [0.5, 1.5, 2.5, 3.5, 4.5, 5.5]
res3 = to_table(data[:,:2], bins)
assert_equal(res3[0], res0)
#more than 2 columns
res4 = to_table(data[:,:3]-1, bins=[-0.5, 0.5, 1.5, 2.5, 3.5, 4.5])
res5 = to_table(data[:,:3]-1, bins=5)
assert_equal(res4[0].sum(-1), res0)
assert_equal(res5[0].sum(-1), res0)
def test_aggregate_raters():
data = diagnoses
resf = aggregate_raters(data)
colsum = np.array([26, 26, 30, 55, 43])
assert_equal(resf[0].sum(0), colsum)