Explanations and References
Kendall's coefficient of concordance for ranks (W) measures the agreement among 3 or more rankers, based on the ranking order each placed on the individuals being ranked.
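In the usual formulation (as in Siegel and Castellan, referenced below), with k rankers and n subjects, let R_i be the sum of the ranks given to subject i, and let T_j be the tie correction for ranker j, obtained by summing (t^3 - t) over each group of t tied ranks that ranker gave. The coefficient, corrected for ties, and the large-sample chi square test are then (written here with the usual 1-based ranks; the program below ranks from 0, which yields the same W):

\[
W \;=\; \frac{12\sum_{i=1}^{n} R_i^{2} \;-\; 3k^{2}n(n+1)^{2}}{k^{2}n(n^{2}-1) \;-\; k\sum_{j=1}^{k} T_j},
\qquad
\chi^{2} \;=\; k(n-1)W \ \text{ with } n-1 \text{ degrees of freedom.}
\]

W ranges from 0 (no agreement at all) to 1 (complete agreement between rankers).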
Javascript Program
The idea is that n subjects are ranked (0 to n-1) by each of the rankers, and the statistic evaluates how much the rankers agree with each other. The program on this page is modified so that it can accept any ordinal scale, and the input data are ranked before calculation. This allows the program to be used for a wide range of evaluations and measurements, provided they are at least ordered.

Nomenclature

Ordinal data are data sets where the numbers are in order, but the distances between numbers are unstated. In other words, 3 is greater than 2 and 2 is greater than 1, but 3-2 is not necessarily the same as 2-1. A common example of ordinal data is the Likert scale, where 1=strongly disagree, 2=disagree, 3=neutral, 4=agree, and 5=strongly agree. Although these numbers are in order, the difference between strongly agree and agree (5-4) is not necessarily the same as that between disagree and strongly disagree (2-1).

Instrument is any method of measurement: for example, a ruler, a Likert scale (a 5 point scale from strongly disagree to strongly agree), or a machine (e.g. ultrasound measurement of bone length). In the example on this page, the instrument is the Likert scale.

Example

The data in the example on this page were artificially generated to demonstrate the algorithm, and are not related to any real data. They purport to be from a recruitment exercise, where the curricula vitae of 10 applicants (subjects) were reviewed by 3 senior selectors to indicate which ones should be invited to a short list for interview. The measurement is a Likert scale of 1=strongly disagree, 2=disagree, 3=neutral, 4=agree, and 5=strongly agree, in answer to the statement "this candidate should be short listed for interview".
Before the results are processed to produce a short list, some validation of the selectors' opinions was thought to be important. This was evaluated in terms of the degree of consensus (concordance) between the answers from the 3 selectors. The table on the left contains the Likert scores from the 3 selectors on the 10 candidates, and the table on the right contains the scores converted to ranks, where the lowest rank is 0 and the highest is 10-1=9. Tied ranks are averaged. The results (Kendall's W = 0.6781, df = 9, Chi Sq = 18.3090, p = 0.0318) indicate that there was statistically significant consensus between the selectors at the p<0.05 level.

References

Siegel S and Castellan NJ Jr (1988) Nonparametric Statistics for the Behavioral Sciences. International Edition. McGraw-Hill Book Company, New York. ISBN 0-07-057357-3. p. 262-272

Siegel S and Castellan NJ Jr (1988) Nonparametric Statistics for the Behavioral Sciences. International Edition. McGraw-Hill Book Company, New York. ISBN 0-07-057357-3. Table T: Critical values for Kendall coefficient of concordance W, p. 365

Reference for Table T (copied from the book, but I have not read it):
Maghsoodloo S and Pallos LL (1981) Asymptotic behavior of Kendall's partial rank correlation coefficient and additional quantile estimates. Journal of Statistical Computation and Simulation 13: 41-48
R Codes
Data Entry
Although Kendall's W is designed for ranks, this page can calculate using values, as the values are converted into ranks before calculation. A minimum of 3 columns should be used; concordance between 2 raters should be evaluated with Cohen's Kappa.
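As an illustration of this conversion, here is a minimal R sketch (the variable names likert and r are illustrative only) showing how one column of Likert scores, taken from the first selector in the example below, becomes 0-based, tie-averaged ranks:

# Illustrative sketch: Likert scores from one rater (first column of the example data)
likert <- c(3, 5, 1, 4, 3, 1, 2, 2, 3, 5)
r <- rank(likert) - 1   # rank() averages tied ranks; subtracting 1 makes the lowest rank 0
r                       # 5.0 8.5 0.5 7.0 5.0 0.5 2.5 2.5 5.0 8.5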
R provides Kendall's W calculations in package irr
#Program 1: uses package irr from R
dat = ("
3 2 4
5 2 5
1 1 3
4 3 5
3 2 3
1 1 2
2 4 1
2 3 4
3 3 5
5 4 5
")
mx = read.table(textConnection(dat), header=FALSE)
#install.packages("irr")   # if not already installed
library(irr)
kendall(mx, correct = TRUE)

The results are

> kendall(mx, correct = TRUE)
Kendall's coefficient of concordance Wt
Subjects = 10
  Raters = 3
      Wt = 0.678
Chisq(9) = 18.3
 p-value = 0.0318

The results are the same as those from the algorithm described by Siegel and Castellan (SC) (see references in the Explanation panel), except for small sample sizes. The irr package uses the Chi Sq to determine statistical significance regardless of sample size. SC, however, argued that the Type I Error (p) estimated this way is less reliable if the sample size is <=7, and produced a table (Table T) for small sample sizes. The algorithm that follows is derived from that described by SC, and uses the table to test statistical significance when the sample size is <=7.

#Program 2: Uses algorithm described by Siegel and Castellan
# Subroutines for Type I Error when sample size <=7
TestSig <- function(lo,hi,W)   # short hand to produce the results in text
{
  if(W<lo) return ("p>0.05 not significant")
  if(W>hi) return ("p<0.01")
  return ("0.05>p>0.01")
}
TableT <- function(g,n,W)   # Table T from Siegel and Castellan (see references in Explanation panel)
{
  if(n<3 | g>20)
  {
    return ("Insufficient data for significance testing")
  }
  if((n==3 & g<8) | (n==4 & g<4) | (n>4 && g<3))
  {
    return ("p>0.05 not significant")
  }
  if(n==3)
  {
    if(g==8) return (TestSig(0.376,0.522,W))
    if(g==9) return (TestSig(0.333,0.469,W))
    if(g==10) return (TestSig(0.300,0.425,W))
    if(g<=12) return (TestSig(0.250,0.359,W))
    if(g<=14) return (TestSig(0.214,0.311,W))
    if(g==15) return (TestSig(0.200,0.291,W))
    if(g==16) return (TestSig(0.187,0.274,W))
    if(g<=18) return (TestSig(0.166,0.245,W))
    if(g<=20) return (TestSig(0.150,0.221,W))
  }
  if(n==4)
  {
    if(g==4) return (TestSig(0.619,0.768,W))
    if(g==5) return (TestSig(0.501,0.644,W))
    if(g==6) return (TestSig(0.421,0.553,W))
    if(g<=8) return (TestSig(0.318,0.429,W))
    if(g<=10) return (TestSig(0.256,0.351,W))
    if(g<=15) return (TestSig(0.171,0.240,W))
    if(g<=20) return (TestSig(0.129,0.182,W))
  }
  if(n==5)
  {
    if(g==3) return (TestSig(0.716,0.840,W))
    if(g==4) return (TestSig(0.552,0.683,W))
    if(g==5) return (TestSig(0.449,0.571,W))
    if(g==6) return (TestSig(0.378,0.489,W))
    if(g<=8) return (TestSig(0.287,0.379,W))
    if(g<=10) return (TestSig(0.155,0.211,W))
    if(g<=15) return (TestSig(0.187,0.274,W))
    if(g<=20) return (TestSig(0.117,0.160,W))
  }
  if(n==6)
  {
    if(g==3) return (TestSig(0.660,0.780,W))
    if(g==4) return (TestSig(0.512,0.629,W))
    if(g==5) return (TestSig(0.417,0.524,W))
    if(g==6) return (TestSig(0.351,0.448,W))
    if(g<=8) return (TestSig(0.267,0.347,W))
    if(g<=10) return (TestSig(0.215,0.282,W))
    if(g<=15) return (TestSig(0.145,0.193,W))
    if(g<=20) return (TestSig(0.109,0.146,W))
  }
  if(n==7)
  {
    if(g==3) return (TestSig(0.624,0.737,W))
    if(g==4) return (TestSig(0.484,0.592,W))
    if(g==5) return (TestSig(0.395,0.491,W))
    if(g==6) return (TestSig(0.333,0.419,W))
    if(g<=8) return (TestSig(0.253,0.324,W))
    if(g<=10) return (TestSig(0.204,0.263,W))
    if(g<=15) return (TestSig(0.137,0.179,W))
    if(g<=20) return (TestSig(0.103,0.136,W))
  }
}

# Main algorithm for Kendall W as described in Siegel and Castellan
dat = ("
3 2 4
5 2 5
1 1 3
4 3 5
3 2 3
1 1 2
2 4 1
2 3 4
3 3 5
5 4 5
")
mxDat = read.table(textConnection(dat), header=FALSE)
n = nrow(mxDat)    # sample size
df = n - 1         # degrees of freedom
g = ncol(mxDat)    # number of raters or instruments

mxRank <- mxDat    # rank by R (minimum = 1), then change to min rank = 0
for(j in 1:g)
{
  mxRank[,j] <- rank(mxDat[,j])
}
mxRank <- mxRank - 1   # rank (minimum = 0)
mxRank                 # display the ranks

# Calculate W
SR2 = 0
arDA1 = array(0,n)
for(i in 1 : n)
{
  arDA1[i] = 0
  for(j in 1:g)
  {
    arDA1[i] = arDA1[i] + mxRank[i,j]
  }
  SR2 = SR2 + arDA1[i] * arDA1[i]
}

# Corrections for ties
ETj = 0
arDA2 = array(0,g)
for(j in 1 : g)
{
  arDA2[j] = 0
  for(i in 1 : (n-1))
  {
    if(mxRank[i,j]<9999)
    {
      t = 1
      w = mxRank[i,j]
      for(kk in (i+1) : n)
      {
        if(mxRank[kk,j]==w) t = t + 1
      }
      if(t>1)
      {
        arDA2[j] = arDA2[j] + t * t * t - t
        mxRank[i,j] = 9999
        for(kk in (i+1) : n)
        {
          if(mxRank[kk,j]==w) mxRank[kk,j] = 9999
        }
      }
    }
  }
  ETj = ETj + arDA2[j]
}

# Final calculations for W
W = (12.0 * SR2 - (3.0 * g * g * n * df * df)) / (1.0 * g * g * n * (n * n - 1) - g * ETj)
if(n>7)       # adequate sample size >7
{
  chiSq = g * df * W
  p = 1 - pchisq(chiSq, df=df)
  print(paste("Kendall's W = ", W," df=", df ))
  print(paste("Chi Sq = ", chiSq," p =", p ))
} else        # small sample size requires Table T from Siegel & Castellan
{
  print(paste("Kendall's W = ", W," df=", df ))
  print(TableT(g,n,W))
}

The results are as follows. Please note that W from both algorithms is the same; the difference is in how statistical significance (p) is determined when the sample size is <=7.

> mxRank                 # display the ranks
    V1  V2  V3
1  5.0 3.0 4.5
2  8.5 3.0 7.5
3  0.5 0.5 2.5
4  7.0 6.0 7.5
5  5.0 3.0 2.5
6  0.5 0.5 1.0
7  2.5 8.5 0.0
8  2.5 6.0 4.5
9  5.0 6.0 7.5
10 8.5 8.5 7.5

+   print(paste("Kendall's W = ", W," df=", df ))
+   print(paste("Chi Sq = ", chiSq," p =", p ))
[1] "Kendall's W = 0.678111587982833 df= 9"
[1] "Chi Sq = 18.3090128755365 p = 0.0317530628234405"

If the algorithms are used on the first 7 rows of data only, the irr algorithm, using the chi square approximation, finds W to be statistically not significant, but the Siegel and Castellan algorithm, using Table T, finds W to be significant at the p<0.05 level.

> mxRank                 # display the ranks
   V1  V2  V3
1 3.5 3.0 4.0
2 6.0 3.0 5.5
3 0.5 0.5 2.5
4 5.0 5.0 5.5
5 3.5 3.0 2.5
6 0.5 0.5 1.0
7 2.0 6.0 0.0

# results from irr algorithm
> kendall(mx, correct = TRUE)
Kendall's coefficient of concordance Wt
Subjects = 7
  Raters = 3
      Wt = 0.65
Chisq(6) = 11.7
 p-value = 0.0691

# results from Siegel and Castellan algorithm
    print(paste("Kendall's W = ", W," df=", df ))
+   print(TableT(g,n,W))
[1] "Kendall's W = 0.649895178197065 df= 6"
[1] "0.05>p>0.01"
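To reproduce the 7-row comparison above, a minimal sketch is given below. It assumes the data frame mx from Program 1 has already been created and the irr package installed; the object name mx7 is illustrative only.

# Illustrative only: rerun the irr calculation on the first 7 subjects
library(irr)
mx7 <- mx[1:7, ]               # keep only the first 7 rows of the example data
kendall(mx7, correct = TRUE)   # chi square approximation, p = 0.0691 as shown above
# For Program 2, set mxDat = mxDat[1:7, ] before computing n, df, g and W,
# so that the small-sample branch (TableT) is used instead of the chi square test.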