【R】TukeyとGames-Howell多重比較の結果にCLDを追加する

多重比較の結果を分かりやすく示す方法として、アルファベットの添字（Compact Letter Display：CLD）が広く用いられています。Rでは multcomp パッケージの cld() 関数を使えば、簡単にCLDを出力できます。

しかし実際の解析では、意味は正しくても直感的には解釈しづらい「crazy cld」 に遭遇することがあります。この問題についてはstackoverflowでも議論されています。

Attention Required! | Cloudflare

正確な原因は明示されていませんが、CLD を導出するアルゴリズムそのものに起因している可能性が考えられます。

このアルゴリズム的背景については、以下の資料が参考になります。

https://hueffner.de/falk/slides/statistik07.pdf

そこで今回は、青木先生の実装されているTukeyとGames-Howellのコードをもとに、自分で仕組みを理解しながら使えるCLD出力を実装していきます。

実装の概要
関数の全コード
Compact Letter Display（CLD）の作成
使用例
1. 引数の説明
2. 実行例（iris データ）
まとめ

実装の概要

今回作成する関数、multiple_comparison()では、次のことができます。

Tukey または Games–Howell による多重比較
各群の n・平均・分散を同時に算出
p値に基づいて compact letter display（CLD）を自動生成
平均値の大小順にCLDを割り当て

関数の全コード

# 多重比較(Tukey & Games-Howell)
# http://aoki2.si.gunma-u.ac.jp/lecture/Average/Tukey1.html

multiple_comparison <- function(data,
                                group,                                       
                                method = c("Tukey", "Games-Howell"),           
                                cld_decreasing = TRUE,                   
                                cld_letters = letters,                         
                                cld_significance_level = 0.05)                                        
{
  # 共通処理
  OK <- complete.cases(data, group)                    
  data <- data[OK]
  group <- factor(group[OK])
  group_levels <- levels(group)       
  n <- tapply(data, group, length)                     
  a <- length(n)                                       
  phi.e <- sum(n)-a                                    
  Mean <- tapply(data, group, mean)                    
  Variance <- tapply(data, group, var)                 
  result <- cbind(n, Mean, Variance)
  rownames(result) <- group_levels
  method <- match.arg(method)
  
  # CLD計算用の内部関数
  calculate_cld <- function(p_values, group_levels, Mean, cld_letters, cld_decreasing, cld_significance_level) {
    # グループを平均値で並べ替え
    group_order <- names(sort(Mean, decreasing=cld_decreasing))
    n_groups <- length(group_order)
    
    # p値行列の作成
    p_matrix <- matrix(NA, nrow=n_groups, ncol=n_groups,
                       dimnames=list(group_order, group_order))
    diag(p_matrix) <- ""
    p_matrix[upper.tri(p_matrix)] <- "-"
    
    # 下三角にp値結果を設定
    for (i in 2:n_groups) {
      for (j in 1:(i-1)) {
        g1 <- group_order[i]
        g2 <- group_order[j]
        
        # 比較ラベルを作成（グループレベル名を使用）
        comparison_label1 <- paste(g1, g2, sep=":")
        comparison_label2 <- paste(g2, g1, sep=":")
        
        # p値を取得
        if (comparison_label1 %in% rownames(p_values)) {
          p_val <- p_values[comparison_label1, "p"]
        } else if (comparison_label2 %in% rownames(p_values)) {
          p_val <- p_values[comparison_label2, "p"]
        } else {
          p_val <- NA
        }
        
        p_matrix[i, j] <- if (!is.na(p_val) && p_val <= cld_significance_level) "*" else ""
      }
    }
    
    # アルファベットラベルの割り当て
    star_counts <- colSums(p_matrix == "*", na.rm=TRUE)
    labels <- character(n_groups)
    current_label <- cld_letters[1]
    label_idx <- 1
    prev_count <- -1
    
    for (j in seq_along(star_counts)) {
      if (star_counts[j] != prev_count) {
        current_label <- cld_letters[label_idx]
        label_idx <- label_idx + 1
        prev_count <- star_counts[j]
      }
      labels[j] <- current_label
    }
    
    # ラベル行列の作成
    label_matrix <- p_matrix
    for (j in seq_along(labels)) {
      label_matrix[, j] <- ifelse(p_matrix[, j] == "", labels[j], p_matrix[, j])
    }
    
    # CLDの生成
    cld_result <- apply(label_matrix, 1, function(row) {
      chars <- unique(row[row != "-" & row != "*"])
      paste(sort(chars), collapse="")
    })
    
    # 元のグループ順序で返す
    return(cld_result[group_levels])
  }
  
  if (method == "Tukey") {                                
    v.e <- sum((n-1)*Variance)/phi.e
    t <- combn(group_levels, 2, function(ij)
      abs(diff(Mean[ij]))/sqrt(v.e*sum(1/n[ij])) )
    p <- ptukey(t*sqrt(2), a, phi.e, lower.tail=FALSE)
    Tukey <- cbind(t, p) 
    rownames(Tukey) <- combn(group_levels, 2, paste, collapse=":")
    
    cld_letters_result <- calculate_cld(Tukey, group_levels, Mean, cld_letters, cld_decreasing, cld_significance_level)
    return(list(result=result, Tukey=Tukey, phi=phi.e, v=v.e, cld=cld_letters_result))
  }
  else {
    t.df <- combn(group_levels, 2, function(ij) { 
      t <- abs(diff(Mean[ij]))/sqrt(sum(Variance[ij]/n[ij]))
      df <- sum(Variance[ij]/n[ij])^2/sum((Variance[ij]/n[ij])^2/(n[ij]-1))
      return(c(t, df))} )
    t <- t.df[1,]
    df <- t.df[2,]
    p <- ptukey(t*sqrt(2), a, df, lower.tail=FALSE)
    Games.Howell <- cbind(t, df, p)      
    rownames(Games.Howell) <- combn(group_levels, 2, paste, collapse=":")
    
    cld_letters_result <- calculate_cld(Games.Howell, group_levels, Mean, cld_letters, cld_decreasing, cld_significance_level)
    return(list(result=result, Games.Howell=Games.Howell, cld=cld_letters_result))
  }
}

Compact Letter Display（CLD）の作成

CLD作成では以下を行っています。

群を平均値順に並び替え
群間比較の p値行列を作成
有意差あり（p ≤ 0.05）を * として記録
有意差のパターンに応じてa, b, c... の文字を自動割り当て

この考え方については、以下のブログが非常に分かりやすく解説しています。

使用例

引数の説明

data : 数値データ
group : グループ因子
method : "Tukey" または "Games-Howell"（デフォルトは Tukey）
cld_decreasing : 平均値の降順で文字を割り当てるか（TRUE / FALSE）
cld_letters : 使用する文字（letters, LETTERS など）
cld_significance_level : 有意水準

実行例（iris データ）

> multiple_comparison(iris$Sepal.Length, iris$Species)
$result
            n  Mean  Variance
setosa     50 5.006 0.1242490
versicolor 50 5.936 0.2664327
virginica  50 6.588 0.4043429

$Tukey
                             t            p
setosa:versicolor     9.032819 3.308465e-14
setosa:virginica     15.365506 2.220446e-15
versicolor:virginica  6.332686 8.287557e-09

$phi
[1] 147

$v
[1] 0.2650082

$cld
    setosa versicolor  virginica 
       "c"        "b"        "a"

平均値の大小関係と有意差が、一目で解釈できるCLDになっています。