# Appendix A

# Sample R code for implementing the Frank and Hall (2001) approach using CART and random forests on an applied dataset.
# The dataset is publicly available from the UCI Machine Learning Repository (https://archive.ics.uci.edu/ml/datasets/Maternal+Health+Risk+Data+Set).

library(caret)

# Point the session at the folder that holds the CSV file, then read it in.
setwd("YOUR_WORKING_DIR")
data = read.csv(file = "Maternal Health Risk Data Set.csv")

# Inspect the column types and the relative frequency of each outcome label.
str(object = data)
prop.table(x = table(data$RiskLevel))

# Recode the outcome as an ordered factor whose levels run from lowest to
# highest risk, so that comparisons such as RiskLevel > "low risk" are defined.
risk_order = c("low risk", "mid risk", "high risk")
data$RiskLevel = ordered(data$RiskLevel, levels = risk_order)

# Class labels in ascending order, and the number of classes.
classes = levels(data$RiskLevel)
k = nlevels(data$RiskLevel)

# Draw half of the row indices (rounded down) without replacement for training;
# every remaining row goes to the test set. The seed is fixed so the partition
# can be reproduced.
set.seed(12345)
n_obs = nrow(data)
train = sample.int(n_obs, size = floor(n_obs / 2), replace = FALSE)
dtest = data[-train, ]
dtrain = data[train, ]

# Generate the k - 1 modified copies of the training set according to the
# Frank and Hall (2001) method and store them in a list.
# In copy j the outcome is replaced by a binary factor that is "1" when
# RiskLevel lies above the j-th class and "0" otherwise.
# seq_len(k - 1) is empty when k - 1 is 0 (1:(k-1) would count 1, 0 instead),
# and lapply() builds the list in one pass instead of growing it in a loop.
dtrain_modified = lapply(seq_len(k - 1), function(j) {
  dt = dtrain
  dt$RiskLevel = as.factor(as.integer(dtrain$RiskLevel > classes[j]))
  dt
})

# Initialize one matrix per algorithm to hold the predicted probabilities from
# the k - 1 binary classifiers: one row per test case, and column j holds the
# estimated probability that RiskLevel lies above the j-th class.
probsCART = probsRF = matrix(ncol = k - 1, nrow = nrow(dtest))

# Control object for 5-fold cross-validation, used to tune every model below.
trnCntrl = trainControl(method = "cv", number = 5)

# Train the k - 1 binary classifiers per algorithm.
# seq_len() gives an empty loop when k - 1 is 0 (1:(k-1) would count 1, 0).
# The CART and random forest fits stay interleaved in this order so the
# random number stream (and hence the CV folds) is used in the same sequence.
for (j in seq_len(k - 1)) {

  # CART
  modCART_j = train(RiskLevel ~ .,
                    data = dtrain_modified[[j]],
                    method = "rpart",
                    tuneLength = 10,
                    trControl = trnCntrl)
  # Predict through the train object rather than $finalModel: caret then
  # re-applies the same formula encoding it used when fitting, so this keeps
  # working if the predictors include factors. na.action = na.pass keeps one
  # row per test case; the default (na.omit) would drop rows with missing
  # predictors and misalign the result with the rows of dtest.
  probsCART[, j] = predict(modCART_j, newdata = dtest, type = "prob",
                           na.action = na.pass)[, "1"]

  # random forest
  modRF_j = train(RiskLevel ~ .,
                  data = dtrain_modified[[j]],
                  method = "rf",
                  tuneLength = 5,
                  trControl = trnCntrl)
  probsRF[, j] = predict(modRF_j, newdata = dtest, type = "prob",
                         na.action = na.pass)[, "1"]

}

# Combine the k - 1 binary probabilities into k class probabilities according
# to the Frank and Hall (2001) method.
#
# probs_gt:    matrix with one row per case; column j is the estimated
#              probability that the outcome lies above the j-th class.
# class_names: the k class labels in ascending order (used as column names).
# Returns a data frame with one row per case and one column per class.
#
# Writing P_j for column j of probs_gt:
#   P(class 1) = 1 - P_1
#   P(class i) = P_(i-1) - P_i      for 1 < i < k
#   P(class k) = P_(k-1)
# Padding with a column of 1s on the left and a column of 0s on the right
# turns all three cases into a single column-wise difference. Unlike a loop
# over 2:(k-1), which runs backwards (2, 1) when k = 2, this is valid for any
# number of classes.
# The binary classifiers are fitted separately, so a difference of two of
# their estimates can be negative; the values are left as computed.
combine_frank_hall = function(probs_gt, class_names) {
  combined = data.frame(cbind(1, probs_gt) - cbind(probs_gt, 0))
  colnames(combined) = class_names
  combined
}

probsCART_k = combine_frank_hall(probsCART, classes)
probsRF_k = combine_frank_hall(probsRF, classes)

# Turn each row of class probabilities into a predicted class: pick the column
# with the largest value (ties are broken at random) and return its label as a
# factor whose levels follow `classes`.
pick_class = function(prob_table) {
  winner = max.col(prob_table, ties.method = "random")
  factor(colnames(prob_table)[winner], levels = classes)
}

# CART first, then random forest, so random tie-breaking draws in this order.
predclassCART = pick_class(probsCART_k)
predclassRF = pick_class(probsRF_k)

# Integer codes of the observed and predicted classes (the position of each
# label in its factor's level order).
obs_rank = as.integer(dtest$RiskLevel)
cart_rank = as.integer(predclassCART)
rf_rank = as.integer(predclassRF)

# Overall predictive performance: mean absolute error between the predicted
# and observed class codes.
mean(abs(cart_rank - obs_rank))
mean(abs(rf_rank - obs_rank))

# Overall predictive performance: Spearman correlation between the predicted
# and observed class codes.
cor(cart_rank, obs_rank, method = "spearman")
cor(rf_rank, obs_rank, method = "spearman")

# Class-level predictive performance: confusion matrices of predicted against
# observed classes, followed by their per-class statistics.
cmCART = confusionMatrix(data = predclassCART, reference = dtest$RiskLevel)
cmRF = confusionMatrix(data = predclassRF, reference = dtest$RiskLevel)

cmCART[["byClass"]]
cmRF[["byClass"]]
