
## Select 500 complete cases from the observed data

# Fixed seed so the same 500 rows are drawn on every run.
set.seed(10)

# Keep only the five analysis columns, then drop any row with an NA in them.
data <- read.csv("active.csv")[c("WS_COR", "LS_COR", "LT_COR", "AGE", "EPT")]
data <- data[complete.cases(data), ]

# Draw 500 rows without replacement and switch to the short column names.
# NOTE(review): data_500 is assigned again by the simulation section further
# down in this script, which replaces the sample drawn here.
data_500 <- data[sample(nrow(data), 500), ]
names(data_500) <- c("WS", "LS", "LT", "AGE", "EPT")

## Simulate 500 entries of data

set.seed(10)

# Number of simulated rows; used by every draw below.
n_sim <- 500L

# Round simulated scores to whole numbers and clamp them to [lower, upper].
round_and_clamp <- function(x, lower, upper) {
  pmin(pmax(round(x), lower), upper)
}

# Shared standard-normal component added to WS, LS and LT so that the three
# scores are correlated with each other. EPT and AGE do not use it.
common <- rnorm(n_sim)

# NOTE: the order of the random draws below (WS, LS, LT, EPT, then AGE)
# determines the simulated values for this seed; keep it unchanged to
# reproduce the same data.
WS <- round_and_clamp(15 + 7 * common + rnorm(n_sim, 0, 3), 0, 30)

LS <- round_and_clamp(17 + 5 * common + rnorm(n_sim, 0, 3), 0, 30)

LT <- round_and_clamp(7 + 3 * common + rnorm(n_sim, 0, 1), 0, 15)

EPT <- round_and_clamp(14 + rnorm(n_sim, 0, 5), 0, 28)

data_500 <- data.frame(
  "WS" = WS,
  "LS" = LS,
  "LT" = LT,
  # Integer ages drawn uniformly with replacement from 65..90.
  # TRUE rather than T: T is an ordinary variable that can be reassigned.
  "AGE" = sample(65:90, n_sim, replace = TRUE),
  "EPT" = EPT
)


# MAR: EPT is set to missing with a probability that depends only on AGE,
# P(missing) = AGE * 0.01 - 0.60 (0.05 at AGE 65, rising to 0.30 at AGE 90).
# rbinom() is vectorized over `prob`, so this makes one Bernoulli draw per row,
# in row order, and also behaves correctly for a data frame with zero rows
# (a `1:nrow()` loop would iterate over 1 and 0 in that case).
miss <- rbinom(nrow(data_500), 1, data_500$AGE * 0.01 - 60 * 0.01)

# Copy the complete data and blank out EPT wherever the indicator fired.
data_500_mar <- data_500
data_500_mar$EPT[miss == 1] <- NA
summary(data_500_mar)


# MNAR selection: EPT is set to missing with a probability that depends on the
# EPT value itself, P(missing) = EPT * 0.01, so higher scores are more likely
# to be removed.
# rbinom() is vectorized over `prob`, so this makes one Bernoulli draw per row,
# in row order, and also behaves correctly for a data frame with zero rows
# (a `1:nrow()` loop would iterate over 1 and 0 in that case).
miss2 <- rbinom(nrow(data_500), 1, data_500$EPT * 0.01)

# Copy the complete data and blank out EPT wherever the indicator fired.
data_500_mnar <- data_500
data_500_mnar$EPT[miss2 == 1] <- NA
summary(data_500_mnar)

# MNAR pattern-mixture: deterministic missingness. EPT is removed for every
# row whose own EPT value lies strictly between 23 and 27.

# Start from an all-zero indicator and flag the rows inside the band.
miss3 <- numeric(nrow(data_500))
miss3[which(data_500$EPT > 23 & data_500$EPT < 27)] <- 1

# Copy the complete data and blank out EPT for the flagged rows.
data_500_mnar_2 <- data_500
data_500_mnar_2$EPT[miss3 == 1] <- NA
summary(data_500_mnar_2)

# Write the complete data set and its three missing-data variants to a single
# .RData file.
save(
  list = c("data_500", "data_500_mar", "data_500_mnar", "data_500_mnar_2"),
  file = "data_500.RData"
)