## ----include = FALSE----------------------------------------------------------
# Global knitr chunk options for this document: collapse source and output
# into a single block and prefix printed output lines with "#>".
knitr::opts_chunk$set(collapse = TRUE, comment = "#>")

## ----setup--------------------------------------------------------------------
library(CCI)

## -----------------------------------------------------------------------------
# Simulate N observations in which X and Y are each drawn from a
# unit-variance normal centred on Z1 + Z2, with Z1 and Z2 being independent
# standard-normal draws.
#
# N: number of rows to generate.
# Returns a data frame with columns Z1, Z2, X, Y (in that order).
NormalData <- function(N) {
  # The draw order (Z1, Z2, X, Y) is fixed so seeded runs are reproducible.
  Z1 <- stats::rnorm(N, mean = 0, sd = 1)
  Z2 <- stats::rnorm(N, mean = 0, sd = 1)
  shared_mean <- Z1 + Z2
  X <- stats::rnorm(N, mean = shared_mean, sd = 1)
  Y <- stats::rnorm(N, mean = shared_mean, sd = 1)

  data.frame(Z1, Z2, X, Y)
}

## -----------------------------------------------------------------------------
# Draw 400 observations from NormalData() under a fixed RNG seed.
set.seed(123)
data <- NormalData(400)

# Test Y against X given both Z1 and Z2; `seed` is set for reproducibility.
simple_test <- CCI.test(
  formula = Y ~ X | Z1 + Z2,
  data = data,
  seed = 42
)
simple_test

## -----------------------------------------------------------------------------
# Same data, but Z2 is left out of the conditioning set even though
# NormalData() builds both X and Y from Z1 + Z2.
fail_test <- CCI.test(
  formula = Y ~ X | Z1,
  data = data,
  seed = 42
)
summary(fail_test)

## -----------------------------------------------------------------------------
# Repeat the Z1-only test with the xgboost method and explicit tuning values.
# NOTE(review): nrounds, min_child_weight, colsample_bytree, eta and max_depth
# are presumably forwarded to xgboost by CCI.test -- confirm in the package
# documentation.
summary(CCI.test(formula = Y ~ X | Z1,
                 data = data,
                 method = "xgboost",
                 nperm = 250,
                 parametric = TRUE,
                 nrounds = 400,
                 min_child_weight = 2,
                 # colsample_bytree is the fraction of columns sampled per
                 # tree and must lie in (0, 1]; 1 uses every column.
                 colsample_bytree = 1,
                 eta = 0.1,
                 max_depth = 10,
                 seed = 42))

## -----------------------------------------------------------------------------
# Fresh sample of 500 rows from NormalData() under its own RNG seed.
set.seed(13)
dat <- NormalData(500)

# Run the test with the full conditioning set and plot the returned object.
cci <- CCI.test(
  Y ~ X | Z1 + Z2,
  data = dat,
  seed = 1
)
plot(cci)

## -----------------------------------------------------------------------------
# Simulate N observations where X and Y both depend on Z1 and Z2 only through
# sin(Z1) * cos(Z2): X is that signal plus noise, Y is its square plus noise.
#
# N: number of rows to generate.
# Returns a data frame with columns X, Y, Z1, Z2 (in that order).
HardCase <- function(N) {
  # Z1 and Z2 are uniform on [-2, 2].
  Z1 <- stats::runif(N, min = -2, max = 2)
  Z2 <- stats::runif(N, min = -2, max = 2)

  signal <- sin(Z1) * cos(Z2)

  # Independent normal noise scaled by 0.2; X's noise is drawn before Y's.
  X <- signal + 0.2 * stats::rnorm(N)
  Y <- signal^2 + 0.2 * stats::rnorm(N)

  data.frame(X, Y, Z1, Z2)
}

## -----------------------------------------------------------------------------
# Generate 5000 rows from HardCase() and time one CCI.test call that uses the
# function's default settings.
set.seed(1)
data <- HardCase(5000)

tic <- Sys.time()
summary(
  CCI.test(Y ~ X | Z1 + Z2, data = data)
)
toc <- Sys.time()

# Elapsed wall-clock time for the call above.
difftime(toc, tic)

## -----------------------------------------------------------------------------
# Time the same test on the same data with method = "KNN".
tic <- Sys.time()
summary(
  CCI.test(Y ~ X | Z1 + Z2, data = data, method = "KNN")
)
toc <- Sys.time()

# Elapsed wall-clock time for the call above.
difftime(toc, tic)

## -----------------------------------------------------------------------------
# Time the KNN variant again, this time with subsample = "No".
# NOTE(review): presumably this turns off subsampling inside CCI.test --
# confirm against the package documentation.
tic <- Sys.time()
summary(
  CCI.test(
    Y ~ X | Z1 + Z2,
    data = data,
    method = "KNN",
    subsample = "No"
  )
)
toc <- Sys.time()

# Elapsed wall-clock time for the call above.
difftime(toc, tic)

## -----------------------------------------------------------------------------
# Generate a larger sample (50000 rows) from HardCase() and time a KNN run
# that conditions on Z1 only (Z2 is left out of the formula).
set.seed(1)
data <- HardCase(50000)

tic <- Sys.time()
summary(
  CCI.test(Y ~ X | Z1, data = data, method = "KNN")
)
toc <- Sys.time()

# Elapsed wall-clock time for the call above.
difftime(toc, tic)

## -----------------------------------------------------------------------------
# Simulate N observations with a continuous X and a four-class label Y that
# both depend on the standard-normal variables Z1 and Z2.
#
# X is exp(Z1) + Z2 plus normal noise (sd 0.2). Y is obtained by thresholding
# the score log(abs(Z1) + 1) + Z2:
#   score > 0.5   -> "Goblin"
#   score > 0     -> "Orc"
#   score > -0.5  -> "Troll"
#   otherwise     -> "Elf"
#
# N: number of rows to generate.
# Returns a data frame with columns Z1, Z2, X, Y (in that order).
ExpLogThreshold <- function(N) {
  Z1 <- stats::rnorm(N)
  Z2 <- stats::rnorm(N)
  X <- exp(Z1) + Z2 + stats::rnorm(N, 0, 0.2)

  # All three cut-offs apply to the same score so that every label, including
  # "Elf", is reachable: log(abs(Z1) + 1) on its own is never below 0, so a
  # cut-off of -0.5 only has an effect once Z2 is part of the score.
  score <- log(abs(Z1) + 1) + Z2
  Y <- ifelse(score > 0.5, "Goblin",
              ifelse(score > 0, "Orc",
                     ifelse(score > -0.5, "Troll", "Elf")))

  data.frame(Z1, Z2, X, Y)
}

## -----------------------------------------------------------------------------
# Generate 500 rows from ExpLogThreshold() and turn the label column into a
# factor before testing.
set.seed(123)
dat <- ExpLogThreshold(500)
dat[["Y"]] <- as.factor(dat[["Y"]])

# First with the full conditioning set (Z1 and Z2) ...
summary(
  CCI.test(Y ~ X | Z1 + Z2, data = dat)
)

# ... then with Z2 left out of the conditioning set.
summary(
  CCI.test(Y ~ X | Z1, data = dat)
)

## -----------------------------------------------------------------------------
# Regenerate the same sample, but store the labels as the numeric codes of
# their factor levels instead of as a factor.
set.seed(123)
dat <- ExpLogThreshold(500)
dat[["Y"]] <- as.numeric(as.factor(dat[["Y"]]))

# Request the "Kappa" metric explicitly for the numerically coded outcome.
# NOTE(review): how CCI.test treats a numeric Y under this metric is not
# visible here -- confirm against the package documentation.
summary(
  CCI.test(
    Y ~ X | Z1 + Z2,
    data = dat,
    metric = "Kappa"
  )
)

