## ----include = FALSE----------------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)

## ----setup--------------------------------------------------------------------
library(SmoothPLS)
library(ggplot2)
library(dplyr)

## -----------------------------------------------------------------------------
nind = 50 # number of individuals (train set)
start = 0 # First time
end = 100 # end time
lambda_0 = 0.2 # Exponential law parameter for state 0 
lambda_1 = 0.1 # Exponential law parameter for state 1
prob_start = 0.55 # Probability of starting with state 1

curve_type = 'cat'

TTRatio = 0.2 # Train Test Ratio 
NotS_ratio = 0.2 # noise variance over total variance for Y
beta_0_real = 5.4321 # Intercept value for the link between X(t) and Y

nbasis = 15 # number of basis functions
norder = 4 # 4 for cubic splines basis

## -----------------------------------------------------------------------------
id_df = data.frame(id=rep(1,5), time=seq(0, 40, 10), state=c(0, 1, 1, 0, 1))
id_df

## ----fig.alt="Decay plot"-----------------------------------------------------
func = function(t){0.01*t^2 - t}
plot(0:40, func(0:40))

## -----------------------------------------------------------------------------
evaluate_id_func_integral(id_df, func)

## -----------------------------------------------------------------------------
id_df_regul = regularize_time_series(id_df, time_seq = seq(0, 40, 2),
                                     curve_type = 'cat')
id_df_regul

## -----------------------------------------------------------------------------
print(id_df$time)
print(id_df_regul$time)

## -----------------------------------------------------------------------------
id_df_long = convert_to_wide_format(id_df_regul)
id_df_long

## -----------------------------------------------------------------------------
basis = create_bspline_basis(0, 100, 10, 4)
coef = c(10, 8, 6, 4, 2, 1, 3, 5, 7, 9)
fd_obj = fda::fd(coef = coef, basisobj = basis)
func_from_fd = from_fd_to_func(coef = coef, basis = basis)

## ----fig.alt="fd object"------------------------------------------------------
plot(fd_obj)

## -----------------------------------------------------------------------------
fda::eval.fd(fd_obj, 13)

## -----------------------------------------------------------------------------
df = generate_X_df(nind = nind, start = start,end =  end, curve_type = 'cat',
                   lambda_0 = lambda_0, lambda_1 = lambda_1,
                   prob_start = prob_start)
head(df)

## -----------------------------------------------------------------------------
df_2 = generate_X_df(nind=20, start = 13, end = 60, curve_type = 'cat',
              lambda_0 = 0.21, lambda_1 = 0.13, prob_start = 0.55)
length(unique(df_2$id))

## ----fig.alt="Binary CFD individuals"-----------------------------------------
plot_CFD_individuals(df)

## ----fig.alt="3 binary CFD individuals"---------------------------------------
plot_CFD_individuals(df_2, n_ind_to_plot = 3)

## -----------------------------------------------------------------------------
nind_test = number_of_test_id(TTRatio = TTRatio, nind = nind)

df_test = generate_X_df(nind_test, start, end, curve_type = 'cat', 
                        lambda_0, lambda_1, prob_start)

length(unique(df_test$id))

## -----------------------------------------------------------------------------
df_test_2 = generate_X_df(nind=number_of_test_id(TTRatio = TTRatio, nind = 80), 
                          start, end, curve_type = 'cat',
                          lambda_0, lambda_1, prob_start)
# Here the number of individuals will be 20 because : 
# 20 = 0.2 (80 + 20) or 
# 20 = floor(80*TTRatio/(1-TTRatio))
length(unique(df_test_2$id))

## ----fig.alt="beta_1_real_func"-----------------------------------------------
plot(x=0:100, y=beta_1_real_func(0:100, 100), type='l', main="Beta_1")

## ----fig.alt="beta_2_real_func"-----------------------------------------------
plot(x=0:100, y=beta_2_real_func(0:100, 100), type='l', main="Beta_2")

## ----fig.alt="beta_3_real_func"-----------------------------------------------
plot(x=0:100, y=beta_3_real_func(0:100, 100), type='l', main="Beta_3")

## ----fig.alt="Other beta functions"-------------------------------------------
plot(x=0:100, y=beta_4_real_func(0:100, 100), type='l', main="Beta_4")
plot(x=0:100, y=beta_5_real_func(0:100, 100), type='l', main="Beta_5")
plot(x=0:100, y=beta_6_real_func(0:100, 100), type='l', main="Beta_6")
plot(x=0:100, y=beta_7_real_func(0:100, 100), type='l', main="Beta_7")

## -----------------------------------------------------------------------------
Y_df = generate_Y_df(df, curve_type = 'cat', 
                     beta_1_real_func, beta_0_real, NotS_ratio)
names(Y_df)

## -----------------------------------------------------------------------------
head(Y_df)

## -----------------------------------------------------------------------------
var(Y_df$Y_real)
var(Y_df$Y_noised)
var(Y_df$Y_real)/var(Y_df$Y_noised)
(var(Y_df$Y_noised) - var(Y_df$Y_real))/var(Y_df$Y_noised)

## ----fig.alt="Y_df real and noised value histograms"--------------------------
oldpar <- par(mfrow=c(1,2))
hist(Y_df$Y_real)
hist(Y_df$Y_noised)
par(oldpar)

## -----------------------------------------------------------------------------
Y_df_test = generate_Y_df(df_test, curve_type = 'cat',
                          beta_1_real_func, beta_0_real, 
                          NotS_ratio)
head(Y_df_test)

## -----------------------------------------------------------------------------
df = generate_X_df(nind = nind, start = start,end =  end, curve_type = 'num',
                   noise_sd = 0.15, seed = 123)
head(df)

## ----fig.alt="Noised cosinus curves"------------------------------------------
# Visualisation
ggplot(df, aes(x = time, y = value, group = id, color = factor(id))) +
  geom_line(alpha = 0.8) +
  labs(title = "Noised cosinus curves",
       x = "Time", y = "Value",
       color = "Individual") +
  theme_minimal()

## -----------------------------------------------------------------------------
Y_df = generate_Y_df(df = df, curve_type = 'num',
                     beta_real_func_or_list = beta_1_real_func,
                     beta_0_real = beta_0_real, NotS_ratio = NotS_ratio,
                     seed = 123)
head(Y_df)

## -----------------------------------------------------------------------------
id_df_regul = regularize_time_series(df, time_seq = seq(0, 40, 2),
                                     curve_type = 'num')
id_df_regul

## -----------------------------------------------------------------------------
id_df_long = convert_to_wide_format(id_df_regul)
id_df_long

## -----------------------------------------------------------------------------
N_states = 4

## -----------------------------------------------------------------------------
# Initialized the lambdas values
lambdas = lambda_determination(N_states)
lambdas

## -----------------------------------------------------------------------------
# Initialized the transition matrix
transition_df = transfer_probabilities(N_states)
transition_df

## -----------------------------------------------------------------------------
df = generate_X_df_multistates(nind = 100, N_states, start=0, end=100,
                              lambdas,  transition_df)
head(df)

## ----fig.alt="Multistates individuals"----------------------------------------
plot_CFD_individuals(df)

## ----fig.alt="Multistates individuals plot by cfda"---------------------------
cfda::plotData(df)

## -----------------------------------------------------------------------------
proba = cfda::estimate_pt(df)

## ----fig.alt="Marginal probabilities"-----------------------------------------
plot(proba, ribbon = FALSE)
plot(proba, ribbon = TRUE)

## -----------------------------------------------------------------------------
head(df)

## -----------------------------------------------------------------------------
str(df$state)
unique(df$state)
order(unique(df$state)) # Warning, give the indices of the order!
state_ordered = unique(df$state)[order(unique(df$state))]
state_ordered

## -----------------------------------------------------------------------------
si_df = state_indicator(df, id_col='id', time_col='time')
names(si_df)

## -----------------------------------------------------------------------------
head(si_df)

## -----------------------------------------------------------------------------
split_df = split_in_state_df(si_df, id_col='id', time_col='time')
names(split_df)
mode(split_df)

## -----------------------------------------------------------------------------
names(split_df)[4]
head(split_df[[4]])

## -----------------------------------------------------------------------------
states_df = build_df_per_state(split_df, id_col='id', time_col='time')
names(states_df)
mode(states_df)

## ----fig.alt="Indicator function per state"-----------------------------------
plot_CFD_individuals(states_df[[1]])

## -----------------------------------------------------------------------------
df_list = cat_data_to_indicator(df)
names(df_list)
head(df_list$state_1)

