## ----include = FALSE----------------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)
options(rmarkdown.html_vignette.check_title = FALSE)

## ----setup--------------------------------------------------------------------
library(BGmisc)

has_openmx <- requireNamespace("OpenMx", quietly = TRUE)
has_mvtnorm <- requireNamespace("mvtnorm", quietly = TRUE)

if (has_openmx) {
  library(OpenMx)
} else {
  message(
    "OpenMx is not installed. Code will be shown but not executed.\n",
    "Install OpenMx with: install.packages('OpenMx')"
  )
}

if (!has_mvtnorm) {
  message(
    "mvtnorm is not installed. Data simulation examples will not run.\n",
    "Install mvtnorm with: install.packages('mvtnorm')"
  )
} else {
  library(mvtnorm)
}

run_models <- has_openmx && has_mvtnorm

## ----simulate-pedigree--------------------------------------------------------
set.seed(421)

ped <- simulatePedigree(
  kpc = 3,    # 3 children per couple
  Ngen = 4,   # 4 generations
  sexR = 0.5, # equal sex ratio
  marR = 0.6  # 60% mating rate
)

head(ped)

## ----ped-summary--------------------------------------------------------------
summarizeFamilies(ped, famID = "fam")$family_summary

## ----compute-additive---------------------------------------------------------
add_matrix <- ped2add(ped, sparse = FALSE)
add_matrix[1:5, 1:5]

## ----compute-nuclear----------------------------------------------------------
cn_matrix <- ped2cn(ped, sparse = FALSE)
cn_matrix[1:5, 1:5]

## ----compute-mito-------------------------------------------------------------
mt_matrix <- ped2mit(ped, sparse = FALSE)
mt_matrix[1:5, 1:5]

## ----compute-extended---------------------------------------------------------
ce_matrix <- ped2ce(ped)
ce_matrix[1:5, 1:5]

## ----identify-full-model------------------------------------------------------
id_full <- identifyComponentModel(
  A  = add_matrix,
  Cn = cn_matrix,
  Ce = ce_matrix,
  Mt = mt_matrix,
  E  = diag(1, nrow(add_matrix))
)
id_full

## ----include=FALSE------------------------------------------------------------
identified <- id_full$identified

identified_text <- "The full model is identified. We can proceed to fit it. However, to illustrate the process of checking identification and refining the model, we will also show how to interpret the details of the identification check. Below, I have provided an unidentified model to demonstrate how to use the `nidp` element of the result to understand which components are confounded."

not_identified_text <- "The full model is NOT identified. We will need to refine the model by dropping or fixing some components."


## ----identify-full-model-details, include = identified------------------------
# show if model is identified

identifyComponentModel(
  A  = add_matrix,
  A2  = add_matrix,
  Cn = cn_matrix,
  Ce = ce_matrix,
  Mt = mt_matrix,
  E  = diag(1, nrow(add_matrix))
)


## ----identify-reduced---------------------------------------------------------
# A simpler model: A + Cn + E
id_ace <- identifyComponentModel(
  A  = list(add_matrix),
  Cn = list(cn_matrix),
  E  = diag(1, nrow(add_matrix))
)
id_ace

## ----identify-multi-----------------------------------------------------------
set.seed(99)
ped2 <- simulatePedigree(kpc = 4, Ngen = 3, marR = 0.5) |> makeTwins()
add2 <- ped2add(ped2, sparse = FALSE)
cn2  <- ped2cn(ped2, sparse = FALSE)
ce2  <- ped2ce(ped2)
mt2  <- ped2mit(ped2, sparse = FALSE)

# Check the full model across two families
n_combined <- nrow(add_matrix) + nrow(add2)
id_two_fam <- identifyComponentModel(
  A  = list(add_matrix, add2),
  Cn = list(cn_matrix, cn2),
  Ce = list(ce_matrix, ce2),
  Mt = list(mt_matrix, mt2),
  E  = diag(1, n_combined)
)
id_two_fam

## ----simulate-phenotype, eval = has_mvtnorm-----------------------------------
# True variance components (proportions of total variance)
true_var <- list(
  ad2 = 0.50,  # additive genetic
  cn2 = 0.10,  # common nuclear environment
  ce2 = 0.00,  # common extended environment (set to 0 for identifiability)
  mt2 = 0.00,  # mitochondrial (set to 0 for simplicity)
  ee2 = 0.40   # unique environment (residual)
)

# Build the implied covariance matrix
# V = ad2*A + cn2*Cn + ce2*U + mt2*Mt + ee2*I
n <- nrow(add_matrix)
I_mat <- diag(1, n)
U_mat <- matrix(1, n, n)

V_true <- true_var$ad2 * add_matrix +
  true_var$cn2 * cn_matrix +
  true_var$ce2 * U_mat +
  true_var$mt2 * mt_matrix +
  true_var$ee2 * I_mat

# Simulate one realization of the phenotype vector
set.seed(123)
y <- mvtnorm::rmvnorm(1, sigma = V_true)


# Create named variable labels (required by OpenMx)
ytemp <- paste("S", rownames(add_matrix))

## ----show-phenotype-----------------------------------------------------------

if (!exists("y")) {
  y <- rep(NA, nrow(add_matrix))
}


## ----build-covariance, eval = run_models--------------------------------------
# Starting values for the optimizer
start_vars <- list(
  ad2 = 0.3, dd2 = 0, cn2 = 0.1,
  ce2 = 0.1, mt2 = 0.1, am2 = 0,
  ee2 = 0.4
)

# Build variance component sub-model
vc_model <- buildPedigreeModelCovariance(
  vars = start_vars,
  Vad = TRUE,   # estimate additive genetic variance
  Vdd = FALSE,  # do not estimate dominance
  Vcn = TRUE,   # estimate common nuclear environment
  Vce = TRUE,   # estimate common extended environment
  Vmt = TRUE,   # estimate mitochondrial
  Vam = FALSE,  # do not estimate A x Mt interaction
  Ver = TRUE    # estimate unique environment
)
vc_model

summary(vc_model)

## ----build-group, eval = run_models-------------------------------------------
# Format the observed data as a 1-row matrix with named columns
obs_data <- matrix(y, nrow = 1, dimnames = list(NULL, ytemp))

# Build the family group model
family_group <- buildOneFamilyGroup(
  group_name = "family1",
  Addmat = add_matrix,
  Nucmat = cn_matrix,
  Extmat = ce_matrix,
  Mtdmat = mt_matrix,
  full_df_row = obs_data,
  ytemp = ytemp
)

## ----build-full, eval = run_models--------------------------------------------
full_model <- buildPedigreeMx(
  model_name = "PedigreeVCModel",
  vars = start_vars,
  group_models = list(family_group)
)
full_model$submodels 

## ----fit-model, eval = run_models---------------------------------------------
fitted_model <- mxRun(full_model)
smr <- summary(fitted_model)

## ----show-results, eval = run_models------------------------------------------
# Extract variance component estimates
estimates <- c(
  Vad = fitted_model$ModelOne$Vad$values[1, 1],
  Vcn = fitted_model$ModelOne$Vcn$values[1, 1],
  Vce = fitted_model$ModelOne$Vce$values[1, 1],
  Vmt = fitted_model$ModelOne$Vmt$values[1, 1],
  Ver = fitted_model$ModelOne$Ver$values[1, 1]
)

# Compare estimates to true values
truth <- c(
  Vad = true_var$ad2,
  Vcn = true_var$cn2,
  Vce = true_var$ce2,
  Vmt = true_var$mt2,
  Ver = true_var$ee2
)

comparison <- data.frame(
  Component = names(truth),
  True = truth,
  Estimated = round(estimates, 4)
)
comparison

## ----show-fit-stats, eval = run_models----------------------------------------
cat("-2 Log Likelihood:", smr$Minus2LogLikelihood, "\n")
cat("Converged:", fitted_model$output$status$code == 0, "\n")

## ----multi-ped-simulate, eval = run_models------------------------------------
set.seed(2024)
n_families <- 5

# Storage
ped_list <- vector("list", n_families)
add_list <- vector("list", n_families)
cn_list <- vector("list", n_families)
mt_list <- vector("list", n_families)
ce_list <- vector("list", n_families)
y_list <- vector("list", n_families)
ytemp_list <- vector("list", n_families)

for (i in seq_len(n_families)) {
  # Simulate each family with slightly different structure
  ped_i <- simulatePedigree(kpc = 3, Ngen = 4, marR = 0.6)
  ped_list[[i]] <- ped_i

  # Compute relatedness matrices
  A_i <- as.matrix(ped2add(ped_i))
  Cn_i <- as.matrix(ped2cn(ped_i))
  Mt_i <- as.matrix(ped2mit(ped_i))
  Ce_i <- ped2ce(ped_i)
  n_i <- nrow(A_i)

  add_list[[i]] <- A_i
  cn_list[[i]] <- Cn_i
  mt_list[[i]] <- Mt_i
  ce_list[[i]] <- Ce_i

  # Build implied covariance and simulate data
  I_i <- diag(1, n_i)
  U_i <- matrix(1, n_i, n_i)
  V_i <- true_var$ad2 * A_i +
    true_var$cn2 * Cn_i +
    true_var$ce2 * U_i +
    true_var$mt2 * Mt_i +
    true_var$ee2 * I_i

  y_list[[i]] <- mvtnorm::rmvnorm(1, sigma = V_i)
  ytemp_list[[i]] <- paste("S", rownames(A_i))
}

cat("Simulated", n_families, "families\n")
cat("Family sizes:", vapply(ped_list, nrow, integer(1)), "\n")

## ----multi-ped-fit, eval = run_models-----------------------------------------
# Build group models for each family
group_models <- lapply(seq_len(n_families), function(i) {
  obs_data_i <- matrix(y_list[[i]], nrow = 1, dimnames = list(NULL, ytemp_list[[i]]))

  buildOneFamilyGroup(
    group_name = paste0("ped", i),
    Addmat = add_list[[i]],
    Nucmat = cn_list[[i]],
    Extmat = ce_list[[i]],
    Mtdmat = mt_list[[i]],
    full_df_row = obs_data_i,
    ytemp = ytemp_list[[i]]
  )
})

# Build the multi-group model
multi_model <- buildPedigreeMx(
  model_name = "MultiPedigreeModel",
  vars = start_vars,
  group_models = group_models
)

# Fit the model
fitted_multi <- mxRun(multi_model)
smr_multi <- summary(fitted_multi)

## ----multi-ped-results, eval = run_models-------------------------------------
# Extract and compare estimates
estimates_multi <- c(
  Vad = fitted_multi$ModelOne$Vad$values[1, 1],
  Vcn = fitted_multi$ModelOne$Vcn$values[1, 1],
  Vce = fitted_multi$ModelOne$Vce$values[1, 1],
  Vmt = fitted_multi$ModelOne$Vmt$values[1, 1],
  Ver = fitted_multi$ModelOne$Ver$values[1, 1]
)

comparison_multi <- data.frame(
  Component = c("Additive genetic (Vad)", "Common nuclear (Vcn)",
                "Common extended (Vce)", "Mitochondrial (Vmt)",
                "Unique environment (Ver)"),
  True = truth,
  Estimated = round(estimates_multi, 4)
)
comparison_multi

cat("\n-2 Log Likelihood:", smr_multi$Minus2LogLikelihood, "\n")
cat("Converged:", fitted_multi$output$status$code == 0, "\n")

## ----fit-highlevel, eval = run_models-----------------------------------------
fitted_easy <- fitPedigreeModel(
  model_name = "EasyFit",
  vars = start_vars,
  data = NULL,
  group_models = group_models,
  tryhard = TRUE
)

summary(fitted_easy)