## ----include = FALSE---------------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) options(rmarkdown.html_vignette.check_title = FALSE) ## ----setup-------------------------------------------------------------------- library(BGmisc) has_openmx <- requireNamespace("OpenMx", quietly = TRUE) has_mvtnorm <- requireNamespace("mvtnorm", quietly = TRUE) if (has_openmx) { library(OpenMx) } else { message( "OpenMx is not installed. Code will be shown but not executed.\n", "Install OpenMx with: install.packages('OpenMx')" ) } if (!has_mvtnorm) { message( "mvtnorm is not installed. Data simulation examples will not run.\n", "Install mvtnorm with: install.packages('mvtnorm')" ) } else { library(mvtnorm) } run_models <- has_openmx && has_mvtnorm ## ----simulate-pedigree-------------------------------------------------------- set.seed(421) ped <- simulatePedigree( kpc = 3, # 3 children per couple Ngen = 4, # 4 generations sexR = 0.5, # equal sex ratio marR = 0.6 # 60% mating rate ) head(ped) ## ----ped-summary-------------------------------------------------------------- summarizeFamilies(ped, famID = "fam")$family_summary ## ----compute-additive--------------------------------------------------------- add_matrix <- ped2add(ped, sparse = FALSE) add_matrix[1:5, 1:5] ## ----compute-nuclear---------------------------------------------------------- cn_matrix <- ped2cn(ped, sparse = FALSE) cn_matrix[1:5, 1:5] ## ----compute-mito------------------------------------------------------------- mt_matrix <- ped2mit(ped, sparse = FALSE) mt_matrix[1:5, 1:5] ## ----compute-extended--------------------------------------------------------- ce_matrix <- ped2ce(ped) ce_matrix[1:5, 1:5] ## ----identify-full-model------------------------------------------------------ id_full <- identifyComponentModel( A = add_matrix, Cn = cn_matrix, Ce = ce_matrix, Mt = mt_matrix, E = diag(1, nrow(add_matrix)) ) id_full ## ----include=FALSE------------------------------------------------------------ identified <- id_full$identified identified_text <- "The full model is identified. We can proceed to fit it. However, to illustrate the process of checking identification and refining the model, we will also show how to interpret the details of the identification check. Below, I have provided an unidentified model to demonstrate how to use the `nidp` element of the result to understand which components are confounded." not_identified_text <- "The full model is NOT identified. We will need to refine the model by dropping or fixing some components." ## ----identify-full-model-details, include = identified------------------------ # show if model is identified identifyComponentModel( A = add_matrix, A2 = add_matrix, Cn = cn_matrix, Ce = ce_matrix, Mt = mt_matrix, E = diag(1, nrow(add_matrix)) ) ## ----identify-reduced--------------------------------------------------------- # A simpler model: A + Cn + E id_ace <- identifyComponentModel( A = list(add_matrix), Cn = list(cn_matrix), E = diag(1, nrow(add_matrix)) ) id_ace ## ----identify-multi----------------------------------------------------------- set.seed(99) ped2 <- simulatePedigree(kpc = 4, Ngen = 3, marR = 0.5) |> makeTwins() add2 <- ped2add(ped2, sparse = FALSE) cn2 <- ped2cn(ped2, sparse = FALSE) ce2 <- ped2ce(ped2) mt2 <- ped2mit(ped2, sparse = FALSE) # Check the full model across two families n_combined <- nrow(add_matrix) + nrow(add2) id_two_fam <- identifyComponentModel( A = list(add_matrix, add2), Cn = list(cn_matrix, cn2), Ce = list(ce_matrix, ce2), Mt = list(mt_matrix, mt2), E = diag(1, n_combined) ) id_two_fam ## ----simulate-phenotype, eval = has_mvtnorm----------------------------------- # True variance components (proportions of total variance) true_var <- list( ad2 = 0.50, # additive genetic cn2 = 0.10, # common nuclear environment ce2 = 0.00, # common extended environment (set to 0 for identifiability) mt2 = 0.00, # mitochondrial (set to 0 for simplicity) ee2 = 0.40 # unique environment (residual) ) # Build the implied covariance matrix # V = ad2*A + cn2*Cn + ce2*U + mt2*Mt + ee2*I n <- nrow(add_matrix) I_mat <- diag(1, n) U_mat <- matrix(1, n, n) V_true <- true_var$ad2 * add_matrix + true_var$cn2 * cn_matrix + true_var$ce2 * U_mat + true_var$mt2 * mt_matrix + true_var$ee2 * I_mat # Simulate one realization of the phenotype vector set.seed(123) y <- mvtnorm::rmvnorm(1, sigma = V_true) # Create named variable labels (required by OpenMx) ytemp <- paste("S", rownames(add_matrix)) ## ----show-phenotype----------------------------------------------------------- if (!exists("y")) { y <- rep(NA, nrow(add_matrix)) } ## ----build-covariance, eval = run_models-------------------------------------- # Starting values for the optimizer start_vars <- list( ad2 = 0.3, dd2 = 0, cn2 = 0.1, ce2 = 0.1, mt2 = 0.1, am2 = 0, ee2 = 0.4 ) # Build variance component sub-model vc_model <- buildPedigreeModelCovariance( vars = start_vars, Vad = TRUE, # estimate additive genetic variance Vdd = FALSE, # do not estimate dominance Vcn = TRUE, # estimate common nuclear environment Vce = TRUE, # estimate common extended environment Vmt = TRUE, # estimate mitochondrial Vam = FALSE, # do not estimate A x Mt interaction Ver = TRUE # estimate unique environment ) vc_model summary(vc_model) ## ----build-group, eval = run_models------------------------------------------- # Format the observed data as a 1-row matrix with named columns obs_data <- matrix(y, nrow = 1, dimnames = list(NULL, ytemp)) # Build the family group model family_group <- buildOneFamilyGroup( group_name = "family1", Addmat = add_matrix, Nucmat = cn_matrix, Extmat = ce_matrix, Mtdmat = mt_matrix, full_df_row = obs_data, ytemp = ytemp ) ## ----build-full, eval = run_models-------------------------------------------- full_model <- buildPedigreeMx( model_name = "PedigreeVCModel", vars = start_vars, group_models = list(family_group) ) full_model$submodels ## ----fit-model, eval = run_models--------------------------------------------- fitted_model <- mxRun(full_model) smr <- summary(fitted_model) ## ----show-results, eval = run_models------------------------------------------ # Extract variance component estimates estimates <- c( Vad = fitted_model$ModelOne$Vad$values[1, 1], Vcn = fitted_model$ModelOne$Vcn$values[1, 1], Vce = fitted_model$ModelOne$Vce$values[1, 1], Vmt = fitted_model$ModelOne$Vmt$values[1, 1], Ver = fitted_model$ModelOne$Ver$values[1, 1] ) # Compare estimates to true values truth <- c( Vad = true_var$ad2, Vcn = true_var$cn2, Vce = true_var$ce2, Vmt = true_var$mt2, Ver = true_var$ee2 ) comparison <- data.frame( Component = names(truth), True = truth, Estimated = round(estimates, 4) ) comparison ## ----show-fit-stats, eval = run_models---------------------------------------- cat("-2 Log Likelihood:", smr$Minus2LogLikelihood, "\n") cat("Converged:", fitted_model$output$status$code == 0, "\n") ## ----multi-ped-simulate, eval = run_models------------------------------------ set.seed(2024) n_families <- 5 # Storage ped_list <- vector("list", n_families) add_list <- vector("list", n_families) cn_list <- vector("list", n_families) mt_list <- vector("list", n_families) ce_list <- vector("list", n_families) y_list <- vector("list", n_families) ytemp_list <- vector("list", n_families) for (i in seq_len(n_families)) { # Simulate each family with slightly different structure ped_i <- simulatePedigree(kpc = 3, Ngen = 4, marR = 0.6) ped_list[[i]] <- ped_i # Compute relatedness matrices A_i <- as.matrix(ped2add(ped_i)) Cn_i <- as.matrix(ped2cn(ped_i)) Mt_i <- as.matrix(ped2mit(ped_i)) Ce_i <- ped2ce(ped_i) n_i <- nrow(A_i) add_list[[i]] <- A_i cn_list[[i]] <- Cn_i mt_list[[i]] <- Mt_i ce_list[[i]] <- Ce_i # Build implied covariance and simulate data I_i <- diag(1, n_i) U_i <- matrix(1, n_i, n_i) V_i <- true_var$ad2 * A_i + true_var$cn2 * Cn_i + true_var$ce2 * U_i + true_var$mt2 * Mt_i + true_var$ee2 * I_i y_list[[i]] <- mvtnorm::rmvnorm(1, sigma = V_i) ytemp_list[[i]] <- paste("S", rownames(A_i)) } cat("Simulated", n_families, "families\n") cat("Family sizes:", vapply(ped_list, nrow, integer(1)), "\n") ## ----multi-ped-fit, eval = run_models----------------------------------------- # Build group models for each family group_models <- lapply(seq_len(n_families), function(i) { obs_data_i <- matrix(y_list[[i]], nrow = 1, dimnames = list(NULL, ytemp_list[[i]])) buildOneFamilyGroup( group_name = paste0("ped", i), Addmat = add_list[[i]], Nucmat = cn_list[[i]], Extmat = ce_list[[i]], Mtdmat = mt_list[[i]], full_df_row = obs_data_i, ytemp = ytemp_list[[i]] ) }) # Build the multi-group model multi_model <- buildPedigreeMx( model_name = "MultiPedigreeModel", vars = start_vars, group_models = group_models ) # Fit the model fitted_multi <- mxRun(multi_model) smr_multi <- summary(fitted_multi) ## ----multi-ped-results, eval = run_models------------------------------------- # Extract and compare estimates estimates_multi <- c( Vad = fitted_multi$ModelOne$Vad$values[1, 1], Vcn = fitted_multi$ModelOne$Vcn$values[1, 1], Vce = fitted_multi$ModelOne$Vce$values[1, 1], Vmt = fitted_multi$ModelOne$Vmt$values[1, 1], Ver = fitted_multi$ModelOne$Ver$values[1, 1] ) comparison_multi <- data.frame( Component = c("Additive genetic (Vad)", "Common nuclear (Vcn)", "Common extended (Vce)", "Mitochondrial (Vmt)", "Unique environment (Ver)"), True = truth, Estimated = round(estimates_multi, 4) ) comparison_multi cat("\n-2 Log Likelihood:", smr_multi$Minus2LogLikelihood, "\n") cat("Converged:", fitted_multi$output$status$code == 0, "\n") ## ----fit-highlevel, eval = run_models----------------------------------------- fitted_easy <- fitPedigreeModel( model_name = "EasyFit", vars = start_vars, data = NULL, group_models = group_models, tryhard = TRUE ) summary(fitted_easy)