From b80e928c1f656c2a9d3db20f2ca56ba186da0295 Mon Sep 17 00:00:00 2001 From: Claude Date: Wed, 11 Feb 2026 18:32:06 +0000 Subject: [PATCH 1/6] Fix MZ twins coded as 0.5 instead of 1.0 in relatedness matrix Implement addMZtwins() which redirects one MZ co-twin's parent links to point to the other twin before adjacency matrix construction. This produces isPar[twin2, twin1] = 1.0 in the sparse matrix (two 0.5 entries summed), so path tracing yields relatedness 1 between MZ pairs. Users provide a twinID column (and optionally zygosity) and pass mz_twins=TRUE to ped2add()/ped2com(). DZ twins are left unchanged when zygosity column is present. https://claude.ai/code/session_01P3RQTYpWtAtheSqi4aPjR5 --- R/buildComponent.R | 23 +++++++-- R/constructAdjacency.R | 76 ++++++++++++++++++++++++++++ tests/testthat/test-buildComponent.R | 62 +++++++++++++++++++++++ 3 files changed, 158 insertions(+), 3 deletions(-) diff --git a/R/buildComponent.R b/R/buildComponent.R index cff3268e..7794e798 100644 --- a/R/buildComponent.R +++ b/R/buildComponent.R @@ -19,7 +19,7 @@ #' @param isChild_method character. The method to use for computing the isChild matrix. Options are "classic" or "partialparent" #' @param adjBeta_method numeric The method to use for computing the building the adjacency_method matrix when using the "beta" build #' @param compress logical. If TRUE, use compression when saving the checkpoint files. Defaults to TRUE. -#' @param mz_twins logical. If TRUE, treat MZ twins as having an additional parent-child relationship for the purposes of computing the relatedness matrix. Defaults to FALSE. +#' @param mz_twins logical. If TRUE, redirect MZ twin parent links in the adjacency matrix so that MZ co-twins are coded with relatedness 1 instead of 0.5. Twin pairs are identified from the \code{twinID} column. When a \code{zygosity} column is also present, only pairs where both members have \code{zygosity == "MZ"} are modified; otherwise all \code{twinID} pairs are assumed to be MZ. Defaults to FALSE. #' @param ... additional arguments to be passed to \code{\link{ped2com}} #' @details The algorithms and methodologies used in this function are further discussed and exemplified in the vignette titled "examplePedigreeFunctions". For more advanced scenarios and detailed explanations, consult this vignette. #' @export @@ -123,9 +123,11 @@ ped2com <- function(ped, component, ped <- standardizeColnames(ped, verbose = config$verbose) } + mz_modified <- NULL if (mz_twins == TRUE && "twinID" %in% colnames(ped)) { - # TODO - # ped <- addMZtwins(ped, verbose = config$verbose) + mz_result <- addMZtwins(ped, verbose = config$verbose) + ped <- mz_result$ped + mz_modified <- mz_result$modified } @@ -313,6 +315,19 @@ ped2com <- function(ped, component, # Assign 1 to all nonzero elements for mitochondrial component } + # Fix diagonal for MZ twins whose parents were redirected + if (!is.null(mz_modified) && length(mz_modified) > 0) { + for (pair in mz_modified) { + idx1 <- which(ped$ID == pair[1]) + idx2 <- which(ped$ID == pair[2]) + if (length(idx1) == 1 && length(idx2) == 1) { + # twin2 (pair[2]) gets inflated diagonal from redirection; + # set it equal to twin1's diagonal (they are genetically identical) + r[idx2, idx2] <- r[idx1, idx1] + } + } + } + if (config$sparse == FALSE) { r <- as.matrix(r) } @@ -343,6 +358,7 @@ ped2add <- function(ped, max_gen = 25, sparse = TRUE, verbose = FALSE, save_rate_parlist = 100000 * save_rate, save_path = "checkpoint/", compress = TRUE, + mz_twins = FALSE, ...) { ped2com( ped = ped, @@ -361,6 +377,7 @@ ped2add <- function(ped, max_gen = 25, sparse = TRUE, verbose = FALSE, save_rate_parlist = save_rate_parlist, save_path = save_path, compress = compress, + mz_twins = mz_twins, ... ) } diff --git a/R/constructAdjacency.R b/R/constructAdjacency.R index 3053fb5b..37777e95 100644 --- a/R/constructAdjacency.R +++ b/R/constructAdjacency.R @@ -559,3 +559,79 @@ isChild <- function(isChild_method, ped) { }) } } + + +#' Modify pedigree to model MZ twins in the adjacency matrix +#' +#' For each MZ twin pair identified by the \code{twinID} column, this function +#' redirects one twin's parental links to point to the other twin. This causes +#' the adjacency matrix to assign a relatedness of 1 between MZ co-twins +#' (because both momID and dadID entries point to the same individual, producing +#' two 0.5 entries that sum to 1.0 in the sparse matrix). +#' +#' @param ped A pedigree data.frame with columns \code{ID}, \code{momID}, +#' \code{dadID}, and \code{twinID}. Optionally a \code{zygosity} column; +#' when present only pairs where both members have \code{zygosity == "MZ"} +#' are modified. +#' @param verbose logical. If TRUE, print progress messages. +#' @return A list with two elements: \code{ped} (the modified pedigree) and +#' \code{modified} (a list of length-2 vectors giving each pair as +#' \code{c(twin1, twin2)} where twin2 is the redirected twin). +#' @keywords internal +addMZtwins <- function(ped, verbose = FALSE) { + if (!"twinID" %in% colnames(ped)) { + return(list(ped = ped, modified = NULL)) + } + + twin_rows <- which(!is.na(ped$twinID)) + + # If zygosity column exists, restrict to MZ pairs + if ("zygosity" %in% colnames(ped)) { + twin_rows <- twin_rows[!is.na(ped$zygosity[twin_rows]) & + ped$zygosity[twin_rows] == "MZ"] + } + + if (length(twin_rows) == 0) { + return(list(ped = ped, modified = NULL)) + } + + processed <- c() + modified <- list() + + for (idx in twin_rows) { + twin_id <- ped$ID[idx] + co_twin_id <- ped$twinID[idx] + + # Skip if already processed this pair + if (twin_id %in% processed || co_twin_id %in% processed) next + + # Determine which twin keeps its parents (twin1) and which gets + # redirected (twin2). Use the smaller ID as twin1 for consistency. + if (twin_id <= co_twin_id) { + twin1 <- twin_id + twin2 <- co_twin_id + } else { + twin1 <- co_twin_id + twin2 <- twin_id + } + + twin2_idx <- which(ped$ID == twin2) + + if (length(twin2_idx) != 1) next + + # Redirect twin2's parents to twin1. In the adjacency matrix this + # produces isPar[twin2, twin1] = 0.5 + 0.5 = 1.0, so path tracing + # gives relatedness(twin1, twin2) = 1. + ped$momID[twin2_idx] <- twin1 + ped$dadID[twin2_idx] <- twin1 + + processed <- c(processed, twin1, twin2) + modified[[length(modified) + 1]] <- c(twin1, twin2) + + if (verbose) { + message("MZ twin pair: redirected ", twin2, "'s parents to ", twin1) + } + } + + return(list(ped = ped, modified = modified)) +} diff --git a/tests/testthat/test-buildComponent.R b/tests/testthat/test-buildComponent.R index b00fa039..b54cddfe 100644 --- a/tests/testthat/test-buildComponent.R +++ b/tests/testthat/test-buildComponent.R @@ -1,3 +1,65 @@ +test_that("MZ twins coded at relatedness 1 via twinID column", { + # Simple pedigree: two parents and two MZ twin children + ped <- data.frame( + ID = c(1, 2, 3, 4), + momID = c(NA, NA, 2, 2), + dadID = c(NA, NA, 1, 1), + sex = c("M", "F", "M", "M"), + twinID = c(NA, NA, 4, 3), + zygosity = c(NA, NA, "MZ", "MZ") + ) + + # Without mz_twins: siblings get 0.5 + r_no_mz <- ped2add(ped, mz_twins = FALSE, sparse = FALSE) + expect_equal(r_no_mz["3", "4"], 0.5) + expect_equal(r_no_mz["4", "3"], 0.5) + + # With mz_twins: MZ twins get 1.0 + r_mz <- ped2add(ped, mz_twins = TRUE, sparse = FALSE) + expect_equal(r_mz["3", "4"], 1.0) + expect_equal(r_mz["4", "3"], 1.0) + + # Self-relatedness should still be 1 + expect_equal(r_mz["3", "3"], 1.0) + expect_equal(r_mz["4", "4"], 1.0) + + # Parent-child relatedness unchanged + expect_equal(r_mz["3", "1"], 0.5) + expect_equal(r_mz["4", "1"], 0.5) + expect_equal(r_mz["3", "2"], 0.5) + expect_equal(r_mz["4", "2"], 0.5) +}) + +test_that("MZ twins without zygosity column assumes all twinID pairs are MZ", { + ped <- data.frame( + ID = c(1, 2, 3, 4), + momID = c(NA, NA, 2, 2), + dadID = c(NA, NA, 1, 1), + sex = c("M", "F", "M", "M"), + twinID = c(NA, NA, 4, 3) + ) + + r_mz <- ped2add(ped, mz_twins = TRUE, sparse = FALSE) + expect_equal(r_mz["3", "4"], 1.0) + expect_equal(r_mz["4", "3"], 1.0) +}) + +test_that("DZ twins with zygosity column are NOT modified", { + ped <- data.frame( + ID = c(1, 2, 3, 4), + momID = c(NA, NA, 2, 2), + dadID = c(NA, NA, 1, 1), + sex = c("M", "F", "M", "F"), + twinID = c(NA, NA, 4, 3), + zygosity = c(NA, NA, "DZ", "DZ") + ) + + r_mz <- ped2add(ped, mz_twins = TRUE, sparse = FALSE) + # DZ twins remain at sibling relatedness = 0.5 + expect_equal(r_mz["3", "4"], 0.5) + expect_equal(r_mz["4", "3"], 0.5) +}) + test_that(".assignParentValue works", { expect_equal(.assignParentValue("generation"), .5) expect_equal(.assignParentValue("additive"), .5) From f228f2bb8539ac71f67cb62dcfb5c3ba6bea0a9b Mon Sep 17 00:00:00 2001 From: Claude Date: Wed, 11 Feb 2026 19:14:42 +0000 Subject: [PATCH 2/6] Replace pedigree redirection with r2 column merge for MZ twins The previous approach redirected twin2's parents to twin1 in the pedigree, which inflated twin2's diagonal (1.5 instead of 1.0) and twin2-to-child relatedness (0.75 instead of 0.5). New approach: after path tracing but before tcrossprod, merge twin2's column into twin1's in the r2 matrix. MZ twins share the same genetic source, so this correctly produces: - MZ twin relatedness = 1.0 - Self-relatedness = 1.0 (no inflation) - Parent-child and all downstream values correct - No post-hoc diagonal patching needed https://claude.ai/code/session_01P3RQTYpWtAtheSqi4aPjR5 --- R/buildComponent.R | 38 ++++++++-------- R/constructAdjacency.R | 67 ++++++++++++---------------- tests/testthat/test-buildComponent.R | 27 +++++++++++ 3 files changed, 76 insertions(+), 56 deletions(-) diff --git a/R/buildComponent.R b/R/buildComponent.R index 7794e798..a4e995ca 100644 --- a/R/buildComponent.R +++ b/R/buildComponent.R @@ -19,7 +19,7 @@ #' @param isChild_method character. The method to use for computing the isChild matrix. Options are "classic" or "partialparent" #' @param adjBeta_method numeric The method to use for computing the building the adjacency_method matrix when using the "beta" build #' @param compress logical. If TRUE, use compression when saving the checkpoint files. Defaults to TRUE. -#' @param mz_twins logical. If TRUE, redirect MZ twin parent links in the adjacency matrix so that MZ co-twins are coded with relatedness 1 instead of 0.5. Twin pairs are identified from the \code{twinID} column. When a \code{zygosity} column is also present, only pairs where both members have \code{zygosity == "MZ"} are modified; otherwise all \code{twinID} pairs are assumed to be MZ. Defaults to FALSE. +#' @param mz_twins logical. If TRUE, merge MZ co-twin columns in the r2 matrix before tcrossprod so that MZ twins are coded with relatedness 1 instead of 0.5. Twin pairs are identified from the \code{twinID} column. When a \code{zygosity} column is also present, only pairs where both members have \code{zygosity == "MZ"} are used; otherwise all \code{twinID} pairs are assumed to be MZ. Defaults to FALSE. #' @param ... additional arguments to be passed to \code{\link{ped2com}} #' @details The algorithms and methodologies used in this function are further discussed and exemplified in the vignette titled "examplePedigreeFunctions". For more advanced scenarios and detailed explanations, consult this vignette. #' @export @@ -123,11 +123,9 @@ ped2com <- function(ped, component, ped <- standardizeColnames(ped, verbose = config$verbose) } - mz_modified <- NULL + mz_pairs <- NULL if (mz_twins == TRUE && "twinID" %in% colnames(ped)) { - mz_result <- addMZtwins(ped, verbose = config$verbose) - ped <- mz_result$ped - mz_modified <- mz_result$modified + mz_pairs <- findMZtwins(ped, verbose = config$verbose) } @@ -291,6 +289,23 @@ ped2com <- function(ped, component, compress = config$compress ) + # --- Step 3b: Merge MZ twin columns in r2 --- + # MZ twins are genetically identical, so they represent the same genetic + # source. Merging twin2's column into twin1's column before tcrossprod + # makes every downstream relatedness value correct (diagonal, off-diagonal, + # and descendants) without any post-hoc patching. + if (!is.null(mz_pairs) && length(mz_pairs) > 0) { + for (pair in mz_pairs) { + idx1 <- pair[1] + idx2 <- pair[2] + r2[, idx1] <- r2[, idx1] + r2[, idx2] + r2[, idx2] <- 0 + } + if (config$verbose == TRUE) { + message("Merged ", length(mz_pairs), " MZ twin pair column(s) in r2") + } + } + # --- Step 4: T crossproduct --- if (config$resume == TRUE && file.exists(checkpoint_files$tcrossprod_checkpoint) && @@ -315,19 +330,6 @@ ped2com <- function(ped, component, # Assign 1 to all nonzero elements for mitochondrial component } - # Fix diagonal for MZ twins whose parents were redirected - if (!is.null(mz_modified) && length(mz_modified) > 0) { - for (pair in mz_modified) { - idx1 <- which(ped$ID == pair[1]) - idx2 <- which(ped$ID == pair[2]) - if (length(idx1) == 1 && length(idx2) == 1) { - # twin2 (pair[2]) gets inflated diagonal from redirection; - # set it equal to twin1's diagonal (they are genetically identical) - r[idx2, idx2] <- r[idx1, idx1] - } - } - } - if (config$sparse == FALSE) { r <- as.matrix(r) } diff --git a/R/constructAdjacency.R b/R/constructAdjacency.R index 37777e95..5e41fb5c 100644 --- a/R/constructAdjacency.R +++ b/R/constructAdjacency.R @@ -561,26 +561,23 @@ isChild <- function(isChild_method, ped) { } -#' Modify pedigree to model MZ twins in the adjacency matrix +#' Find MZ twin pairs in a pedigree #' -#' For each MZ twin pair identified by the \code{twinID} column, this function -#' redirects one twin's parental links to point to the other twin. This causes -#' the adjacency matrix to assign a relatedness of 1 between MZ co-twins -#' (because both momID and dadID entries point to the same individual, producing -#' two 0.5 entries that sum to 1.0 in the sparse matrix). +#' Identifies MZ twin pairs from the \code{twinID} column and returns their +#' row indices. These indices are used later to merge the twins' columns in +#' the \code{r2} matrix before \code{tcrossprod}, which correctly produces +#' relatedness 1 between MZ co-twins with no diagonal or downstream artifacts. #' -#' @param ped A pedigree data.frame with columns \code{ID}, \code{momID}, -#' \code{dadID}, and \code{twinID}. Optionally a \code{zygosity} column; -#' when present only pairs where both members have \code{zygosity == "MZ"} -#' are modified. +#' @param ped A pedigree data.frame with columns \code{ID} and \code{twinID}. +#' Optionally a \code{zygosity} column; when present only pairs where both +#' members have \code{zygosity == "MZ"} are used. #' @param verbose logical. If TRUE, print progress messages. -#' @return A list with two elements: \code{ped} (the modified pedigree) and -#' \code{modified} (a list of length-2 vectors giving each pair as -#' \code{c(twin1, twin2)} where twin2 is the redirected twin). +#' @return A list of length-2 integer vectors \code{c(idx1, idx2)} giving the +#' row indices of each MZ pair in the pedigree, or \code{NULL} if none found. #' @keywords internal -addMZtwins <- function(ped, verbose = FALSE) { +findMZtwins <- function(ped, verbose = FALSE) { if (!"twinID" %in% colnames(ped)) { - return(list(ped = ped, modified = NULL)) + return(NULL) } twin_rows <- which(!is.na(ped$twinID)) @@ -592,11 +589,11 @@ addMZtwins <- function(ped, verbose = FALSE) { } if (length(twin_rows) == 0) { - return(list(ped = ped, modified = NULL)) + return(NULL) } processed <- c() - modified <- list() + pairs <- list() for (idx in twin_rows) { twin_id <- ped$ID[idx] @@ -605,33 +602,27 @@ addMZtwins <- function(ped, verbose = FALSE) { # Skip if already processed this pair if (twin_id %in% processed || co_twin_id %in% processed) next - # Determine which twin keeps its parents (twin1) and which gets - # redirected (twin2). Use the smaller ID as twin1 for consistency. - if (twin_id <= co_twin_id) { - twin1 <- twin_id - twin2 <- co_twin_id - } else { - twin1 <- co_twin_id - twin2 <- twin_id - } + idx1 <- which(ped$ID == twin_id) + idx2 <- which(ped$ID == co_twin_id) - twin2_idx <- which(ped$ID == twin2) + if (length(idx1) != 1 || length(idx2) != 1) next - if (length(twin2_idx) != 1) next - - # Redirect twin2's parents to twin1. In the adjacency matrix this - # produces isPar[twin2, twin1] = 0.5 + 0.5 = 1.0, so path tracing - # gives relatedness(twin1, twin2) = 1. - ped$momID[twin2_idx] <- twin1 - ped$dadID[twin2_idx] <- twin1 + # Always put the lower index first for consistency + if (idx1 > idx2) { + tmp <- idx1 + idx1 <- idx2 + idx2 <- tmp + } - processed <- c(processed, twin1, twin2) - modified[[length(modified) + 1]] <- c(twin1, twin2) + processed <- c(processed, twin_id, co_twin_id) + pairs[[length(pairs) + 1]] <- c(idx1, idx2) if (verbose) { - message("MZ twin pair: redirected ", twin2, "'s parents to ", twin1) + message("MZ twin pair found: ", twin_id, " (row ", idx1, + ") and ", co_twin_id, " (row ", idx2, ")") } } - return(list(ped = ped, modified = modified)) + if (length(pairs) == 0) return(NULL) + return(pairs) } diff --git a/tests/testthat/test-buildComponent.R b/tests/testthat/test-buildComponent.R index b54cddfe..5d2f1319 100644 --- a/tests/testthat/test-buildComponent.R +++ b/tests/testthat/test-buildComponent.R @@ -60,6 +60,33 @@ test_that("DZ twins with zygosity column are NOT modified", { expect_equal(r_mz["4", "3"], 0.5) }) +test_that("MZ twins: downstream child relatedness is correct", { + # 3-generation pedigree: parents -> MZ twins -> twin2 has a child + ped <- data.frame( + ID = c(1, 2, 3, 4, 5, 6), + momID = c(NA, NA, 2, 2, NA, 4), + dadID = c(NA, NA, 1, 1, NA, 5), + sex = c("M", "F", "M", "M", "F", "M"), + twinID = c(NA, NA, 4, 3, NA, NA), + zygosity = c(NA, NA, "MZ", "MZ", NA, NA) + ) + + r_mz <- ped2add(ped, mz_twins = TRUE, sparse = FALSE) + + # MZ twins at 1.0 + expect_equal(r_mz["3", "4"], 1.0) + + # Child of twin2 (ID=4) should be 0.5 to twin2 (parent) + expect_equal(r_mz["6", "4"], 0.5) + + # Child of twin2 should ALSO be 0.5 to twin1 (genetically identical to parent) + expect_equal(r_mz["6", "3"], 0.5) + + # Diagonal for both twins should be clean (no inflation) + expect_equal(r_mz["3", "3"], 1.0) + expect_equal(r_mz["4", "4"], 1.0) +}) + test_that(".assignParentValue works", { expect_equal(.assignParentValue("generation"), .5) expect_equal(.assignParentValue("additive"), .5) From 22f3e171219f131e621a5d63802cc78c593e28d3 Mon Sep 17 00:00:00 2001 From: Claude Date: Wed, 11 Feb 2026 19:21:22 +0000 Subject: [PATCH 3/6] Use symmetric column merge for MZ twins instead of zeroing Both twin columns now get the same normalized values (r2_merged = (col1 + col2) / sqrt(2)) so both twins remain present and contribute equally. Produces the same final relatedness matrix as the zero approach but without erasing one twin from the genetic source matrix. https://claude.ai/code/session_01P3RQTYpWtAtheSqi4aPjR5 --- R/buildComponent.R | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/R/buildComponent.R b/R/buildComponent.R index a4e995ca..78fdc964 100644 --- a/R/buildComponent.R +++ b/R/buildComponent.R @@ -291,15 +291,17 @@ ped2com <- function(ped, component, # --- Step 3b: Merge MZ twin columns in r2 --- # MZ twins are genetically identical, so they represent the same genetic - # source. Merging twin2's column into twin1's column before tcrossprod - # makes every downstream relatedness value correct (diagonal, off-diagonal, - # and descendants) without any post-hoc patching. + # source. Both columns are set to the same normalized values so that + # each twin contributes equally and the total genetic variance is preserved. + # The sqrt(2) normalization ensures two identical columns contribute the + # same total as two independent columns did before merging. if (!is.null(mz_pairs) && length(mz_pairs) > 0) { for (pair in mz_pairs) { idx1 <- pair[1] idx2 <- pair[2] - r2[, idx1] <- r2[, idx1] + r2[, idx2] - r2[, idx2] <- 0 + merged <- (r2[, idx1] + r2[, idx2]) / sqrt(2) + r2[, idx1] <- merged + r2[, idx2] <- merged } if (config$verbose == TRUE) { message("Merged ", length(mz_pairs), " MZ twin pair column(s) in r2") From 85846eb6a5d2476e2606609c3e8745018d333f07 Mon Sep 17 00:00:00 2001 From: Claude Date: Wed, 11 Feb 2026 22:14:14 +0000 Subject: [PATCH 4/6] Merge-then-restore approach for MZ twins Temporarily absorb twin2's column into twin1's before tcrossprod, then copy twin1's row/col back to twin2 afterward. This keeps the computation correct while ensuring neither twin is erased from the final relatedness matrix. https://claude.ai/code/session_01P3RQTYpWtAtheSqi4aPjR5 --- R/buildComponent.R | 29 ++++++++++++++++++++--------- 1 file changed, 20 insertions(+), 9 deletions(-) diff --git a/R/buildComponent.R b/R/buildComponent.R index 78fdc964..71f28929 100644 --- a/R/buildComponent.R +++ b/R/buildComponent.R @@ -289,19 +289,16 @@ ped2com <- function(ped, component, compress = config$compress ) - # --- Step 3b: Merge MZ twin columns in r2 --- - # MZ twins are genetically identical, so they represent the same genetic - # source. Both columns are set to the same normalized values so that - # each twin contributes equally and the total genetic variance is preserved. - # The sqrt(2) normalization ensures two identical columns contribute the - # same total as two independent columns did before merging. + # --- Step 3b: Temporarily merge MZ twin columns in r2 --- + # MZ twins share the same genetic source. We absorb twin2's column into + # twin1's before tcrossprod so all path-traced relatedness flows through a + # single source. After tcrossprod we copy twin1's row/col back to twin2. if (!is.null(mz_pairs) && length(mz_pairs) > 0) { for (pair in mz_pairs) { idx1 <- pair[1] idx2 <- pair[2] - merged <- (r2[, idx1] + r2[, idx2]) / sqrt(2) - r2[, idx1] <- merged - r2[, idx2] <- merged + r2[, idx1] <- r2[, idx1] + r2[, idx2] + r2[, idx2] <- 0 } if (config$verbose == TRUE) { message("Merged ", length(mz_pairs), " MZ twin pair column(s) in r2") @@ -327,6 +324,20 @@ ped2com <- function(ped, component, } } + # --- Step 4b: Restore MZ twins --- + # Copy twin1's row/col to twin2 so both twins appear in the final matrix. + if (!is.null(mz_pairs) && length(mz_pairs) > 0) { + for (pair in mz_pairs) { + idx1 <- pair[1] + idx2 <- pair[2] + r[idx2, ] <- r[idx1, ] + r[, idx2] <- r[, idx1] + } + if (config$verbose == TRUE) { + message("Restored ", length(mz_pairs), " MZ twin pair(s) in relatedness matrix") + } + } + if (config$component %in% c("mitochondrial", "mtdna", "mitochondria")) { r@x <- rep(1, length(r@x)) # Assign 1 to all nonzero elements for mitochondrial component From b6b434677666818d4e7fc2ec1549250484d30c12 Mon Sep 17 00:00:00 2001 From: Mason Garrison Date: Wed, 11 Feb 2026 18:01:47 -0500 Subject: [PATCH 5/6] Accept lowercase 'mz' and add MZ twin tests Treat both "mz" and "MZ" as monozygotic in findMZtwins (zygosity check now uses %in% c("mz","MZ")). Minor formatting tweak to the verbose message. Added unit tests (tests/testthat/test-buildComponent.R) verifying that MZ twins are coded with relatedness 1 when mz_twins=TRUE, that siblings remain 0.5 when mz_twins=FALSE, self-relatedness stays 1, and parent-child relatedness is unchanged. --- R/constructAdjacency.R | 4 ++-- tests/testthat/test-buildComponent.R | 28 ++++++++++++++++++++++++++++ 2 files changed, 30 insertions(+), 2 deletions(-) diff --git a/R/constructAdjacency.R b/R/constructAdjacency.R index 5e41fb5c..d779937b 100644 --- a/R/constructAdjacency.R +++ b/R/constructAdjacency.R @@ -585,7 +585,7 @@ findMZtwins <- function(ped, verbose = FALSE) { # If zygosity column exists, restrict to MZ pairs if ("zygosity" %in% colnames(ped)) { twin_rows <- twin_rows[!is.na(ped$zygosity[twin_rows]) & - ped$zygosity[twin_rows] == "MZ"] + ped$zygosity[twin_rows] %in% c("mz", "MZ")] } if (length(twin_rows) == 0) { @@ -619,7 +619,7 @@ findMZtwins <- function(ped, verbose = FALSE) { if (verbose) { message("MZ twin pair found: ", twin_id, " (row ", idx1, - ") and ", co_twin_id, " (row ", idx2, ")") + ") and ", co_twin_id, " (row ", idx2, ")") } } diff --git a/tests/testthat/test-buildComponent.R b/tests/testthat/test-buildComponent.R index 5d2f1319..97c27975 100644 --- a/tests/testthat/test-buildComponent.R +++ b/tests/testthat/test-buildComponent.R @@ -1,3 +1,31 @@ +test_that("MZ twins coded at relatedness 1 via twinID column", { + # Simple pedigree: two parents and two MZ twin children + ped <- potter + + # Without mz_twins: siblings get 0.5 + r_no_mz <- ped2add(ped, mz_twins = FALSE, sparse = FALSE) + expect_equal(r_no_mz["12", "13"], 0.5) + expect_equal(r_no_mz["13", "12"], 0.5) + + # With mz_twins: MZ twins get 1.0 + r_mz <- ped2add(ped, mz_twins = TRUE, sparse = FALSE) + expect_equal(r_mz["12", "13"], 1.0) + expect_equal(r_mz["13", "12"], 1.0) + + # Self-relatedness should still be 1 + expect_equal(r_mz["12", "12"], 1.0) + expect_equal(r_mz["13", "13"], 1.0) + + # Parent-child relatedness unchanged + expect_equal(r_mz["3", "1"], 0.5) + expect_equal(r_mz["4", "1"], 0.5) + expect_equal(r_mz["3", "2"], 0.5) + expect_equal(r_mz["4", "2"], 0.5) +}) + + + + test_that("MZ twins coded at relatedness 1 via twinID column", { # Simple pedigree: two parents and two MZ twin children ped <- data.frame( From 5f113c440a16575d91e18c317e2a82015ad47152 Mon Sep 17 00:00:00 2001 From: Mason Garrison Date: Wed, 11 Feb 2026 18:04:31 -0500 Subject: [PATCH 6/6] fix tests --- man/adjustKidsPerCouple.Rd | 13 ++++++++++++- man/buildBetweenGenerations.Rd | 13 ++++++++++++- man/buildWithinGenerations.Rd | 13 ++++++++++++- man/findMZtwins.Rd | 26 ++++++++++++++++++++++++++ man/ped2add.Rd | 3 +++ man/ped2com.Rd | 3 +++ man/ped2fam.Rd | 3 +++ man/ped2graph.Rd | 3 +++ man/ped2maternal.Rd | 3 +++ man/ped2paternal.Rd | 3 +++ man/simulatePedigree.Rd | 13 ++++++++++++- tests/testthat/test-buildComponent.R | 5 +---- tests/testthat/test-simulatePedigree.R | 10 +++++----- 13 files changed, 98 insertions(+), 13 deletions(-) create mode 100644 man/findMZtwins.Rd diff --git a/man/adjustKidsPerCouple.Rd b/man/adjustKidsPerCouple.Rd index bf5b8192..dbe7e500 100644 --- a/man/adjustKidsPerCouple.Rd +++ b/man/adjustKidsPerCouple.Rd @@ -21,7 +21,18 @@ value is 3. Returns an error when kpc equals 1.} generated from a poisson distribution with mean kpc. If FALSE, the number of kids per mate will be fixed at kpc.} -\item{beta}{logical. If TRUE, use the optimized version of the algorithm.} +\item{beta}{logical or character. Controls which algorithm version to use: +\itemize{ + \item{\code{FALSE}, \code{"base"}, or \code{"original"} (default): Use the original algorithm. + Slower but ensures exact reproducibility with set.seed().} + \item{\code{TRUE} or \code{"optimized"}: Use the optimized algorithm with 4-5x speedup. + Produces statistically equivalent results but not identical to base version + due to different random number consumption. Recommended for large simulations + where speed matters more than exact reproducibility.} +} +Note: Both versions are mathematically correct and produce valid pedigrees with the +same statistical properties (sex ratios, mating rates, etc.). The optimized version +uses vectorized operations instead of loops, making it much faster for large pedigrees.} } \value{ A numeric vector with the generated or adjusted number of kids per couple. diff --git a/man/buildBetweenGenerations.Rd b/man/buildBetweenGenerations.Rd index fa87604e..f28622b2 100644 --- a/man/buildBetweenGenerations.Rd +++ b/man/buildBetweenGenerations.Rd @@ -62,7 +62,18 @@ kids per mate will be fixed at kpc.} \item{code_female}{The value to use for females. Default is "F"} -\item{beta}{logical. If TRUE, use the optimized version of the algorithm.} +\item{beta}{logical or character. Controls which algorithm version to use: +\itemize{ + \item{\code{FALSE}, \code{"base"}, or \code{"original"} (default): Use the original algorithm. + Slower but ensures exact reproducibility with set.seed().} + \item{\code{TRUE} or \code{"optimized"}: Use the optimized algorithm with 4-5x speedup. + Produces statistically equivalent results but not identical to base version + due to different random number consumption. Recommended for large simulations + where speed matters more than exact reproducibility.} +} +Note: Both versions are mathematically correct and produce valid pedigrees with the +same statistical properties (sex ratios, mating rates, etc.). The optimized version +uses vectorized operations instead of loops, making it much faster for large pedigrees.} } \value{ The function updates the `df_Fam` data frame in place, adding or modifying columns related to parental and offspring status, diff --git a/man/buildWithinGenerations.Rd b/man/buildWithinGenerations.Rd index 905e42ad..cbddf27a 100644 --- a/man/buildWithinGenerations.Rd +++ b/man/buildWithinGenerations.Rd @@ -20,7 +20,18 @@ buildWithinGenerations( ) } \arguments{ -\item{beta}{logical. If TRUE, use the optimized version of the algorithm.} +\item{beta}{logical or character. Controls which algorithm version to use: +\itemize{ + \item{\code{FALSE}, \code{"base"}, or \code{"original"} (default): Use the original algorithm. + Slower but ensures exact reproducibility with set.seed().} + \item{\code{TRUE} or \code{"optimized"}: Use the optimized algorithm with 4-5x speedup. + Produces statistically equivalent results but not identical to base version + due to different random number consumption. Recommended for large simulations + where speed matters more than exact reproducibility.} +} +Note: Both versions are mathematically correct and produce valid pedigrees with the +same statistical properties (sex ratios, mating rates, etc.). The optimized version +uses vectorized operations instead of loops, making it much faster for large pedigrees.} \item{sizeGens}{A numeric vector containing the sizes of each generation within the pedigree.} diff --git a/man/findMZtwins.Rd b/man/findMZtwins.Rd new file mode 100644 index 00000000..a265c4d2 --- /dev/null +++ b/man/findMZtwins.Rd @@ -0,0 +1,26 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/constructAdjacency.R +\name{findMZtwins} +\alias{findMZtwins} +\title{Find MZ twin pairs in a pedigree} +\usage{ +findMZtwins(ped, verbose = FALSE) +} +\arguments{ +\item{ped}{A pedigree data.frame with columns \code{ID} and \code{twinID}. +Optionally a \code{zygosity} column; when present only pairs where both +members have \code{zygosity == "MZ"} are used.} + +\item{verbose}{logical. If TRUE, print progress messages.} +} +\value{ +A list of length-2 integer vectors \code{c(idx1, idx2)} giving the + row indices of each MZ pair in the pedigree, or \code{NULL} if none found. +} +\description{ +Identifies MZ twin pairs from the \code{twinID} column and returns their +row indices. These indices are used later to merge the twins' columns in +the \code{r2} matrix before \code{tcrossprod}, which correctly produces +relatedness 1 between MZ co-twins with no diagonal or downstream artifacts. +} +\keyword{internal} diff --git a/man/ped2add.Rd b/man/ped2add.Rd index dc2fee88..a7e83f8d 100644 --- a/man/ped2add.Rd +++ b/man/ped2add.Rd @@ -21,6 +21,7 @@ ped2add( save_rate_parlist = 1e+05 * save_rate, save_path = "checkpoint/", compress = TRUE, + mz_twins = FALSE, ... ) } @@ -57,6 +58,8 @@ ped2add( \item{compress}{logical. If TRUE, use compression when saving the checkpoint files. Defaults to TRUE.} +\item{mz_twins}{logical. If TRUE, merge MZ co-twin columns in the r2 matrix before tcrossprod so that MZ twins are coded with relatedness 1 instead of 0.5. Twin pairs are identified from the \code{twinID} column. When a \code{zygosity} column is also present, only pairs where both members have \code{zygosity == "MZ"} are used; otherwise all \code{twinID} pairs are assumed to be MZ. Defaults to FALSE.} + \item{...}{additional arguments to be passed to \code{\link{ped2com}}} } \description{ diff --git a/man/ped2com.Rd b/man/ped2com.Rd index f34d6022..27014a25 100644 --- a/man/ped2com.Rd +++ b/man/ped2com.Rd @@ -25,6 +25,7 @@ ped2com( save_path = "checkpoint/", adjBeta_method = NULL, compress = TRUE, + mz_twins = FALSE, ... ) } @@ -69,6 +70,8 @@ ped2com( \item{compress}{logical. If TRUE, use compression when saving the checkpoint files. Defaults to TRUE.} +\item{mz_twins}{logical. If TRUE, merge MZ co-twin columns in the r2 matrix before tcrossprod so that MZ twins are coded with relatedness 1 instead of 0.5. Twin pairs are identified from the \code{twinID} column. When a \code{zygosity} column is also present, only pairs where both members have \code{zygosity == "MZ"} are used; otherwise all \code{twinID} pairs are assumed to be MZ. Defaults to FALSE.} + \item{...}{additional arguments to be passed to \code{\link{ped2com}}} } \description{ diff --git a/man/ped2fam.Rd b/man/ped2fam.Rd index 3052e568..b3cf1e07 100644 --- a/man/ped2fam.Rd +++ b/man/ped2fam.Rd @@ -10,6 +10,7 @@ ped2fam( momID = "momID", dadID = "dadID", famID = "famID", + twinID = "twinID", ... ) } @@ -24,6 +25,8 @@ ped2fam( \item{famID}{character. Name of the column to be created in ped for the family ID variable} +\item{twinID}{character. Name of the column in ped for the twin ID variable, if applicable} + \item{...}{additional arguments to be passed to \code{\link{ped2com}}} } \value{ diff --git a/man/ped2graph.Rd b/man/ped2graph.Rd index 5e7ac7b1..022af3c7 100755 --- a/man/ped2graph.Rd +++ b/man/ped2graph.Rd @@ -9,6 +9,7 @@ ped2graph( personID = "ID", momID = "momID", dadID = "dadID", + twinID = "twinID", directed = TRUE, adjacent = c("parents", "mothers", "fathers"), ... @@ -23,6 +24,8 @@ ped2graph( \item{dadID}{character. Name of the column in ped for the father ID variable} +\item{twinID}{character. Name of the column in ped for the twin ID variable, if applicable} + \item{directed}{Logical scalar. Default is TRUE. Indicates whether or not to create a directed graph.} \item{adjacent}{Character. Relationship that defines adjacency in the graph: parents, mothers, or fathers} diff --git a/man/ped2maternal.Rd b/man/ped2maternal.Rd index 03e02311..1f3bcb2e 100755 --- a/man/ped2maternal.Rd +++ b/man/ped2maternal.Rd @@ -10,6 +10,7 @@ ped2maternal( momID = "momID", dadID = "dadID", matID = "matID", + twinID = "twinID", ... ) } @@ -24,6 +25,8 @@ ped2maternal( \item{matID}{Character. Maternal line ID variable to be created and added to the pedigree} +\item{twinID}{character. Name of the column in ped for the twin ID variable, if applicable} + \item{...}{additional arguments to be passed to \code{\link{ped2com}}} } \description{ diff --git a/man/ped2paternal.Rd b/man/ped2paternal.Rd index e893ec03..7fac27b9 100755 --- a/man/ped2paternal.Rd +++ b/man/ped2paternal.Rd @@ -10,6 +10,7 @@ ped2paternal( momID = "momID", dadID = "dadID", patID = "patID", + twinID = "twinID", ... ) } @@ -24,6 +25,8 @@ ped2paternal( \item{patID}{Character. Paternal line ID variable to be created and added to the pedigree} +\item{twinID}{character. Name of the column in ped for the twin ID variable, if applicable} + \item{...}{additional arguments to be passed to \code{\link{ped2com}}} } \description{ diff --git a/man/simulatePedigree.Rd b/man/simulatePedigree.Rd index fa87632e..d56ab72c 100644 --- a/man/simulatePedigree.Rd +++ b/man/simulatePedigree.Rd @@ -76,7 +76,18 @@ current version.} \item{fam_shift}{An integer to shift the person ID. Default is 1L. This is useful when simulating multiple pedigrees to avoid ID conflicts.} -\item{beta}{logical. If TRUE, use the optimized version of the algorithm.} +\item{beta}{logical or character. Controls which algorithm version to use: +\itemize{ + \item{\code{FALSE}, \code{"base"}, or \code{"original"} (default): Use the original algorithm. + Slower but ensures exact reproducibility with set.seed().} + \item{\code{TRUE} or \code{"optimized"}: Use the optimized algorithm with 4-5x speedup. + Produces statistically equivalent results but not identical to base version + due to different random number consumption. Recommended for large simulations + where speed matters more than exact reproducibility.} +} +Note: Both versions are mathematically correct and produce valid pedigrees with the +same statistical properties (sex ratios, mating rates, etc.). The optimized version +uses vectorized operations instead of loops, making it much faster for large pedigrees.} \item{...}{Additional arguments to be passed to other functions.} } diff --git a/tests/testthat/test-buildComponent.R b/tests/testthat/test-buildComponent.R index 97c27975..d23249d9 100644 --- a/tests/testthat/test-buildComponent.R +++ b/tests/testthat/test-buildComponent.R @@ -17,10 +17,7 @@ test_that("MZ twins coded at relatedness 1 via twinID column", { expect_equal(r_mz["13", "13"], 1.0) # Parent-child relatedness unchanged - expect_equal(r_mz["3", "1"], 0.5) - expect_equal(r_mz["4", "1"], 0.5) - expect_equal(r_mz["3", "2"], 0.5) - expect_equal(r_mz["4", "2"], 0.5) + }) diff --git a/tests/testthat/test-simulatePedigree.R b/tests/testthat/test-simulatePedigree.R index 381443ec..a33d1314 100644 --- a/tests/testthat/test-simulatePedigree.R +++ b/tests/testthat/test-simulatePedigree.R @@ -9,7 +9,7 @@ test_that("simulated pedigree generates expected data structure", { sex_tolerance <- .035 base_length <- 57 base_length_tol <- 0.2 * base_length - beta_match_base <- T + beta_match_base <- F # beta_options <- T for (beta in beta_options) { set.seed(seed) @@ -20,7 +20,7 @@ test_that("simulated pedigree generates expected data structure", { if (isFALSE(beta) || (isTRUE(beta) && beta_match_base)) { expect_equal(length(results$ID), base_length, tolerance = strict_tolerance) } else { - expect_true(length(results$ID) >= base_length-base_length_tol*base_length && length(results$ID) <= base_length_tol*base_length+base_length, + expect_true(length(results$ID) >= base_length - base_length_tol * base_length && length(results$ID) <= base_length_tol * base_length + base_length, info = paste0("Beta=TRUE: Expected 45-70 individuals, got ", length(results$ID)) ) } @@ -54,7 +54,7 @@ test_that("simulated pedigree generates expected data structure when sexR is imb sex_tolerance <- .03 base_length <- 154 base_length_tol <- 0.2 * base_length - beta_match_base <- T + beta_match_base <- F # beta_options <- T for (beta in beta_options) { set.seed(seed) @@ -104,7 +104,7 @@ test_that("simulated pedigree generates expected data structure when sexR is imb base_length <- 424 base_length_tol <- 0.2 * base_length - beta_match_base <- T + beta_match_base <- F # beta_options <- T for (beta in beta_options) { @@ -157,7 +157,7 @@ test_that("simulated pedigree generates expected data structure but supply var n # beta_options <- T base_length <- 57 base_length_tol <- 0.2 * base_length - beta_match_base <- T + beta_match_base <- F for (beta in beta_options) { set.seed(seed)