TMM normalization after RUVg correction
1
0
Entering edit mode
@14ef1b09
Last seen 3 days ago
Egypt

Is it important to use TMM normalization factors after RUVg correction?

I included library size as a factor of unwanted variation. What I understand so far is when calculating cpm values, the counts will be calculated from raw counts and ruv lib sizes which are effective library sizes and set norm.factors to 1.
Also, do I have to use voom with quality weights while already using RUVg correction?

dge=perform.RUVg.correction(dge = dge, k.variables = 3,
variable.to.assess = c("Age","lib.size"),
variable.threshold = c(0.2, 0.8),
ruvg.pvalue.threshold.group = 1e-2,
ruvg.pvalue.threshold.strongest.variable = 1e-2,
number.cores = 2,
verbose = TRUE)

cpm=cpm.DGEList(dge)

1
Entering edit mode
@james-w-macdonald-5106
Last seen 2 days ago
United States

I can't find any function called perform.RUVg.correction, so I won't comment on that except to say that you shouldn't be adjusting your counts, but instead should be computing extra variables that you can include in your model matrix (which is what RUVg provides you with).

0
Entering edit mode

Written by (Veld et al., 2022)

function(dge = dge,
k.variables = 3,
variable.to.assess = c("Age","lib.size"),
variable.threshold = c(0.2, 0.8),
ruvg.pvalue.threshold.group = 1e-2,
ruvg.pvalue.threshold.strongest.variable = 1e-2,
training.series.only = FALSE,
training.series.only.samples = training.samples,
number.cores = 2,
verbose = TRUE){

if (missing(dge)){
stop("Provide DGElist object")
}
stopifnot(class(dge) == "DGEList")

if (!is.numeric(k.variables)){
stop("Provide numeric value for k.variables")
}

if (k.variables < 1){
stop("k.variables should be 1 or more")
}

if (all(variable.to.assess %in% colnames(dge$samples)) != TRUE){ stop("Inputted variables do not match column names of the sample info table.") } if (length(variable.to.assess) != length(variable.threshold)){ stop("Number of variables in variable.to.assess should match number of thresholds") } if (!is.numeric(ruvg.pvalue.threshold.group)){ stop("Provide numeric value for ruvg.pvalue.threshold.group") } if (!is.numeric(ruvg.pvalue.threshold.strongest.variable)){ stop("Provide numeric value for ruvg.pvalue.threshold.strongest.variable") } if (training.series.only == TRUE){ allSamples <- training.series.only.samples allSamples <- allSamples[which(allSamples %in% colnames(dge))] } else { allSamples <- colnames(dge) } if (!"group" %in% variable.to.assess){ variable.to.assess <- c(variable.to.assess, "group") } matrix.variables <- matrix(NA, ncol = length(variable.to.assess) + 1, nrow = nrow(dge$counts) + 1)
colnames(matrix.variables) <- c("geneID", variable.to.assess)
matrix.variables[, 1] <- c(NA, rownames(dge$counts)) for (variable in variable.to.assess){ if (is.numeric(dge$samples[, variable])){
matrix.variables[1, variable] <- "numeric"
} else {
matrix.variables[1, variable] <- "categorial"
}
}

conf.loop <- data.frame(foreach(i = 1 : nrow(dge$counts), .combine = rbind) %do% { for (variable in variable.to.assess){ if (matrix.variables[1, variable] == "numeric"){ matrix.variables[i + 1, variable ] <- cor(dge$samples[allSamples, variable],
as.numeric(dge$counts[i, allSamples]), use = "complete.obs") } else { matrix.variables[i + 1, variable] <- as.numeric(unlist(summary( aov(as.numeric(dge$counts[i, allSamples]) ~ dge$samples[allSamples, variable])))[9]) } } }) stable.transcripts = NA for (variable in variable.to.assess){ if (variable %in% c("Group","group")){ stable.transcripts = stable.transcripts } else if (variable %in% c("Age", "age")){ stable.transcripts <- c(stable.transcripts, (matrix.variables[2 : nrow(matrix.variables), 1])[ which((as.numeric(matrix.variables[2 : nrow(matrix.variables), variable]) > variable.threshold[which(variable == variable.to.assess)]) == TRUE)], (matrix.variables[2 : nrow(matrix.variables), 1])[ which((as.numeric(matrix.variables[2 : nrow(matrix.variables), variable]) < -variable.threshold[which(variable == variable.to.assess)]) == TRUE)] ) } else { stable.transcripts <- c(stable.transcripts, (matrix.variables[2 : nrow(matrix.variables), 1])[ which((as.numeric(matrix.variables[2 : nrow(matrix.variables), variable]) > variable.threshold[which(variable == variable.to.assess)]) == TRUE)] ) } } stable.transcripts <- unique(stable.transcripts[which(!stable.transcripts == "NA")]) if (verbose == TRUE){ print(paste("Number of stable transcripts selected for RUV-module correction:", as.numeric(length(stable.transcripts)))) } if (length(stable.transcripts) > 3){ RUVg.pre.correction <- RUVg(x = as.matrix(dge[, allSamples]$counts),
cIdx = stable.transcripts,
k = k.variables)

matrix.variables.ruvg <- matrix(NA, ncol = length(variable.to.assess) + 2, nrow = k.variables + 1)
colnames(matrix.variables.ruvg) <- c("Axis", variable.to.assess, "Verdict")
for (variable in variable.to.assess){
if (is.numeric(dge$samples[, variable])){ matrix.variables.ruvg[1, variable] <- "numeric" } else { matrix.variables.ruvg[1, variable] <- "categorial" } } if (k.variables > ncol(RUVg.pre.correction$W)) {k.variables = ncol(RUVg.pre.correction$W)} RUVg.loop <- data.frame(foreach(i = 1 : k.variables, .combine = rbind) %do% { matrix.variables.ruvg[i + 1, "Axis"] <- i for (variable in variable.to.assess){ if (matrix.variables.ruvg[1, variable] == "numeric"){ matrix.variables.ruvg[i + 1, variable ] <- as.numeric(cor.test(RUVg.pre.correction$W[, i],
dge$samples[allSamples, variable])$p.value)
} else {
matrix.variables.ruvg[i + 1, variable] <- min(summary(aov(
RUVg.pre.correction$W[, i] ~ dge$samples[allSamples, variable]))[[1]][["Pr(>F)"]], na.rm = T)
}
}

if (as.numeric(matrix.variables.ruvg[i + 1, "group"]) > ruvg.pvalue.threshold.group) {
strongest.variable <- colnames(matrix.variables.ruvg)[
which(as.numeric(matrix.variables.ruvg[i + 1, 2 : ncol(matrix.variables.ruvg) - 1]) ==
min(as.numeric(matrix.variables.ruvg[i + 1, 2 : ncol(matrix.variables.ruvg) - 1])))
]
} else {
strongest.variable <- "group"
}

if (as.numeric(matrix.variables.ruvg[i + 1, strongest.variable]) < ruvg.pvalue.threshold.strongest.variable) {
matrix.variables.ruvg[i + 1, "Verdict"] <- strongest.variable
}
})

axis.group <- (matrix.variables.ruvg[2 : nrow(matrix.variables.ruvg), "Axis"])[
which(matrix.variables.ruvg[2 : nrow(matrix.variables.ruvg), "Verdict"] == "group")
]
axis.na <- (matrix.variables.ruvg[2 : nrow(matrix.variables.ruvg), "Axis"])[
is.na(matrix.variables.ruvg[2 : nrow(matrix.variables.ruvg), "Verdict"])
]
axis.confounding <- (matrix.variables.ruvg[2 : nrow(matrix.variables.ruvg), "Axis"])[
which(!(matrix.variables.ruvg[2 : nrow(matrix.variables.ruvg), "Axis"]) %in%
c(axis.group, axis.na))
]
axis.all <- (matrix.variables.ruvg[2 : nrow(matrix.variables.ruvg), "Axis"])
axis.drop <- 0
axis.removed <- FALSE

axis.loop <- foreach(i = 1 : k.variables) %do% {
if (i == 1) {
if (i %in% axis.confounding) {
RUVg.post.correction <- RUVg(dge$counts, stable.transcripts, k = axis.drop + 1, drop = 0) axis.removed <- TRUE } else { axis.drop <- 1 } } else { if (i %in% axis.confounding) { if(axis.removed == TRUE) { RUVg.post.correction <- RUVg(RUVg.post.correction$normalizedCounts, stable.transcripts,
k = axis.drop + 1, drop = axis.drop)
} else {
RUVg.post.correction <- RUVg(dge$counts, stable.transcripts, k = axis.drop + 1, drop = axis.drop) axis.removed <- TRUE } } else { axis.drop <- axis.drop + 1 } } } dge$raw.counts <- dge$counts if (length(axis.confounding) > 0){ dge$ruv.counts <- as.data.frame(RUVg.post.correction$normalizedCounts) } else { dge$ruv.counts <- dge$raw.counts } dge$samples$ruv.lib.size <- colSums(dge$ruv.counts)

dge$stable.transcripts <- stable.transcripts dge$RUVg.loop <- RUVg.loop
dge$axis.group <- axis.group dge$axis.na <- axis.na
dge$axis.confounding <- axis.confounding dge$axis.all <- axis.all

} else {
dge$raw.counts <- dge$counts
dge$ruv.counts <- dge$raw.counts
dge$samples$ruv.lib.size <- colSums(dge$ruv.counts) dge$stable.transcripts <- NA
dge$RUVg.loop <- NA dge$axis.group <- NA
dge$axis.na <- NA dge$axis.confounding <- NA
dge\$axis.all <- NA
}

return(dge)
}

1
Entering edit mode

This support site is meant to help people who have technical questions about Bioconductor packages. Your question has to do with non-Bioconductor code, and is more application based. You might try over on biostars.org instead.