TMM normalization after RUVg correction
1
0
Entering edit mode
@14ef1b09
Last seen 3 months ago
Egypt

Is it important to use TMM normalization factors after RUVg correction?

I included library size as a factor of unwanted variation. What I understand so far is when calculating cpm values, the counts will be calculated from raw counts and ruv lib sizes which are effective library sizes and set norm.factors to 1.
Also, do I have to use voom with quality weights while already using RUVg correction?

dge=perform.RUVg.correction(dge = dge, k.variables = 3,
                            variable.to.assess = c("Age","lib.size"),
                            variable.threshold = c(0.2, 0.8),
                            ruvg.pvalue.threshold.group = 1e-2,
                            ruvg.pvalue.threshold.strongest.variable = 1e-2,
                            number.cores = 2, 
                            verbose = TRUE)

cpm=cpm.DGEList(dge)
RUVSeq limma DifferentialExpression edgeR Normalization • 691 views
ADD COMMENT
1
Entering edit mode
@james-w-macdonald-5106
Last seen 1 day ago
United States

I can't find any function called perform.RUVg.correction, so I won't comment on that except to say that you shouldn't be adjusting your counts, but instead should be computing extra variables that you can include in your model matrix (which is what RUVg provides you with).

ADD COMMENT
0
Entering edit mode

Written by (Veld et al., 2022)

function(dge = dge,
     k.variables = 3,
     variable.to.assess = c("Age","lib.size"),
     variable.threshold = c(0.2, 0.8),
     ruvg.pvalue.threshold.group = 1e-2,
     ruvg.pvalue.threshold.strongest.variable = 1e-2,
     training.series.only = FALSE,
     training.series.only.samples = training.samples,
     number.cores = 2, 
     verbose = TRUE){


if (missing(dge)){
  stop("Provide DGElist object")
 }
 stopifnot(class(dge) == "DGEList")

if (!is.numeric(k.variables)){
   stop("Provide numeric value for k.variables")
}

 if (k.variables < 1){
   stop("k.variables should be 1 or more")
 }

 if (all(variable.to.assess %in% colnames(dge$samples)) != TRUE){
   stop("Inputted variables do not match column names of the sample info table.")
 }

    if (length(variable.to.assess) != length(variable.threshold)){
      stop("Number of variables in variable.to.assess should match number of thresholds")
 }

 if (!is.numeric(ruvg.pvalue.threshold.group)){
   stop("Provide numeric value for ruvg.pvalue.threshold.group")
 }

 if (!is.numeric(ruvg.pvalue.threshold.strongest.variable)){
   stop("Provide numeric value for ruvg.pvalue.threshold.strongest.variable")
 }

 if (training.series.only == TRUE){
   allSamples <- training.series.only.samples
   allSamples <- allSamples[which(allSamples %in% colnames(dge))]
 } else {
   allSamples <- colnames(dge)
 }

  if (!"group" %in% variable.to.assess){
   variable.to.assess <- c(variable.to.assess, "group")
 }

 matrix.variables <- matrix(NA, ncol = length(variable.to.assess) + 1, nrow = nrow(dge$counts) + 1) 
 colnames(matrix.variables) <- c("geneID", variable.to.assess)
 matrix.variables[, 1] <- c(NA, rownames(dge$counts))
 for (variable in variable.to.assess){
  if (is.numeric(dge$samples[, variable])){
    matrix.variables[1, variable] <- "numeric"
  } else {
     matrix.variables[1, variable] <- "categorial"
   }
 }

 conf.loop <- data.frame(foreach(i = 1 : nrow(dge$counts), .combine = rbind) %do% {
   for (variable in variable.to.assess){
     if (matrix.variables[1, variable] == "numeric"){
      matrix.variables[i + 1, variable ] <- cor(dge$samples[allSamples, variable], 
                                               as.numeric(dge$counts[i, allSamples]), 
                                               use = "complete.obs")
    } else {
      matrix.variables[i + 1, variable] <- as.numeric(unlist(summary(
        aov(as.numeric(dge$counts[i, allSamples]) ~ dge$samples[allSamples, variable])))[9])
     }
    }
  })

stable.transcripts = NA
for (variable in variable.to.assess){
  if (variable %in% c("Group","group")){
    stable.transcripts = stable.transcripts
  } else if (variable %in% c("Age", "age")){
    stable.transcripts <- c(stable.transcripts,
                            (matrix.variables[2 : nrow(matrix.variables), 1])[
                                which((as.numeric(matrix.variables[2 : nrow(matrix.variables), variable]) > 
                                       variable.threshold[which(variable == variable.to.assess)])
                                    == TRUE)],
                            (matrix.variables[2 : nrow(matrix.variables), 1])[
                              which((as.numeric(matrix.variables[2 : nrow(matrix.variables), variable]) < 
                                       -variable.threshold[which(variable == variable.to.assess)])
                                    == TRUE)]
    )
   } else {
     stable.transcripts <- c(stable.transcripts,
                             (matrix.variables[2 : nrow(matrix.variables), 1])[
                               which((as.numeric(matrix.variables[2 : nrow(matrix.variables), variable]) > 
                                        variable.threshold[which(variable == variable.to.assess)])
                                     == TRUE)]  
    )
  }
}

 stable.transcripts <- unique(stable.transcripts[which(!stable.transcripts == "NA")])
 if (verbose == TRUE){
  print(paste("Number of stable transcripts selected for RUV-module correction:", 
              as.numeric(length(stable.transcripts))))
}

 if (length(stable.transcripts) > 3){

  RUVg.pre.correction <- RUVg(x = as.matrix(dge[, allSamples]$counts), 
                              cIdx = stable.transcripts, 
                              k = k.variables)

   matrix.variables.ruvg <- matrix(NA, ncol = length(variable.to.assess) + 2, nrow = k.variables + 1) 
   colnames(matrix.variables.ruvg) <- c("Axis", variable.to.assess, "Verdict")
   for (variable in variable.to.assess){
     if (is.numeric(dge$samples[, variable])){
      matrix.variables.ruvg[1, variable] <- "numeric"
     } else {
      matrix.variables.ruvg[1, variable] <- "categorial"
    }
   }

  if (k.variables > ncol(RUVg.pre.correction$W)) {k.variables = ncol(RUVg.pre.correction$W)}  

     RUVg.loop <- data.frame(foreach(i = 1 : k.variables, .combine = rbind) %do% {
    matrix.variables.ruvg[i + 1, "Axis"] <- i
    for (variable in variable.to.assess){
      if (matrix.variables.ruvg[1, variable] == "numeric"){
         matrix.variables.ruvg[i + 1, variable ] <- as.numeric(cor.test(RUVg.pre.correction$W[, i], 
                                                                        dge$samples[allSamples, variable])$p.value)
       } else {
         matrix.variables.ruvg[i + 1, variable] <- min(summary(aov(
          RUVg.pre.correction$W[, i] ~ dge$samples[allSamples, variable]))[[1]][["Pr(>F)"]], na.rm = T)
       }
    }

     if (as.numeric(matrix.variables.ruvg[i + 1, "group"]) > ruvg.pvalue.threshold.group) {
       strongest.variable <- colnames(matrix.variables.ruvg)[
        which(as.numeric(matrix.variables.ruvg[i + 1, 2 : ncol(matrix.variables.ruvg) - 1]) == 
                 min(as.numeric(matrix.variables.ruvg[i + 1, 2 : ncol(matrix.variables.ruvg) - 1])))
      ]
     } else {
       strongest.variable <- "group"
     }

    if (as.numeric(matrix.variables.ruvg[i + 1, strongest.variable]) < ruvg.pvalue.threshold.strongest.variable) {
      matrix.variables.ruvg[i + 1, "Verdict"] <- strongest.variable
    } 
   })

  axis.group <- (matrix.variables.ruvg[2 : nrow(matrix.variables.ruvg), "Axis"])[
    which(matrix.variables.ruvg[2 : nrow(matrix.variables.ruvg), "Verdict"] == "group")
  ]
  axis.na <- (matrix.variables.ruvg[2 : nrow(matrix.variables.ruvg), "Axis"])[
    is.na(matrix.variables.ruvg[2 : nrow(matrix.variables.ruvg), "Verdict"])
   ]
  axis.confounding <- (matrix.variables.ruvg[2 : nrow(matrix.variables.ruvg), "Axis"])[
    which(!(matrix.variables.ruvg[2 : nrow(matrix.variables.ruvg), "Axis"]) %in% 
             c(axis.group, axis.na))
  ]
   axis.all <- (matrix.variables.ruvg[2 : nrow(matrix.variables.ruvg), "Axis"])
   axis.drop <- 0
   axis.removed <- FALSE

   axis.loop <- foreach(i = 1 : k.variables) %do% {
     if (i == 1) {
      if (i %in% axis.confounding) {
        RUVg.post.correction <- RUVg(dge$counts, stable.transcripts, k = axis.drop + 1, drop = 0)
         axis.removed <- TRUE
       } else {
         axis.drop <- 1
        }
     } else {
       if (i %in% axis.confounding) {
        if(axis.removed == TRUE) {
           RUVg.post.correction <- RUVg(RUVg.post.correction$normalizedCounts, stable.transcripts, 
                                        k = axis.drop + 1, drop = axis.drop)
         } else {
           RUVg.post.correction <- RUVg(dge$counts, stable.transcripts, k = axis.drop + 1, drop = axis.drop)
           axis.removed <- TRUE
         }
       } else {
        axis.drop <- axis.drop + 1
       }
     }
   }

   dge$raw.counts <- dge$counts
   if (length(axis.confounding) > 0){
    dge$ruv.counts <- as.data.frame(RUVg.post.correction$normalizedCounts)
   } else {
    dge$ruv.counts <- dge$raw.counts
   }
   dge$samples$ruv.lib.size <- colSums(dge$ruv.counts)

   dge$stable.transcripts <- stable.transcripts
   dge$RUVg.loop <- RUVg.loop
   dge$axis.group <- axis.group
   dge$axis.na <- axis.na
  dge$axis.confounding <- axis.confounding
  dge$axis.all <- axis.all

} else {
   dge$raw.counts <- dge$counts
   dge$ruv.counts <- dge$raw.counts
   dge$samples$ruv.lib.size <- colSums(dge$ruv.counts)

   dge$stable.transcripts <- NA
    dge$RUVg.loop <- NA
    dge$axis.group <- NA
    dge$axis.na <- NA
    dge$axis.confounding <- NA
    dge$axis.all <- NA
}

return(dge)
 }
ADD REPLY
1
Entering edit mode

This support site is meant to help people who have technical questions about Bioconductor packages. Your question has to do with non-Bioconductor code, and is more application based. You might try over on biostars.org instead.

ADD REPLY

Login before adding your answer.

Traffic: 478 users visited in the last hour
Help About
FAQ
Access RSS
API
Stats

Use of this site constitutes acceptance of our User Agreement and Privacy Policy.

Powered by the version 2.3.6