Question

NanoStringDiff - housekeeping genes variation

0

Entering edit mode

YM • 0

@ym-25082

Last seen 3.1 years ago

Hi!
I am using NanoStringDiff to analyse Nanostring mRNA data.
I use the function PlotsPositiveHousekeeping to check the variation of housekeeping genes (hkg). According to the code of this function (see below), counts for hkg are scaled using a size factor calculated from the coefficients of a generalized linear model with positive and negative controls counts as response variable and the concentration of the positive controls as predictor.
I am wondering why square-root is applied to the size factor when hkg counts are standardized ?

  coeff = c  # real coefficients
  c = c/mean(c)  # size factors

  for (i in 1:nrow(housekeeping)){
      housekeeping[i,]<-(housekeeping[i,]-mean.neg)/c^(1/2) 
}

Complete code of PlotsPositiveHousekeeping from https://github.com/tingtina/NanoStringDiff/blob/master/R/PlotsPositiveHousekeeping.R

PlotsPositiveHousekeeping<- function(path=path, header=TRUE){
  # ------------------------------------ #
  # path: points out the directory in which your csv.file is located #
  # header: a logical value(TRUE or FALSE) indicating whether the file contains the names of the variables as its first line. #
  # designs: data frame for pheno type data storage #
  # ------------------------------------ #
  data = read.table(path, header = header, sep = ",")

  if (is.null(data)) {
      stop("There is no counts data")
  }

  selectcol = !(names(data) %in% c("Code.Class", "Name", "Accession"))

  ## remove NA from data set
  counts = data[,selectcol]
  counts = as.matrix(counts)
  id = which(is.na(rowSums(counts)))
  if (length(id) > 0) {
      data = data[-id, ]
  }

  code.class = tolower(data[, c("Code.Class")])
  name = data[, c("Name")]
  accession = data[, c("Accession")]

  counts = data[,selectcol]
  counts = as.matrix(counts)
  rownames(counts) = name

  pos.id = grep("positive", code.class, fixed = TRUE)
  neg.id = grep("negative", code.class, fixed = TRUE)
  house.id = grep("housekeeping", code.class, fixed = TRUE)
  spikein.id = grep("spikein", code.class, fixed = TRUE)

  positive = counts[pos.id, ]
  negative = counts[neg.id, ]
  housekeeping = counts[house.id, ]
  house.adjust = counts[house.id, ]

  mean.neg<-colMeans(negative)

  pos = positive
  pos[pos <= 0] = 1
  neg = negative
  x = c(128, 32, 8, 2, 0.5, 0.125, rep(0, nrow(neg)))
  Y = rbind(pos, neg)
  n = ncol(Y)
  c = rep(0, n)
  for (i in 1:n) {

      model = glm(Y[, i] ~ x, family = poisson(link = identity))
      c[i] = model$coeff[2]
  }

  coeff = c  # real coefficients
  c = c/mean(c)  # size factors

  for (i in 1:nrow(housekeeping)){
      housekeeping[i,]<-(housekeeping[i,]-mean.neg)/c^(1/2)
  }

  nhouse<-nrow(housekeeping)
  varhouse<-rowVars(housekeeping)^(1/2)/rowMeans(housekeeping)
  con=c(128,32,8,2,0.5,0.125)
  par(mfrow=c(1,2),mar=c(5,6,2,1)+0.1, oma=c(2,2,2,2)+0.1)
  plot(con,rowMeans(positive),xlab="Concentration",ylab="Positive Counts",
  col = "blue")
  abline(lm(rowMeans(positive) ~ con))
  plot(c(1:nhouse),varhouse,xaxt='n',xlab="",ylab="Coefficient of Variation",
  col = "blue", xlim =c(1,nhouse+1), ylim =c(min(varhouse)*0.9,max(varhouse)*1.1))
  title(xlab="Housekeeping Genes", line=1.5)
  textxy(c(1:nhouse), varhouse, labs=rownames(housekeeping), cex = 1, m = c(0, 0), offset = 0.65)
} # end function #

NanoStringDiff • 648 views

ADD COMMENT • link 3.1 years ago YM • 0