Search
Question: prior.prob has opposite effects in goana and kegga?
0
gravatar for Jenny Drnevich
7 days ago by
Jenny Drnevich1.8k
United States
Jenny Drnevich1.8k wrote:

Hi all,

I've come across some unexpected results when trying to go over-representation testing on array methylation data, which I've traced to the underlying goana() and kegga() functions from limma. Genes can have different numbers of CpGs, and so the gometh() function from missMethyl will by default calculate the probability of a gene being called significant based on the number of CpGs for that gene and then pass this as the prior.prob argument for goana or kegga. For GO testing, adjusting for this prior.prob decreases the number of significant GO terms as I would have expected. However, for KEGG pathways, adjusting for the prior.prob greatly increases the number of significant pathways. Even randomly selected sets of CpGs have over half of the 319 KEGG pathways with raw p < 0.001 when adjusting for prior.prob, but only a dozen pathways when not adjusted. How is this possible? Reproducible example below.

Thanks,

Jenny

 

# codes for creating the necessary objects from the missMethyl vignette:

## ----load-libs, message=FALSE--------------------------------------------
library(missMethyl)
library(limma)
library(minfi)

## ----reading-data, message=FALSE-----------------------------------------
library(minfiData)
baseDir <- system.file("extdata", package = "minfiData")
targets <- read.metharray.sheet(baseDir)
rgSet <- read.metharray.exp(targets = targets)

## ----ppraw---------------------------------------------------------------
mSet <- preprocessRaw(rgSet)

## ----diffmeth2-----------------------------------------------------------
# get M-values for ALL probes
meth <- getMeth(mSet)
unmeth <- getUnmeth(mSet)
M <- log2((meth + 100)/(unmeth + 100))
grp <- factor(targets$status,levels=c("normal","cancer"))
des <- model.matrix(~grp)
INCs <- getINCs(rgSet)
Mc <- rbind(M,INCs)
ctl <- rownames(Mc) %in% rownames(INCs)
rfit1 <- RUVfit(data=Mc, design=des, coef=2, ctl=ctl) # Stage 1 analysis
rfit2 <- RUVadj(rfit1)

## ----ruv1----------------------------------------------------------------
top1 <- topRUV(rfit2, num=Inf)
ctl <- rownames(M) %in% rownames(top1[top1$p.ebayes.BH > 0.5,])

## ----ruv2----------------------------------------------------------------
# Perform RUV adjustment and fit
rfit1 <- RUVfit(data=M, design=des, coef=2, ctl=ctl) # Stage 2 analysis
rfit2 <- RUVadj(rfit1)

## ----gometh3-------------------------------------------------------------
topCpGs<-topRUV(rfit2,number=10000)
sigCpGs <- rownames(topCpGs)

## ----gometh4-------------------------------------------------------------
library(IlluminaHumanMethylation450kanno.ilmn12.hg19)

# end missMethyl vignette

# check how many GO and KEGG have raw p < 0.001 for the selected CpG set

gst <- gometh(sig.cpg=sigCpGs, all.cpg=rownames(rfit2), collection="GO")
sum(gst$P.DE < 0.001)

# [1] 628 - with bias correction

gst.nobias <- gometh(sig.cpg=sigCpGs, all.cpg=rownames(rfit2), collection="GO", prior.prob = FALSE)
sum(gst.nobias$P.DE < 0.001)

#[1] 1126 - without bias correction

kst <- gometh(sig.cpg=sigCpGs, all.cpg=rownames(rfit2), collection="KEGG")
sum(kst$P.DE < 0.001)

# [1] 160 - with bias correction, out of 319 pathways total

kst.nobias <- gometh(sig.cpg=sigCpGs, all.cpg=rownames(rfit2), collection="KEGG", prior.prob = FALSE)
sum(kst.nobias$P.DE < 0.001)

#  [1] 38 - without bias correction

# Pull 10 random subset of CpGs of same size

rand.001 <- matrix(NA, 10, 4)
colnames(rand.001) <- c("gst","gst.nobias","kst","kst.nobias")

#This takes a few minutes:

for(i in 1:10) {
  temp <- sample(rownames(rfit2), size = length(sigCpGs))
  gst.rand <- gometh(sig.cpg=temp, all.cpg=rownames(rfit2), collection="GO")
  rand.001[i,1] <- sum(gst.rand$P.DE < 0.001)
  gst.nobias.rand <- gometh(sig.cpg=temp, all.cpg=rownames(rfit2), collection="GO", prior.prob = FALSE)
  rand.001[i,2] <- sum(gst.nobias.rand$P.DE < 0.001)
  kst.rand <- gometh(sig.cpg=temp, all.cpg=rownames(rfit2), collection="KEGG")
  rand.001[i,3] <- sum(kst.rand$P.DE < 0.001)
  kst.nobias.rand <- gometh(sig.cpg=temp, all.cpg=rownames(rfit2), collection="KEGG", prior.prob = FALSE)
  rand.001[i,4] <- sum(kst.nobias.rand$P.DE < 0.001)
  print(i)
}

colMeans(rand.001)

#       gst gst.nobias        kst kst.nobias 
#      24.1      556.6      258.9       12.6

sessionInfo()

R version 3.4.0 (2017-04-21)
Platform: x86_64-w64-mingw32/x64 (64-bit)
Running under: Windows >= 8 x64 (build 9200)

Matrix products: default

locale:
[1] LC_COLLATE=English_United States.1252  LC_CTYPE=English_United States.1252   
[3] LC_MONETARY=English_United States.1252 LC_NUMERIC=C                          
[5] LC_TIME=English_United States.1252    

attached base packages:
[1] stats4    parallel  stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
 [1] minfiData_0.22.0                                   IlluminaHumanMethylation450kanno.ilmn12.hg19_0.6.0
 [3] IlluminaHumanMethylation450kmanifest_0.4.0         minfi_1.22.1                                      
 [5] bumphunter_1.16.0                                  locfit_1.5-9.1                                    
 [7] iterators_1.0.8                                    foreach_1.4.3                                     
 [9] Biostrings_2.44.0                                  XVector_0.16.0                                    
[11] SummarizedExperiment_1.6.1                         DelayedArray_0.2.2                                
[13] matrixStats_0.52.2                                 Biobase_2.36.2                                    
[15] GenomicRanges_1.28.1                               GenomeInfoDb_1.12.0                               
[17] IRanges_2.10.1                                     S4Vectors_0.14.1                                  
[19] BiocGenerics_0.22.0                                limma_3.32.2                                      
[21] missMethyl_1.10.0                                 

#the rest deleted because I was over the character limit
ADD COMMENTlink modified 7 days ago • written 7 days ago by Jenny Drnevich1.8k
Please log in to add an answer.

Help
Access

Use of this site constitutes acceptance of our User Agreement and Privacy Policy.
Powered by Biostar version 2.2.0
Traffic: 224 users visited in the last hour