Question

Error in ChIPQC: "Error in if (res >= minval) { : missing value where TRUE/FALSE needed"

0

Entering edit mode

liz.ingsimmons ▴ 140

@lizingsimmons-6935

Last seen 3.7 years ago

Germany

I would like to run ChIPQC on some ENCODE data downloaded using the ENCODExplorer package. Not all the samples have peaks available for download, however I'd still like to see e.g. % of reads in blacklist. I thought that this should be possible with ChIPQC, however I'm getting a (fairly uninformative) error, that persists even if I subset to only the samples that do have peaks.

See samplesheet, traceback, sessionInfo and code to get the data below:

samplesheet <- structure(list(Factor = c("CTCF", "CTCF", "CTCF", "CTCF"),
assay = c("ChIP-seq", "ChIP-seq", "ChIP-seq", "ChIP-seq"), Tissue =
c("CH12.LX", "CH12.LX", "CH12.LX", "CH12.LX"), Replicate = c(1L, 1L, 2L, 2L),
lab = c("Michael Snyder, Stanford", "Ross Hardison, PennState", "Michael
Snyder, Stanford", "Ross Hardison, PennState" ), organism = structure(c(3L,
3L, 3L, 3L), .Label = c("Drosophila melanogaster", "Homo sapiens", "Mus
musculus"), class = "factor"), assembly = c("mm9", "mm9", "mm9", "mm9"),
bamReads = c("ENCFF001NGB.bam", "ENCFF001MLC.bam", "ENCFF001NGJ.bam",
"ENCFF001MLD.bam"), Peaks = c(NA, "ENCFF001MLH.bed", NA, "ENCFF001MLI.bed"),
lab_short = list("Stanford", "PennState", "Stanford", "PennState"), SampleID =
c("CTCF_CH12.LX_Stanford_1", "CTCF_CH12.LX_PennState_1",
"CTCF_CH12.LX_Stanford_2", "CTCF_CH12.LX_PennState_2" )), class =
"data.frame", .Names = c("Factor", "assay", "Tissue", "Replicate", "lab",
"organism", "assembly", "bamReads", "Peaks", "lab_short", "SampleID"),
row.names = c(NA, -4L))

qc_res <- ChIPQC(samplesheet, annotation = "mm9", chromosome = "chr1",
                 blacklist = mm9_blacklist)
# CTCF_CH12.LX_Stanford_1 CH12.LX CTCF   1 bed
# CTCF_CH12.LX_PennState_1 CH12.LX CTCF   1 bed
# CTCF_CH12.LX_Stanford_2 CH12.LX CTCF   2 bed
# CTCF_CH12.LX_PennState_2 CH12.LX CTCF   2 bed
# Error in if (res >= minval) { : missing value where TRUE/FALSE needed

# only ones with peaks available
qc_res <- ChIPQC(samplesheet[!is.na(samplesheet$Peaks),],
                 samples=qc_res_list[!is.na(samplesheet$Peaks)])

# CTCF_CH12.LX_Stanford_1 CH12.LX CTCF   1 bed
# CTCF_CH12.LX_PennState_1 CH12.LX CTCF   1 bed
# CTCF_CH12.LX_Stanford_2 CH12.LX CTCF   2 bed
# CTCF_CH12.LX_PennState_2 CH12.LX CTCF   2 bed
#Error in if (res >= minval) { : missing value where TRUE/FALSE needed
#   traceback()
#   6: FUN(newX[, i], ...)
#   5: apply(pv$allvectors[, 4:ncol(pv$allvectors)], 1, pv.minOverlap,
#            minOverlap)
#   4: pv.vectors(model, mask = mask, minOverlap = minOverlap, bKeepAll = bKeepAll,
#                 bAnalysis = bAnalysis, attributes = attributes)
#   3: pv.model(DBA, mask = mask, minOverlap = minOverlap, samplesheet = sampleSheet,
#               config = config, caller = peakCaller, format = peakFormat,
#               scorecol = scoreCol, bLowerBetter = bLowerScoreBetter, skipLines = skipLines,
#               bAddCallerConsensus = bAddCallerConsensus, bRemoveM = bRemoveM,
#               bRemoveRandom = bRemoveRandom, bKeepAll = TRUE, bAnalysis = TRUE,
#               filter = filter, attributes = attributes)
#   2: dba(sampleSheet = experiment, bCorPlot = FALSE, peakCaller = "bed")
#   1: ChIPQC(samplesheet, annotation = "mm9", chromosome = "chr1",
#             blacklist = mm9_blacklist)
 
sessionInfo()
# R version 3.2.3 (2015-12-10)
# Platform: x86_64-pc-linux-gnu (64-bit)
# Running under: Debian GNU/Linux stretch/sid
#
# locale:
#   [1] LC_CTYPE=en_GB.UTF-8       LC_NUMERIC=C               LC_TIME=en_GB.UTF-8       
# [4] LC_COLLATE=en_GB.UTF-8     LC_MONETARY=en_GB.UTF-8    LC_MESSAGES=en_GB.UTF-8   
# [7] LC_PAPER=en_GB.UTF-8       LC_NAME=C                  LC_ADDRESS=C              
# [10] LC_TELEPHONE=C             LC_MEASUREMENT=en_GB.UTF-8 LC_IDENTIFICATION=C       
#
# attached base packages:
#   [1] stats4    parallel  stats     graphics  grDevices utils     datasets  methods   base     
#
# other attached packages:
#   [1] TxDb.Mmusculus.UCSC.mm9.knownGene_3.2.2 GenomicFeatures_1.22.13                
# [3] AnnotationDbi_1.32.3                    dplyr_0.4.3                            
# [5] tidyr_0.4.0                             ChIPQC_1.6.1                           
# [7] DiffBind_1.16.3                         RSQLite_1.0.0                          
# [9] DBI_0.3.1                               locfit_1.5-9.1                         
# [11] GenomicAlignments_1.6.3                 Rsamtools_1.22.0                       
# [13] Biostrings_2.38.4                       XVector_0.10.0                         
# [15] limma_3.26.8                            SummarizedExperiment_1.0.2             
# [17] Biobase_2.30.0                          GenomicRanges_1.22.4                   
# [19] GenomeInfoDb_1.6.3                      IRanges_2.4.8                          
# [21] S4Vectors_0.8.11                        BiocGenerics_0.16.1                    
# [23] ggplot2_2.0.0                           ENCODExplorer_1.2.1                    
# [25] BiocInstaller_1.20.1                   
#
# loaded via a namespace (and not attached):
#   [1] edgeR_3.12.0           jsonlite_0.9.19        splines_3.2.3          gtools_3.5.0          
# [5] assertthat_0.1         highr_0.5.1            latticeExtra_0.6-28    amap_0.8-14           
# [9] RBGL_1.46.0            Category_2.36.0        backports_1.0.0        lattice_0.20-33       
# [13] digest_0.6.9           RColorBrewer_1.1-2     checkmate_1.7.2        colorspace_1.2-6      
# [17] Matrix_1.2-3           plyr_1.8.3             GSEABase_1.32.0        chipseq_1.20.0        
# [21] XML_3.98-1.3           pheatmap_1.0.8         ShortRead_1.28.0       biomaRt_2.26.1        
# [25] genefilter_1.52.1      zlibbioc_1.16.0        xtable_1.8-2           GO.db_3.2.2           
# [29] scales_0.4.0           brew_1.0-6             gdata_2.17.0           BiocParallel_1.4.3    
# [33] annotate_1.48.0        lazyeval_0.1.10        survival_2.38-3        magrittr_1.5          
# [37] systemPipeR_1.4.8      fail_1.3               gplots_2.17.0          hwriter_1.3.2         
# [41] GOstats_2.36.0         graph_1.48.0           tools_3.2.3            BBmisc_1.9            
# [45] stringr_1.0.0          sendmailR_1.2-1        munsell_0.4.3          lambda.r_1.1.7        
# [49] caTools_1.17.1         futile.logger_1.4.1    grid_3.2.3             RCurl_1.95-4.7        
# [53] rjson_0.2.15           AnnotationForge_1.12.2 bitops_1.0-6           base64enc_0.1-3       
# [57] gtable_0.2.0           R6_2.1.2               reshape2_1.4.1         Nozzle.R1_1.1-1       
# [61] knitr_1.12.3           rtracklayer_1.30.2     futile.options_1.0.0   KernSmooth_2.23-15    
# [65] stringi_1.0-1          BatchJobs_1.6          Rcpp_0.12.3

# Code for downloading data and preparing samplesheet:
library(ENCODExplorer)
library(ChIPQC)

## Get data
biosamples <- c("CH12.LX")
targets <- c("CTCF")
formats <- c("bam", "bigBed")

res <- queryEncode(assay = "ChIP-seq", organism = "Mus musculus", fixed = FALSE,
          biosample = paste(biosamples, collapse = "|"),
          target = paste(targets, collapse = "|"),
          file_format = paste(formats, collapse = "|"))

#pretty table
cols <- c("target","assay", "biosample_name", "biological_replicate_number",
          "file_format","lab", "organism", "assembly", "file_accession")
knitr::kable(res$experiment[, cols])

#download data
downloadEncode(resultSet = res,
               resultOrigin = "queryEncode", dir = ".")

## Make samplesheet, etc
samplesheet <- res$experiment[, cols] %>%
  spread(key = file_format, value = file_accession) %>% #rename for ChIPQC conventions
  rename(bamReads = bam, Peaks = bigBed, Tissue = biosample_name,
         Factor = target, Replicate = biological_replicate_number) %>%
  mutate(bamReads = paste0(bamReads, ".bam"),
         Peaks = ifelse(is.na(Peaks), NA, paste0(Peaks, ".bed")),
         Factor = gsub("-mouse", "", Factor)) %>%
  mutate(lab_short = lapply(lab, function(x) strsplit(x, ", ")[[1]][2])) %>% #just institution
  mutate(SampleID = paste(Factor, Tissue, lab_short, Replicate, sep = "_")) #make Sample ID

bed_files <- unique(samplesheet$Peaks[!is.na(samplesheet$Peaks)])
for (bf in bed_files){
  bb <- sub(".bed", ".bigBed", bf)
  message("converting file: ", bb)
  system(paste("bigBedToBed", bb, bf, sep = " "))
}

mm9_blacklist <- makeGRangesFromDataFrame(read.table("mm9-blacklist.bed",
                                                     col.names = c("chr", "start", "end")))

chipqc • 2.1k views

ADD COMMENT • link updated 8.2 years ago by Rory Stark ★ 5.2k • written 8.4 years ago by liz.ingsimmons ▴ 140

score 0 · Answer 1 · 2016-05-16

Hi Liz-

I was finally able to reproduce this and get to the bottom of what is going on. It was the result of a change made to the DiffBind package. Fixes have been checked in for both DiffBind (2.0.1) and ChIPQC (1.8.2). You may have to wait a day or two for the chIPQC change to propagate in to the latest release.

Regards-

Rory