Dear Rory,
Thank you for your efforts in DiffBind.
A basic overview of my dataset. There are two conditions, each having two bio-replicates. I'd like to run differential analysis on an IDR peak collection (52,704 peaks). However, only 5,231 peaks were considered and show the expression with function dba.count. I'm wondering if I made a mistake. I would be so grateful if you have any ideas. Many thanks in advance!
My code is as follows.
library(DiffBind)
samples <- read.csv("in.info.csv")
tamoxifen <- dba(sampleSheet=samples)
library(rtracklayer)
mypeaks<- import("IDR_peak.bed", format="BED")
if(1){
###THE FIRST TRY
tamoxifen.counted <- dba.count(tamoxifen,peaks=mypeaks,summits=0,bParallel=TRUE)
#4 Samples, 5231 sites in matrix
}
if(1){
###THE SECOND TRY
tamoxifen.counted <- dba.count(tamoxifen,peaks=mypeaks, bParallel=TRUE)
#4 Samples, 45795 sites in matrix, with peak width= 401 bp (some may be less than 401 bp)
}
if(1){
###THE THIRD TRY
tamoxifen.counted <- dba.count(tamoxifen,peaks=mypeaks,filter=0,summits=FALSE,bParallel=TRUE)
tamoxifen.counted
#4 Samples, 52704 sites in matrix:
# ID Tissue Treatment Replicate Reads FRiP
#1 treatment_1 blood treatment 1 21484301 0.09
#2 treatment_2 blood treatment 2 24059705 0.08
#3 control_1 blood control 1 25190091 0.09
#4 control_2 blood coltrol 2 23613949 0.08
raw <- dba.count(tamoxifen.counted, peaks=NULL, score=DBA_SCORE_READS)
raw_count <- dba.peakset(raw, minOverlap=1, bRetrieve=TRUE, DataType=DBA_DATA_FRAME)
write.table(raw_count, file="res.consensus_raw.txt", quote=F, sep="\t", row.names=F, col.names=T)
#wc -l res.consensus_raw.txt
#9450 res.consensus_raw.txt
}
if(1){
###THE FOURTH TRY
tamoxifen.counted <- dba.count(tamoxifen,peaks=mypeaks, bParallel=TRUE,filter=0,summits=200)
#4 Samples, 52697 sites in matrix, with peak width= 401 bp (some may be less than 401 bp)
# ID Tissue Treatment Replicate Reads FRiP
#1 treatment_1 blood treatment 1 21484301 0.02
#2 treatment_2 blood treatment 2 24059705 0.02
#3 control_1 blood control 1 25190091 0.02
#4 control_2 blood coltrol 2 23613949 0.02
raw <- dba.count(tamoxifen.counted, peaks=NULL, score=DBA_SCORE_READS)
raw_count <- dba.peakset(raw, minOverlap=1, bRetrieve=TRUE, DataType=DBA_DATA_FRAME)
write.table(raw_count, file="res.consensus_raw.txt", quote=F, sep="\t", row.names=F, col.names=T)
#wc -l res.consensus_raw.txt
#45796 res.consensus_raw.txt
}
> sessionInfo( )
R version 4.2.3 (2023-03-15)
Platform: x86_64-conda-linux-gnu (64-bit)
Running under: Ubuntu 20.04.4 LTS
Matrix products: default
BLAS/LAPACK: /home/software/miniconda3/lib/libopenblasp-r0.3.21.so
locale:
[1] LC_CTYPE=en_HK.UTF-8 LC_NUMERIC=C
[3] LC_TIME=en_HK.UTF-8 LC_COLLATE=en_HK.UTF-8
[5] LC_MONETARY=en_HK.UTF-8 LC_MESSAGES=en_HK.UTF-8
[7] LC_PAPER=en_HK.UTF-8 LC_NAME=C
[9] LC_ADDRESS=C LC_TELEPHONE=C
[11] LC_MEASUREMENT=en_HK.UTF-8 LC_IDENTIFICATION=C
attached base packages:
[1] stats4 stats graphics grDevices utils datasets methods
[8] base
other attached packages:
[1] rtracklayer_1.58.0 DiffBind_3.8.4
[3] SummarizedExperiment_1.28.0 Biobase_2.58.0
[5] MatrixGenerics_1.10.0 matrixStats_0.63.0
[7] GenomicRanges_1.50.2 GenomeInfoDb_1.34.9
[9] IRanges_2.32.0 S4Vectors_0.36.2
[11] BiocGenerics_0.44.0
loaded via a namespace (and not attached):
[1] bitops_1.0-7 bit64_4.0.5 httr_1.4.6
[4] RColorBrewer_1.1-3 numDeriv_2016.8-1.1 tools_4.2.3
[7] utf8_1.2.3 R6_2.5.1 irlba_2.3.5.1
[10] KernSmooth_2.23-21 DBI_1.1.3 colorspace_2.1-0
[13] apeglm_1.20.0 tidyselect_1.2.0 DESeq2_1.38.3
[16] bit_4.0.5 compiler_4.2.3 cli_3.6.1
[19] DelayedArray_0.24.0 caTools_1.18.2 scales_1.2.1
[22] SQUAREM_2021.1 mvtnorm_1.1-3 mixsqp_0.3-48
[25] stringr_1.5.0 digest_0.6.31 Rsamtools_2.14.0
[28] XVector_0.38.0 jpeg_0.1-10 pkgconfig_2.0.3
[31] htmltools_0.5.5 fastmap_1.1.1 invgamma_1.1
[34] bbmle_1.0.25 limma_3.54.2 BSgenome_1.66.3
[37] htmlwidgets_1.6.2 rlang_1.1.1 RSQLite_2.3.1
[40] BiocIO_1.8.0 generics_0.1.3 hwriter_1.3.2.1
[43] BiocParallel_1.32.6 gtools_3.9.4 dplyr_1.1.2
[46] RCurl_1.98-1.12 magrittr_2.0.3 GenomeInfoDbData_1.2.9
[49] interp_1.1-4 Matrix_1.5-4 Rcpp_1.0.10
[52] munsell_0.5.0 fansi_1.0.4 lifecycle_1.0.3
[55] stringi_1.7.12 yaml_2.3.7 MASS_7.3-60
[58] zlibbioc_1.44.0 gplots_3.1.3 plyr_1.8.8
[61] blob_1.2.4 grid_4.2.3 parallel_4.2.3
[64] ggrepel_0.9.3 bdsmatrix_1.3-6 crayon_1.5.2
[67] deldir_1.0-6 lattice_0.21-8 Biostrings_2.66.0
[70] annotate_1.76.0 KEGGREST_1.38.0 locfit_1.5-9.7
[73] pillar_1.9.0 rjson_0.2.21 systemPipeR_2.4.0
[76] geneplotter_1.76.0 codetools_0.2-19 XML_3.99-0.14
[79] glue_1.6.2 ShortRead_1.56.1 GreyListChIP_1.30.0
[82] latticeExtra_0.6-30 png_0.1-8 vctrs_0.6.2
[85] gtable_0.3.3 amap_0.8-19 cachem_1.0.8
[88] ashr_2.2-54 ggplot2_3.4.2 emdbook_1.3.12
[91] xtable_1.8-4 restfulr_0.0.15 coda_0.19-4
[94] truncnorm_1.0-9 tibble_3.2.1 memoise_2.0.1
[97] AnnotationDbi_1.60.2 GenomicAlignments_1.34.1