Hi!
I am currently performing an EWAS on some DNA methylation data. I am using the Illumina EPIC 850k array and the minfi package. During QC, after removing samples that have a mean detection p-value > 0.05, I have found that over 800,000 probes are failing to reach the detection p-value threshold of 0.01. This is happening on the whole dataset and also on a subset of 7 samples. I am not sure where the issue is occurring. My colleagues have performed analysis with the same dataset and have only had around 20,000 probes fail to meet the threshold. I have checked that the idat files have loaded correctly and even re-uploaded the original data, but it is still occurring. If anyone has any advice on how to troubleshoot this, please let me know.
Regards, Lilly
# Detection p-values: one row per probe, one column per sample.
detP <- detectionP(rgSet)
write.table(detP, file = "detP.csv", sep = ",", quote = FALSE)
head(detP)

# Sample-level QC: keep samples whose mean detection p-value is below 0.05.
keep2 <- colMeans(detP) < 0.05
# NOTE(review): assumes the columns of mSetSw are in the same order as the
# columns of rgSet (and therefore of detP) -- confirm before relying on this.
mSetSw2 <- mSetSw[, keep2]

# Probe-level QC: a probe passes only if it is detected (p < 0.01) in every
# *retained* sample. The filter must be computed on the retained samples
# only; otherwise a single failed sample that was just dropped above still
# makes nearly every probe fail.
detP_kept <- detP[, keep2, drop = FALSE]
keep <- rowSums(detP_kept < 0.01) == ncol(detP_kept)
> write.table(detP, file = "detP.csv", sep = ",", quote = FALSE)
> head(detP)
203096660092_R08C01 203096660092_R07C01 203096660092_R04C01 203096660092_R03C01
cg18478105 0.000000e+00 0.000000e+00 0.000000e+00 0.0000e+00
cg09835024 0.000000e+00 0.000000e+00 0.000000e+00 0.0000e+00
cg14361672 0.000000e+00 0.000000e+00 0.000000e+00 0.0000e+00
cg01763666 0.000000e+00 0.000000e+00 0.000000e+00 0.0000e+00
cg12950382 1.326109e-53 1.018518e-27 3.993235e-68 1.4103e-54
cg02115394 0.000000e+00 0.000000e+00 0.000000e+00 0.0000e+00
203096660092_R02C01 203096660092_R01C01 203084910103_R05C01
cg18478105 0.000000e+00 0.000000e+00 0.807084441
cg09835024 0.000000e+00 0.000000e+00 0.871220810
cg14361672 0.000000e+00 0.000000e+00 0.890377227
cg01763666 0.000000e+00 1.916556e-306 0.734445120
cg12950382 2.686714e-40 9.062771e-19 0.915134781
cg02115394 0.000000e+00 0.000000e+00 0.008544051
> summary(detP)
203096660092_R08C01 203096660092_R07C01 203096660092_R04C01 203096660092_R03C01 203096660092_R02C01 203096660092_R01C01
Min. :0.0000000 Min. :0.0000000 Min. :0.0000000 Min. :0.0000000 Min. :0.0000000 Min. :0.0000000
1st Qu.:0.0000000 1st Qu.:0.0000000 1st Qu.:0.0000000 1st Qu.:0.0000000 1st Qu.:0.0000000 1st Qu.:0.0000000
Median :0.0000000 Median :0.0000000 Median :0.0000000 Median :0.0000000 Median :0.0000000 Median :0.0000000
Mean :0.0002629 Mean :0.0001721 Mean :0.0001611 Mean :0.0001979 Mean :0.0003552 Mean :0.0004123
3rd Qu.:0.0000000 3rd Qu.:0.0000000 3rd Qu.:0.0000000 3rd Qu.:0.0000000 3rd Qu.:0.0000000 3rd Qu.:0.0000000
Max. :0.9999935 Max. :0.9999996 Max. :1.0000000 Max. :1.0000000 Max. :1.0000000 Max. :1.0000000
203084910103_R05C01
Min. :0.0000
1st Qu.:0.1758
Median :0.5000
Mean :0.4830
3rd Qu.:0.7796
Max. :1.0000
> keep2 <- colMeans(detP) <0.05
> head(keep2)
203096660092_R08C01 203096660092_R07C01 203096660092_R04C01 203096660092_R03C01 203096660092_R02C01 203096660092_R01C01
TRUE TRUE TRUE TRUE TRUE TRUE
> str(keep2)
Named logi [1:7] TRUE TRUE TRUE TRUE TRUE TRUE ...
- attr(*, "names")= chr [1:7] "203096660092_R08C01" "203096660092_R07C01" "203096660092_R04C01" "203096660092_R03C01" ...
> sum(keep2)
[1] 6
> mSetSw2 <- mSetSw[,keep2]
> dim(mSetSw2)
[1] 866091 6
> keep <- rowSums(detP < 0.01) == ncol(rgSet)
> str(keep)
Named logi [1:866091] FALSE FALSE FALSE FALSE FALSE TRUE ...
- attr(*, "names")= chr [1:866091] "cg18478105" "cg09835024" "cg14361672" "cg01763666" ...
> table(keep)
keep
FALSE TRUE
801563 64528
> sessionInfo()
R version 4.1.0 (2021-05-18)
Platform: x86_64-pc-linux-gnu (64-bit)
Running under: CentOS Linux 7 (Core)
Matrix products: default
BLAS/LAPACK: /usr/local/intel/2018u3/compilers_and_libraries_2018.3.222/linux/mkl/lib/intel64_lin/libmkl_gf_lp64.so
locale:
[1] LC_CTYPE=en_AU.UTF-8 LC_NUMERIC=C LC_TIME=en_AU.UTF-8 LC_COLLATE=en_AU.UTF-8
[5] LC_MONETARY=en_AU.UTF-8 LC_MESSAGES=en_AU.UTF-8 LC_PAPER=en_AU.UTF-8 LC_NAME=C
[9] LC_ADDRESS=C LC_TELEPHONE=C LC_MEASUREMENT=en_AU.UTF-8 LC_IDENTIFICATION=C
attached base packages:
[1] grid parallel stats4 stats graphics grDevices utils datasets methods base
other attached packages:
[1] stringr_1.4.0 Gviz_1.36.2
[3] minfiData_0.38.0 IlluminaHumanMethylation450kmanifest_0.4.0
[5] RColorBrewer_1.1-2 IlluminaHumanMethylationEPICmanifest_0.3.0
[7] IlluminaHumanMethylationEPICanno.ilm10b2.hg19_0.6.0 missMethyl_1.26.1
[9] IlluminaHumanMethylation450kanno.ilmn12.hg19_0.6.0 mitch_1.4.0
[11] FlowSorted.Blood.EPIC_1.10.1 ExperimentHub_2.0.0
[13] AnnotationHub_3.0.1 BiocFileCache_2.0.0
[15] dbplyr_2.1.1 IlluminaHumanMethylationEPICanno.ilm10b4.hg19_0.6.0
[17] nlme_3.1-152 quadprog_1.5-8
[19] genefilter_1.74.0 topconfects_1.8.0
[21] beeswarm_0.4.0 gplots_3.1.1
[23] zoo_1.8-9 knitr_1.33
[25] DMRcate_2.6.0 minfi_1.38.0
[27] bumphunter_1.34.0 locfit_1.5-9.4
[29] iterators_1.0.13 foreach_1.5.1
[31] Biostrings_2.60.1 XVector_0.32.0
[33] SummarizedExperiment_1.22.0 Biobase_2.52.0
[35] MatrixGenerics_1.4.2 matrixStats_0.60.0
[37] GenomicRanges_1.44.0 GenomeInfoDb_1.28.0
[39] IRanges_2.26.0 limma_3.48.0
[41] S4Vectors_0.30.0 BiocGenerics_0.38.0
loaded via a namespace (and not attached):
[1] utf8_1.2.1 R.utils_2.10.1 tidyselect_1.1.1 RSQLite_2.2.7
[5] AnnotationDbi_1.54.1 htmlwidgets_1.5.3 BiocParallel_1.26.0 munsell_0.5.0
[9] codetools_0.2-18 preprocessCore_1.54.0 statmod_1.4.36 colorspace_2.0-1
[13] filelock_1.0.2 rstudioapi_0.13 GenomeInfoDbData_1.2.6 bit64_4.0.5
[17] rhdf5_2.36.0 vctrs_0.3.8 generics_0.1.0 xfun_0.24
[21] biovizBase_1.40.0 R6_2.5.0 illuminaio_0.34.0 AnnotationFilter_1.16.0
[25] bitops_1.0-7 rhdf5filters_1.4.0 cachem_1.0.5 reshape_0.8.8
[29] DelayedArray_0.18.0 assertthat_0.2.1 promises_1.2.0.1 BiocIO_1.2.0
[33] scales_1.1.1 bsseq_1.28.0 nnet_7.3-16 gtable_0.3.0
[37] ensembldb_2.16.4 rlang_0.4.11 splines_4.1.0 rtracklayer_1.52.0
[41] lazyeval_0.2.2 DSS_2.40.0 GEOquery_2.60.0 dichromat_2.0-1
[45] checkmate_2.0.0 reshape2_1.4.4 BiocManager_1.30.16 yaml_2.2.1
[49] GenomicFeatures_1.44.0 backports_1.2.1 httpuv_1.6.1 Hmisc_4.5-0
[53] tools_4.1.0 nor1mix_1.3-0 ggplot2_3.3.4 ellipsis_0.3.2
[57] siggenes_1.66.0 Rcpp_1.0.6 plyr_1.8.6 base64enc_0.1-3
[61] sparseMatrixStats_1.4.2 progress_1.2.2 zlibbioc_1.38.0 purrr_0.3.4
[65] RCurl_1.98-1.3 prettyunits_1.1.1 rpart_4.1-15 openssl_1.4.4
[69] cluster_2.1.2 magrittr_2.0.1 data.table_1.14.0 ProtGenerics_1.24.0
[73] evaluate_0.14 mime_0.11 hms_1.1.0 xtable_1.8-6
[77] XML_3.99-0.6 jpeg_0.1-8.1 mclust_5.4.7 gridExtra_2.3
[81] compiler_4.1.0 biomaRt_2.48.1 tibble_3.1.2 KernSmooth_2.23-20
[85] crayon_1.4.1 R.oo_1.24.0 htmltools_0.5.1.1 later_1.2.0
[89] Formula_1.2-4 tidyr_1.1.3 DBI_1.1.1 MASS_7.3-54
[93] rappdirs_0.3.3 Matrix_1.4-0 readr_1.4.0 cli_2.5.0
[97] permute_0.9-5 R.methodsS3_1.8.1 pkgconfig_2.0.3 GenomicAlignments_1.28.0
[101] foreign_0.8-81 xml2_1.3.2 annotate_1.70.0 rngtools_1.5
[105] multtest_2.48.0 beanplot_1.2 doRNG_1.8.2 scrime_1.3.5
[109] VariantAnnotation_1.38.0 digest_0.6.27 rmarkdown_2.9 base64_2.0
[113] htmlTable_2.2.1 edgeR_3.34.0 DelayedMatrixStats_1.14.2 restfulr_0.0.13
[117] curl_4.3.2 shiny_1.6.0 Rsamtools_2.8.0 gtools_3.9.2
[121] rjson_0.2.20 lifecycle_1.0.0 echarts4r_0.4.1 Rhdf5lib_1.14.2
[125] askpass_1.1 BSgenome_1.60.0 fansi_0.5.0 pillar_1.6.1
[129] GGally_2.1.2 lattice_0.20-44 KEGGREST_1.32.0 fastmap_1.1.0
[133] httr_1.4.2 survival_3.2-11 interactiveDisplayBase_1.30.0 glue_1.4.2
[137] png_0.1-7 BiocVersion_3.13.1 bit_4.0.4 stringi_1.6.2
[141] HDF5Array_1.20.0 blob_1.2.1 org.Hs.eg.db_3.13.0 caTools_1.18.2
[145] latticeExtra_0.6-30 memoise_2.0.0 dplyr_1.0.7