I am analyzing a subset of TCGA RNA-seq data. My sample set includes 125 samples (57 normal tissue + 68 matched primary tumors). Upon running the DESeq2 commands below, I continue to get upwards of 18k genes that are differentially expressed (based on a padj of less than 0.05). I am wondering if I am missing a pre-processing step, or some other step that may remove false positives (FPs). I expected far fewer, maybe around a few thousand. In other cancers in which my workflow has been used, the DEGs don't seem to match up with the literature. I'm concerned that I am doing something wrong.
# Paired tumor (MPT) vs. normal (NT) differential expression with DESeq2.
# Assumes DESeq2 is attached and that `cts` (count matrix) and `colData`
# (sample table with `patient` and `condition` columns) already exist.
dds <- DESeqDataSetFromMatrix(
  countData = cts,
  colData = colData,
  design = ~ patient + condition
)

# Pre-filtering: keep genes that have a count of at least 10 in at least
# 5 samples before running DESeq().
keep <- rowSums(counts(dds) >= 10) >= 5
dds <- dds[keep, ]

# Make "NT" the reference level of `condition` so that fold changes are
# reported relative to normal tissue. The contrast passed to results() below
# states the comparison explicitly as well.
dds$condition <- relevel(dds$condition, ref = "NT")

dds <- DESeq(dds, fitType = "local")

# Initial results object (uses the default alpha of results()), saved to CSV.
res <- results(dds, contrast = c("condition", "MPT", "NT"))
write.csv(res, file = "DESeqR.csv")
resOrdered <- res[order(res$padj), ]

# Results whose independent filtering is tuned to the cutoff used below.
alpha <- 0.05
res05 <- results(dds, contrast = c("condition", "MPT", "NT"), alpha = alpha)

# Count significant genes on res05, not res: the adjusted p-values in `res`
# were computed for a different alpha than the 0.05 threshold applied here.
sum(res05$padj < alpha, na.rm = TRUE)
summary(res05)

# NOTE(review): with this many samples, small fold changes can reach
# padj < 0.05. results() accepts `lfcThreshold` to test against a minimum
# |log2 fold change| -- confirm whether that is the intended question.

write.csv(as.data.frame(res), file = "DESeqResults.csv")
write.csv(as.data.frame(res05), file = "DESeqResults05padj.csv")
R version 3.5.0 (2018-04-23)
Platform: x86_64-apple-darwin15.6.0 (64-bit)
Running under: macOS High Sierra 10.13.6
Matrix products: default
BLAS: /Library/Frameworks/R.framework/Versions/3.5/Resources/lib/libRblas.0.dylib
LAPACK: /Library/Frameworks/R.framework/Versions/3.5/Resources/lib/libRlapack.dylib
locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
attached base packages:
[1] stats graphics grDevices utils datasets methods base
loaded via a namespace (and not attached):
[1] Biobase_2.40.0 bit64_0.9-7
[3] splines_3.5.0 Formula_1.2-3
[5] assertthat_0.2.0 stats4_3.5.0
[7] latticeExtra_0.6-28 blob_1.1.1
[9] GenomeInfoDbData_1.1.0 pillar_1.3.0
[11] RSQLite_2.1.1 backports_1.1.2
[13] lattice_0.20-38 glue_1.3.0
[15] digest_0.6.18 GenomicRanges_1.32.7
[17] RColorBrewer_1.1-2 XVector_0.20.0
[19] checkmate_1.8.5 colorspace_1.3-2
[21] htmltools_0.3.6 Matrix_1.2-15
[23] plyr_1.8.4 DESeq2_1.20.0
[25] XML_3.98-1.16 pkgconfig_2.0.2
[27] genefilter_1.62.0 zlibbioc_1.26.0
[29] purrr_0.2.5 xtable_1.8-3
[31] scales_1.0.0 BiocParallel_1.14.2
[33] htmlTable_1.12 tibble_1.4.2
[35] annotate_1.58.0 IRanges_2.14.12
[37] ggplot2_3.1.0 SummarizedExperiment_1.10.1
[39] nnet_7.3-12 BiocGenerics_0.26.0
[41] lazyeval_0.2.1 survival_2.43-1
[43] magrittr_1.5 crayon_1.3.4
[45] memoise_1.1.0 foreign_0.8-71
[47] tools_3.5.0 data.table_1.11.8
[49] matrixStats_0.54.0 stringr_1.3.1
[51] S4Vectors_0.18.3 locfit_1.5-9.1
[53] munsell_0.5.0 cluster_2.0.7-1
[55] DelayedArray_0.6.6 AnnotationDbi_1.42.1
[57] bindrcpp_0.2.2 compiler_3.5.0
[59] GenomeInfoDb_1.16.0 rlang_0.3.0.1
[61] grid_3.5.0 RCurl_1.95-4.11
[63] rstudioapi_0.8 htmlwidgets_1.3
[65] bitops_1.0-6 base64enc_0.1-3
[67] gtable_0.2.0 DBI_1.0.0
[69] R6_2.3.0 gridExtra_2.3
[71] knitr_1.20 dplyr_0.7.7
[73] bit_1.1-14 bindr_0.1.1
[75] Hmisc_4.1-1 stringi_1.2.4
[77] parallel_3.5.0 Rcpp_0.12.19
[79] geneplotter_1.58.0 rpart_4.1-13
[81] acepack_1.4.1 tidyselect_0.2.5
# Print the per-sample metadata held in `dds` (condition and patient columns shown below).
as.data.frame(colData(dds))
condition patient
MPT_0ea510ed.4e24.4c5b.ab49.cfef85aedeab_gdc_realn_rehead.bam MPT TCGA.38.4626
MPT_10e0e7f3.3fc1.43f7.b7fc.7bb6a375d060_gdc_realn_rehead.bam MPT TCGA.49.6761
MPT_1132c05d.00f1.4c3b.a0ed.06a735c82401_gdc_realn_rehead.bam MPT TCGA.55.6986
MPT_1228fb38.e5a9.4521.ad83.a67977908527_gdc_realn_rehead.bam MPT TCGA.49.4490
MPT_233446a9.2cbf.4395.9fb4.0660276d9885_gdc_realn_rehead.bam MPT TCGA.44.6146
MPT_269f6d2a.87dc.4fd8.af61.2ab4e6e06728_gdc_realn_rehead.bam MPT TCGA.55.6981
MPT_284cdeba.e8fb.4e5d.960a.dc45815fd3bd_gdc_realn_rehead.bam MPT TCGA.55.6972
MPT_2950c941.f93f.4948.97f2.74cf6d5161de_gdc_realn_rehead.bam MPT TCGA.55.6971
MPT_298f6a56.5a1d.4b35.bb8a.e670ea6a0ee9_gdc_realn_rehead.bam MPT TCGA.49.4512
MPT_2adbf2eb.6c1c.4624.91ce.86765f2219c5_gdc_realn_rehead.bam MPT TCGA.91.6828
MPT_2babb909.4d6a.4873.9010.0481cd9d7f67_gdc_realn_rehead.bam MPT TCGA.44.6777
MPT_2c95ea97.62f0.4cdf.b5c2.e52f1dba1df7_gdc_realn_rehead.bam MPT TCGA.73.4676
MPT_2d5e614b.ab16.4ff9.bb9a.0c1004890a77_gdc_realn_rehead.bam MPT TCGA.55.6968
MPT_3efc7114.c5a2.4bb6.b6bf.02fafc3573f6_gdc_realn_rehead.bam MPT TCGA.44.2665
MPT_3f53a71c.e0a7.442f.92da.4e74f5c84581_gdc_realn_rehead.bam MPT TCGA.91.6829
MPT_41481131.9b42.47bb.9f05.879007ad423c_gdc_realn_rehead.bam MPT TCGA.55.6970
MPT_4585dd6a.7829.4919.8b36.25d94983144c_gdc_realn_rehead.bam MPT TCGA.44.6146
MPT_495752ac.1048.48c3.9791.623dbd116a1a_gdc_realn_rehead.bam MPT TCGA.50.6595
MPT_4d8bf6c2.5bfa.4ac3.945b.52522a7a0cf9_gdc_realn_rehead.bam MPT TCGA.44.2668
MPT_5735edb0.5df0.4425.b5db.614e94f1e2db_gdc_realn_rehead.bam MPT TCGA.50.5933
MPT_5a66bb8d.7df6.4655.806d.1451370d27a9_gdc_realn_rehead.bam MPT TCGA.91.6847
MPT_5ace35cc.50a9.49ec.84fe.dc6476b51b70_gdc_realn_rehead.bam MPT TCGA.44.2657
MPT_61274c16.dddd.4df9.815a.52bc8fdc2462_gdc_realn_rehead.bam MPT TCGA.44.2665
MPT_64e21887.298b.4768.8946.cf8fbfae611c_gdc_realn_rehead.bam MPT TCGA.50.5936
MPT_67dd9f18.b0ba.48ef.a01c.cefe2cee9263_gdc_realn_rehead.bam MPT TCGA.44.2661
MPT_6a30bada.db34.45d8.af22.2a185cfc120b_gdc_realn_rehead.bam MPT TCGA.44.2662
MPT_6cb90565.9ca5.48ee.a258.40ea601c7380_gdc_realn_rehead.bam MPT TCGA.44.6147
MPT_6e3dbbf4.ea7c.438f.a944.55adb7c291d5_gdc_realn_rehead.bam MPT TCGA.55.6983
MPT_6fc2c0be.53ca.418f.af9b.0a3a20640519_gdc_realn_rehead.bam MPT TCGA.44.6147
MPT_74be9457.39ce.4029.9076.53b0e57d2067_gdc_realn_rehead.bam MPT TCGA.44.2662
MPT_7505607d.48c9.43ab.bce8.283ff7d6ce6c_gdc_realn_rehead.bam MPT TCGA.55.6979
MPT_76a97a0e.a66b.4439.9b16.ea841f882758_gdc_realn_rehead.bam MPT TCGA.55.6982
MPT_79c5c92d.4f9b.4495.b9ce.957581959297_gdc_realn_rehead.bam MPT TCGA.44.6778
MPT_80c84645.5441.4a13.9b44.c5d6b4909d93_gdc_realn_rehead.bam MPT TCGA.44.6145
MPT_8a036b15.e79e.4e26.8700.cc6ff96ae01f_gdc_realn_rehead.bam MPT TCGA.38.4627
MPT_8f2741bc.e876.4acb.ab86.b90e88330644_gdc_realn_rehead.bam MPT TCGA.50.5939
MPT_92d91508.7894.4d4d.b51e.748c367835ec_gdc_realn_rehead.bam MPT TCGA.55.6985
MPT_97213ae3.3d55.4d5c.9ab7.aaedb58adf91_gdc_realn_rehead.bam MPT TCGA.44.3398
MPT_97ccb4e0.a0b0.4541.88f9.af010324ab7e_gdc_realn_rehead.bam MPT TCGA.91.6849
MPT_9b96860f.7717.46b3.8ab7.2f7d9796c1d4_gdc_realn_rehead.bam MPT TCGA.44.2668
MPT_9f809399.b8bf.4a21.a703.14464e6594eb_gdc_realn_rehead.bam MPT TCGA.44.5645
MPT_a53b56fb.04c3.402c.9236.0bdbd80da2b8_gdc_realn_rehead.bam MPT TCGA.91.6835
MPT_a5e9d1ee.2e45.4f48.b50d.11f118fc25a2_gdc_realn_rehead.bam MPT TCGA.50.5935
MPT_aa44f578.7400.4ef8.b1b0.8c369bfda870_gdc_realn_rehead.bam MPT TCGA.44.2655
MPT_ac545a99.b3ab.4917.836b.031903d738d3_gdc_realn_rehead.bam MPT TCGA.44.6148
MPT_af2719bb.3eb8.4d96.add9.3f44ca91e6ae_gdc_realn_rehead.bam MPT TCGA.44.5645
MPT_af288cac.29cd.4451.8d3f.6a32fc3e6ab5_gdc_realn_rehead.bam MPT TCGA.91.6836
MPT_b79a9278.cdfb.429b.8439.f78b61b2a463_gdc_realn_rehead.bam MPT TCGA.55.6984
MPT_ba3418e3.a846.47e1.8eb4.e71f17277dad_gdc_realn_rehead.bam MPT TCGA.50.5930
MPT_bcc8e80a.c06c.4a1a.b308.9957fc3ca5d4_gdc_realn_rehead.bam MPT TCGA.50.5932
MPT_c0994e02.1842.4736.a010.78275693e2ab_gdc_realn_rehead.bam MPT TCGA.44.2668
MPT_c6173474.4555.47b3.8e79.08b77529d305_gdc_realn_rehead.bam MPT TCGA.49.6743
MPT_cc3ad632.c9d1.4d39.a88e.d96e16546502_gdc_realn_rehead.bam MPT TCGA.38.4632
MPT_d04d83a4.fbe1.4b6a.a48d.2950394a67bb_gdc_realn_rehead.bam MPT TCGA.44.6146
MPT_d3467666.fc2e.41f7.95d2.215c7e36c715_gdc_realn_rehead.bam MPT TCGA.49.6745
MPT_d42a6fe0.8988.43ec.93fb.eec85f3dfd62_gdc_realn_rehead.bam MPT TCGA.38.4625
MPT_d8183508.e323.4a1f.ac65.4b89c848247f_gdc_realn_rehead.bam MPT TCGA.55.6975
MPT_d908f378.a7f0.4f06.8e44.1da223489b11_gdc_realn_rehead.bam MPT TCGA.49.6744
MPT_de162e91.f450.4632.8779.d24839cd7769_gdc_realn_rehead.bam MPT TCGA.55.6980
MPT_de3be65f.e5f2.4cd1.8617.c8ca750d26c6_gdc_realn_rehead.bam MPT TCGA.55.6978
MPT_e5a46b37.23a4.4314.bf69.0bc355587db8_gdc_realn_rehead.bam MPT TCGA.44.6776
MPT_e61b8a2e.0004.4dec.ac69.8a6f1bd91267_gdc_realn_rehead.bam MPT TCGA.50.5931
MPT_e66bf758.e262.41f7.8493.7b0e1816fd7d_gdc_realn_rehead.bam MPT TCGA.44.5645
A portion of the sizeFactor column of the colData object:
# Print the per-sample metadata held in `dds` again; only the sizeFactor column is shown below.
as.data.frame(colData(dds))
sizeFactor
MPT_5735edb0.5df0.4425.b5db.614e94f1e2db_gdc_realn_rehead.bam 1.0522057
MPT_5a66bb8d.7df6.4655.806d.1451370d27a9_gdc_realn_rehead.bam 1.8495910
MPT_5ace35cc.50a9.49ec.84fe.dc6476b51b70_gdc_realn_rehead.bam 1.0372878
MPT_61274c16.dddd.4df9.815a.52bc8fdc2462_gdc_realn_rehead.bam 0.2474134
MPT_64e21887.298b.4768.8946.cf8fbfae611c_gdc_realn_rehead.bam 0.7443850
MPT_67dd9f18.b0ba.48ef.a01c.cefe2cee9263_gdc_realn_rehead.bam 1.4421486
MPT_6a30bada.db34.45d8.af22.2a185cfc120b_gdc_realn_rehead.bam 0.5531774
MPT_6cb90565.9ca5.48ee.a258.40ea601c7380_gdc_realn_rehead.bam 1.2025101
MPT_6e3dbbf4.ea7c.438f.a944.55adb7c291d5_gdc_realn_rehead.bam 1.9280516
MPT_6fc2c0be.53ca.418f.af9b.0a3a20640519_gdc_realn_rehead.bam 0.6001112
MPT_74be9457.39ce.4029.9076.53b0e57d2067_gdc_realn_rehead.bam 1.8760973
MPT_7505607d.48c9.43ab.bce8.283ff7d6ce6c_gdc_realn_rehead.bam 0.8970892
MPT_76a97a0e.a66b.4439.9b16.ea841f882758_gdc_realn_rehead.bam 0.7090317
MPT_79c5c92d.4f9b.4495.b9ce.957581959297_gdc_realn_rehead.bam 1.0315368
MPT_80c84645.5441.4a13.9b44.c5d6b4909d93_gdc_realn_rehead.bam 0.9038426
MPT_8a036b15.e79e.4e26.8700.cc6ff96ae01f_gdc_realn_rehead.bam 1.5952787
MPT_8f2741bc.e876.4acb.ab86.b90e88330644_gdc_realn_rehead.bam 0.7876509
MPT_92d91508.7894.4d4d.b51e.748c367835ec_gdc_realn_rehead.bam 0.8891323
MPT_97213ae3.3d55.4d5c.9ab7.aaedb58adf91_gdc_realn_rehead.bam 1.6099384
MPT_97ccb4e0.a0b0.4541.88f9.af010324ab7e_gdc_realn_rehead.bam 0.6056940
MPT_9b96860f.7717.46b3.8ab7.2f7d9796c1d4_gdc_realn_rehead.bam 0.7568102
MPT_9f809399.b8bf.4a21.a703.14464e6594eb_gdc_realn_rehead.bam 1.1089792
MPT_a53b56fb.04c3.402c.9236.0bdbd80da2b8_gdc_realn_rehead.bam 1.4137859
MPT_a5e9d1ee.2e45.4f48.b50d.11f118fc25a2_gdc_realn_rehead.bam 0.9357544
MPT_aa44f578.7400.4ef8.b1b0.8c369bfda870_gdc_realn_rehead.bam 2.2851588
MPT_ac545a99.b3ab.4917.836b.031903d738d3_gdc_realn_rehead.bam 1.2264147
MPT_af2719bb.3eb8.4d96.add9.3f44ca91e6ae_gdc_realn_rehead.bam 0.8615840
NT_38d095ea.9002.42a5.a556.1c751b0405e9_gdc_realn_rehead.bam 1.6164258
NT_3f574e31.db95.4199.826c.c2f5b1e4332b_gdc_realn_rehead.bam 0.9192779
NT_426159a8.d17a.4770.91ac.578ecdf46b8d_gdc_realn_rehead.bam 1.3265046
NT_4340a3f4.4b56.432a.949a.655b5f74c2f7_gdc_realn_rehead.bam 0.8355257
NT_4f4a304b.0e65.467e.b07e.155c528ceb15_gdc_realn_rehead.bam 1.7333935
NT_53d5d566.f294.448b.ac48.6ffa2ba333b0_gdc_realn_rehead.bam 0.8559585
NT_56885fdf.ae09.4478.be99.81bd282715eb_gdc_realn_rehead.bam 1.3766366
NT_5b8d2548.b32e.4abb.9244.a618b1b2852e_gdc_realn_rehead.bam 1.6451766
NT_67e0bb40.5006.4c03.839a.8bf3f9383b71_gdc_realn_rehead.bam 0.7742518
NT_692d70a3.ee5a.4fb5.a99f.17cd77bb5274_gdc_realn_rehead.bam 0.7328732
NT_6b81ca60.c91e.447d.8658.d3797422e4c7_gdc_realn_rehead.bam 1.0435036
NT_6c329a5f.3866.4cc1.9d5e.a43413d188b5_gdc_realn_rehead.bam 1.0036603
NT_71f1c38b.f8a7.444c.bd95.4ba50b20b658_gdc_realn_rehead.bam 0.8864363
NT_755cf943.64da.48f7.9c92.ebda57d9253a_gdc_realn_rehead.bam 1.0532410
NT_766f3ef2.d1a3.46a8.9881.966e8e2b9b02_gdc_realn_rehead.bam 1.5987272
NT_7bf2426a.6731.4065.a201.8bce925a16f6_gdc_realn_rehead.bam 0.9075476
NT_7f274fba.b331.4f6d.b257.66767f383ca8_gdc_realn_rehead.bam 0.8162466
NT_821336cd.4fc5.4050.8e19.92549c352dce_gdc_realn_rehead.bam 0.9278203
NT_841873e1.5f4c.4021.945a.59dd15d96f87_gdc_realn_rehead.bam 0.7755288
NT_8442e31c.0e0a.4106.8113.d8d648ea5139_gdc_realn_rehead.bam 0.9765713
NT_9233b447.8b45.4a8b.b500.1c40459d4c38_gdc_realn_rehead.bam 0.9063173
NT_932760c0.da82.4c5e.9ded.a16d66ab901f_gdc_realn_rehead.bam 0.8772111
NT_9f87923d.0659.4df8.9676.38aded7de3eb_gdc_realn_rehead.bam 0.7667327
NT_a4a5c7e9.c536.4e87.b337.eb5a003ba9fe_gdc_realn_rehead.bam 0.7813543
NT_a8d44728.8193.4fe6.adcc.fe792481fec7_gdc_realn_rehead.bam 1.5703533
NT_ada0db16.af3b.43ce.97e0.ffd75e7fcd66_gdc_realn_rehead.bam 1.2253505