Hello,
I have run into a very strange situation recently.
We have two replicates per sample, which are like this
cat sample_info.tbl
samples condition
SCN_1 CT17
SCN_2 30MIN
SCN_3 1HR
SCN_4 3HR
SCN_5 6HR
SCN_6 CT23
SCN_7 CT17
SCN_8 30MIN
SCN_9 1HR
SCN_10 3HR
SCN_11 6HR
SCN_12 CT23
When I run DESEq2 like this
### If you have already counted your reads. Then you don't need all the above counting steps. The above steps have given me odd results. That is why I used featurecounts program from Subread package and made my counts file.
## Set Working Directory
setwd("/media/A2C7-ACFD/")
getwd()
# load library for genomic annotations
library(GenomicFeatures)
library(GenomicRanges)
library(GenomicAlignments)
library(rtracklayer)
library(Rsamtools)
library(DESeq2)
#Read your pre-made raw count file
rawcountData <- read.table("Core-BAM-SCN-All-Samples-Raw-Read-Counts_RPKM-0.5.txt",header=TRUE,row.names=1,sep="\t")
#removing rows that are zero for all genes (edgeR and DESeq have trouble with these)
x <- rowSums(rawcountData==0)!=ncol(rawcountData)
filteredcountData <- rawcountData[x,]
write.table(filteredcountData,file="Core-BAM-SCN-All-Samples-Raw-Read-Counts_RPKM-0.5-Filtered.txt",sep="\t")
## Starting DESeq Analysis
#Make this file before starting the analysis
colData <- read.table("sample-info.tbl",header=TRUE,row.names=1,sep="\t")
#Make DESeq2 Object
dds <- DESeqDataSetFromMatrix(countData = filteredcountData,colData = colData,design = ~ condition)
#Info on the DESEq2 object
dds
head(counts(dds))
colData(dds)
##Writing Normalized read counts to a file for comparing/reference purposes.
dds_norm_estimatesize <- estimateSizeFactors(dds)
deseq_Ncounts <- as.data.frame(counts(dds_norm_estimatesize, normalized=TRUE))
write.table(deseq_Ncounts, "DESeq2-Normalized-Read-Counts.txt",sep="\t")
##DESeq calculation
dds <- DESeq(dds)
plotMA(dds,ylim=c(-2,2),main="SCN-Data-DESeq2-Approach")
dev.copy (png,"SCN-Deseq2-MA-Plot.png")
dev.off()
dev.off()
## contrast specifies conditions to be tested. Remove NA values and write to a file.
result_CT17_30 <- results(dds, contrast=c("condition","CT17","30MIN"))
pdf("CT17-30-min-pvalue-total.pdf")
hist(result_CT17_30$pvalue)
dev.off()
result_CT17_30 <- result_CT17_30[complete.cases(result_CT17_30),]
write.table(result_CT17_30, "CT17_vs_30MIN_DESEq2_output.txt")
resSig <- result_CT17_30[which(result_CT17_30$padj < 0.05),]
write.table(resSig, "CT17_vs_30MIN_DESEq2_significant-output.txt")
result_CT17_1HR <- results(dds, contrast=c("condition","CT17","1HR"))
pdf("CT17-1HR-pvalue-total.pdf")
hist(result_CT17_1HR$pvalue)
dev.off()
result_CT17_1HR <- result_CT17_1HR[complete.cases(result_CT17_1HR),]
write.table(result_CT17_1HR, "CT17_vs_1HR_DESEq2_output.txt")
resSig <- result_CT17_1HR[which(result_CT17_1HR$padj < 0.05),]
write.table(resSig, "CT17_vs_1HR_DESEq2_significant-output.txt")
result_CT17_3HR <- results(dds, contrast=c("condition","CT17","3HR"))
pdf("CT17-3HR-pvalue-total.pdf")
hist(result_CT17_3HR$pvalue)
dev.off()
result_CT17_3HR <- result_CT17_3HR[complete.cases(result_CT17_3HR),]
write.table(result_CT17_3HR, "CT17_vs_3HR_DESEq2_output.txt")
resSig <- result_CT17_3HR[which(result_CT17_3HR$padj < 0.05),]
write.table(resSig, "CT17_vs_3HR_DESEq2_significant-output.txt")
result_CT17_6HR <- results(dds, contrast=c("condition","CT17","6HR"))
pdf("CT17-6HR-pvalue-total.pdf")
hist(result_CT17_6HR$pvalue)
dev.off()
result_CT17_6HR <- result_CT17_6HR[complete.cases(result_CT17_6HR),]
write.table(result_CT17_6HR, "CT17_vs_6HR_DESEq2_output.txt")
resSig <- result_CT17_6HR[which(result_CT17_6HR$padj < 0.05),]
write.table(resSig, "CT17_vs_6HR_DESEq2_significant-output.txt")
result_CT17_CT23 <- results(dds, contrast=c("condition","CT17","CT23"))
pdf("CT17-CT23-pvalue-total.pdf")
hist(result_CT17_CT23$pvalue)
dev.off()
result_CT17_CT23 <- result_CT17_CT23[complete.cases(result_CT17_CT23),]
write.table(result_CT17_CT23, "CT17_vs_CT23_DESEq2_output.txt")
resSig <- result_CT17_CT23[which(result_CT17_CT23$padj < 0.05),]
write.table(resSig, "CT17_vs_CT23_DESEq2_significant-output.txt")
I see the same log2FC for any comparison of samples between CT17 and the other samples.
Here is a sample of my output
uc007aet.1 560.958423107228 560.958423107228 560.958423107228 560.958423107228
uc007aeu.1 338.733096047258 338.733096047258 338.733096047258 338.733096047258
uc007afd.3 243.757033577816 243.757033577816 243.757033577816 243.757033577816
uc007afe.3 207.343185897479 207.343185897479 207.343185897479 207.343185897479
uc007aff.3 442.671806596705 442.671806596705 442.671806596705 442.671806596705
uc007afg.1 307.904671077446 307.904671077446 307.904671077446 307.904671077446
uc007afh.1 878.044223134319 878.044223134319 878.044223134319 878.044223134319
uc007afi.2 2427.37033630472 2427.37033630472 2427.37033630472 2427.37033630472
uc007afk.2 433.170130774881 433.170130774881 433.170130774881 433.170130774881
uc007afl.2 431.589297925458 431.589297925458 431.589297925458 431.589297925458
Here is my sessionInfo()
R version 3.1.1 (2014-07-10)
Platform: x86_64-pc-linux-gnu (64-bit)
locale:
[1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
[3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
[5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
[7] LC_PAPER=en_US.UTF-8 LC_NAME=C
[9] LC_ADDRESS=C LC_TELEPHONE=C
[11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
attached base packages:
[1] stats4 parallel stats graphics grDevices utils datasets
[8] methods base
other attached packages:
[1] DESeq2_1.6.3 RcppArmadillo_0.4.600.4.0
[3] Rcpp_0.11.4 rtracklayer_1.26.2
[5] GenomicAlignments_1.2.1 Rsamtools_1.18.2
[7] Biostrings_2.34.1 XVector_0.6.0
[9] GenomicFeatures_1.18.3 AnnotationDbi_1.28.1
[11] Biobase_2.26.0 GenomicRanges_1.18.4
[13] GenomeInfoDb_1.2.4 IRanges_2.0.1
[15] S4Vectors_0.4.0 BiocGenerics_0.12.1
loaded via a namespace (and not attached):
[1] acepack_1.3-3.3 annotate_1.44.0 base64enc_0.1-2
[4] BatchJobs_1.5 BBmisc_1.9 BiocParallel_1.0.3
[7] biomaRt_2.22.0 bitops_1.0-6 brew_1.0-6
[10] checkmate_1.5.1 cluster_2.0.1 codetools_0.2-10
[13] colorspace_1.2-4 DBI_0.3.1 digest_0.6.8
[16] fail_1.2 foreach_1.4.2 foreign_0.8-62
[19] Formula_1.2-0 genefilter_1.48.1 geneplotter_1.44.0
[22] ggplot2_1.0.0 grid_3.1.1 gtable_0.1.2
[25] Hmisc_3.15-0 iterators_1.0.7 lattice_0.20-29
[28] latticeExtra_0.6-26 locfit_1.5-9.1 MASS_7.3-37
[31] munsell_0.4.2 nnet_7.3-9 plyr_1.8.1
[34] proto_0.3-10 RColorBrewer_1.1-2 RCurl_1.95-4.5
[37] reshape2_1.4.1 rpart_4.1-9 RSQLite_1.0.0
[40] scales_0.2.4 sendmailR_1.2-1 splines_3.1.1
[43] stringr_0.6.2 survival_2.37-7 tools_3.1.1
[46] XML_3.98-1.1 xtable_1.7-4 zlibbioc_1.12.0
>
> proc.time()
user system elapsed
73.852 1.184 75.155