Deseq2 for RNAseq experiments without replicates
1
0
Entering edit mode
prp291 • 0
@prp291-9622
Last seen 7.0 years ago

I am trying to find out the differentially expressed gene in a RNAseq experiment without replicate. The first R script is inspired by this post on seqanswer  and other script I always use for my other experiments with replicates. When I run first script I got 800 genes while second script is giving me the more than 3000 genes. Can you tell me that which is script should I use?

  In first script I am getting the avgLogExpr and  rLogFC . Can you tell me please that which one should be considered. 

First script

# This R script will analyzed data when there is no replicate
# There script is inspired by this post
#http://seqanswers.com/forums/showthread.php?t=31036

#install.packages("DESeq2")
## try http:// if https:// URLs are not supported
#source("https://bioconductor.org/biocLite.R")
#biocLite("DESeq2")

# Set working directory
setwd("/Users/sanjay/Desktop/deseq");
getwd();


# Counts-Matrix
raw_count <- read.delim("count.txt", header=TRUE, row.names=1)
countdata1 <- round(raw_count)

#write.csv(countdata1, "final_filtered_data - Copy - Copy.csv")
head(countdata1)


# Remove all gene which has 0 value in all sample
all <- apply(countdata1, 1, function(x) all(x==0) )
newdata <- countdata1[!all,]
write.csv(newdata, file = "count_0_filter.csv")
head (newdata)

# remove uninformative genes keep only genes that are expressed in at least 200 count in 20 samples
dat <- newdata[rowSums(newdata > 20) >= 1,]

write.csv(dat, file = "rpkm.csv_0_filter_atleat_200_count.csv")
head (dat)

final_count <- read.csv("rpkm.csv_0_filter_atleat_200_count.csv", header=TRUE, row.names=1)

library(DESeq2)

# Convert to matrix
countdata <- as.matrix(final_count)
head(countdata)
(condition <- factor(c("control", "1h")))


(condition <- factor(c("control", "1h")))
# Create a coldata frame and instantiate the DESeqDataSet. See ?DESeqDataSetFromMatrix
(coldata <- data.frame(row.names=colnames(countdata), condition))

dds <- DESeqDataSetFromMatrix(countData=countdata, colData=coldata, design=~condition)
dds
#set control as reference level
dds$condition <- relevel(dds$condition, "control")
dds <- DESeq(dds)


# Regularized log transformation for clustering/heatmaps, etc
rld <- rlogTransformation(dds)
head(assay(rld))
hist(assay(rld))
write.csv(resdata, file="rld.csv")


res <- data.frame(
   assay(rld), 
   avgLogExpr = ( assay(rld)[,2] + assay(rld)[,1] ) / 2,
   rLogFC = assay(rld)[,2] - assay(rld)[,1] )
   
    
   head( res[ order(res$rLogFC), ] )
   write.csv(res, file="DEG.xlsx")

 

 

 

Second script

# This R script will analyzed data when there is no replicate

#install.packages("DESeq2")
## try http:// if https:// URLs are not supported
#source("https://bioconductor.org/biocLite.R")
#biocLite("DESeq2")

# Set working directory
#setwd("/Users/Sanjay/Desktop/mirna");
#getwd();

 

# Set working directory
setwd("/Users/Sanjay/Desktop/deseq");
getwd();


# Counts-Matrix
raw_count <- read.delim("count.txt", header=TRUE, row.names=1)
countdata1 <- round(raw_count)
head(countdata1)


# Remove all gene which has 0 value in all sample
all <- apply(countdata1, 1, function(x) all(x==0) )
newdata <- countdata1[!all,]
write.csv(newdata, file = "count_0_filter.csv")
head (newdata)

# remove uninformative genes keep only genes that are expressed in at least 50 count in 2 samples
dat <- newdata[rowSums(newdata > 50) >= 2,]
write.csv(dat, file = "rpkm.csv_0_filter_atleat_50_count.csv")
head (dat)

final_count <- read.csv("rpkm.csv_0_filter_atleat_50_count.csv", header=TRUE, row.names=1)

library(DESeq2)

# Convert to matrix
countdata <- as.matrix(final_count)
head(countdata)
(condition <- factor(c("NJ8", "NJ9")))


library(DESeq2)

# Create a coldata frame and instantiate the DESeqDataSet. See ?DESeqDataSetFromMatrix
(coldata <- data.frame(row.names=colnames(countdata), condition))

 

dds <- DESeqDataSetFromMatrix(countData=countdata, colData=coldata, design=~condition)
dds

#set NJ8 as reference level
dds$condition <- relevel(dds$condition, "NJ8")
dds <- DESeq(dds)
DESeq(dds)

# Regularized log transformation for clustering/heatmaps, etc
rld <- rlogTransformation(dds)
head(assay(rld))
hist(assay(rld))
write.csv(resdata, file="rld.csv")

 

# Get differential expression results
res <- results(dds)
## Order by adjusted fold change
res <- res[order(res$log2FoldChange), ]
## Merge with normalized count data
resdata <- merge(as.data.frame(res), as.data.frame(counts(dds, normalized=TRUE)), by="row.names", sort=FALSE)
names(resdata)[1] <- "Gene"
head(resdata)
## Write results
write.csv(resdata, file="diffexpr-results.csv")


## filter result by log2foldchange i.e. set to 1
dat.log2FC.twofold = res[abs(res$log2FoldChange) >= 1.0,]
write.csv(dat.log2FC.twofold, file="diffexpr-results_filter.csv")


## Volcano plot with "significant" genes labeled
volcanoplot <- function (res, lfcthresh=2, sigthresh=0.05, main="Volcano Plot", legendpos="bottomright", labelsig=TRUE, textcx=1, ...) {
  with(res, plot(log2FoldChange, -log10(pvalue), pch=20, main=main, ...))
  with(subset(res, padj<sigthresh ), points(log2FoldChange, -log10(pvalue), pch=20, col="red", ...))
  with(subset(res, abs(log2FoldChange)>lfcthresh), points(log2FoldChange, -log10(pvalue), pch=20, col="orange", ...))
  with(subset(res, padj<sigthresh & abs(log2FoldChange)>lfcthresh), points(log2FoldChange, -log10(pvalue), pch=20, col="green", ...))
  if (labelsig) {
    require(calibrate)
    with(subset(res, padj<sigthresh & abs(log2FoldChange)>lfcthresh), textxy(log2FoldChange, -log10(pvalue), labs=Gene, cex=textcx, ...))
  }
  legend(legendpos, xjust=1, yjust=1, legend=c(paste("FDR<",sigthresh,sep=""), paste("|LogFC|>",lfcthresh,sep=""), "both"), pch=20, col=c("red","orange","green"))
}
png("diffexpr-volcanoplot.png", 1200, 1000, pointsize=20)
volcanoplot(resdata, lfcthresh=1, sigthresh=0.05, textcx=.8, xlim=c(-2.3, 2))
dev.off()

 

 

 

rnaseq deseq2 • 12k views
ADD COMMENT
1
Entering edit mode
@mikelove
Last seen 26 minutes ago
United States

For an experiment without replicates, you should just run DESeq() as normal.

I can't easily see the difference between the two scripts you posted. And make sure to read the section of the help file ?DESeq which starts "Experiments without replicates...". It's important to remember that this kind of analysis is solely for exploratory purposes, and that biological replicates are needed for a proper analysis.

ADD COMMENT

Login before adding your answer.

Traffic: 965 users visited in the last hour
Help About
FAQ
Access RSS
API
Stats

Use of this site constitutes acceptance of our User Agreement and Privacy Policy.

Powered by the version 2.3.6