Search
Question: Deseq2 for RNAseq experiments without replicates
0
gravatar for prp291
14 months ago by
prp2910
prp2910 wrote:

I am trying to find out the differentially expressed gene in a RNAseq experiment without replicate. The first R script is inspired by this post on seqanswer  and other script I always use for my other experiments with replicates. When I run first script I got 800 genes while second script is giving me the more than 3000 genes. Can you tell me that which is script should I use?

  In first script I am getting the avgLogExpr and  rLogFC . Can you tell me please that which one should be considered. 

First script

# This R script will analyzed data when there is no replicate
# There script is inspired by this post
#http://seqanswers.com/forums/showthread.php?t=31036

#install.packages("DESeq2")
## try http:// if https:// URLs are not supported
#source("https://bioconductor.org/biocLite.R")
#biocLite("DESeq2")

# Set working directory
setwd("/Users/sanjay/Desktop/deseq");
getwd();


# Counts-Matrix
raw_count <- read.delim("count.txt", header=TRUE, row.names=1)
countdata1 <- round(raw_count)

#write.csv(countdata1, "final_filtered_data - Copy - Copy.csv")
head(countdata1)


# Remove all gene which has 0 value in all sample
all <- apply(countdata1, 1, function(x) all(x==0) )
newdata <- countdata1[!all,]
write.csv(newdata, file = "count_0_filter.csv")
head (newdata)

# remove uninformative genes keep only genes that are expressed in at least 200 count in 20 samples
dat <- newdata[rowSums(newdata > 20) >= 1,]

write.csv(dat, file = "rpkm.csv_0_filter_atleat_200_count.csv")
head (dat)

final_count <- read.csv("rpkm.csv_0_filter_atleat_200_count.csv", header=TRUE, row.names=1)

library(DESeq2)

# Convert to matrix
countdata <- as.matrix(final_count)
head(countdata)
(condition <- factor(c("control", "1h")))


(condition <- factor(c("control", "1h")))
# Create a coldata frame and instantiate the DESeqDataSet. See ?DESeqDataSetFromMatrix
(coldata <- data.frame(row.names=colnames(countdata), condition))

dds <- DESeqDataSetFromMatrix(countData=countdata, colData=coldata, design=~condition)
dds
#set control as reference level
dds$condition <- relevel(dds$condition, "control")
dds <- DESeq(dds)


# Regularized log transformation for clustering/heatmaps, etc
rld <- rlogTransformation(dds)
head(assay(rld))
hist(assay(rld))
write.csv(resdata, file="rld.csv")


res <- data.frame(
   assay(rld), 
   avgLogExpr = ( assay(rld)[,2] + assay(rld)[,1] ) / 2,
   rLogFC = assay(rld)[,2] - assay(rld)[,1] )
   
    
   head( res[ order(res$rLogFC), ] )
   write.csv(res, file="DEG.xlsx")

 

 

 

Second script

# This R script will analyzed data when there is no replicate

#install.packages("DESeq2")
## try http:// if https:// URLs are not supported
#source("https://bioconductor.org/biocLite.R")
#biocLite("DESeq2")

# Set working directory
#setwd("/Users/Sanjay/Desktop/mirna");
#getwd();

 

# Set working directory
setwd("/Users/Sanjay/Desktop/deseq");
getwd();


# Counts-Matrix
raw_count <- read.delim("count.txt", header=TRUE, row.names=1)
countdata1 <- round(raw_count)
head(countdata1)


# Remove all gene which has 0 value in all sample
all <- apply(countdata1, 1, function(x) all(x==0) )
newdata <- countdata1[!all,]
write.csv(newdata, file = "count_0_filter.csv")
head (newdata)

# remove uninformative genes keep only genes that are expressed in at least 50 count in 2 samples
dat <- newdata[rowSums(newdata > 50) >= 2,]
write.csv(dat, file = "rpkm.csv_0_filter_atleat_50_count.csv")
head (dat)

final_count <- read.csv("rpkm.csv_0_filter_atleat_50_count.csv", header=TRUE, row.names=1)

library(DESeq2)

# Convert to matrix
countdata <- as.matrix(final_count)
head(countdata)
(condition <- factor(c("NJ8", "NJ9")))


library(DESeq2)

# Create a coldata frame and instantiate the DESeqDataSet. See ?DESeqDataSetFromMatrix
(coldata <- data.frame(row.names=colnames(countdata), condition))

 

dds <- DESeqDataSetFromMatrix(countData=countdata, colData=coldata, design=~condition)
dds

#set NJ8 as reference level
dds$condition <- relevel(dds$condition, "NJ8")
dds <- DESeq(dds)
DESeq(dds)

# Regularized log transformation for clustering/heatmaps, etc
rld <- rlogTransformation(dds)
head(assay(rld))
hist(assay(rld))
write.csv(resdata, file="rld.csv")

 

# Get differential expression results
res <- results(dds)
## Order by adjusted fold change
res <- res[order(res$log2FoldChange), ]
## Merge with normalized count data
resdata <- merge(as.data.frame(res), as.data.frame(counts(dds, normalized=TRUE)), by="row.names", sort=FALSE)
names(resdata)[1] <- "Gene"
head(resdata)
## Write results
write.csv(resdata, file="diffexpr-results.csv")


## filter result by log2foldchange i.e. set to 1
dat.log2FC.twofold = res[abs(res$log2FoldChange) >= 1.0,]
write.csv(dat.log2FC.twofold, file="diffexpr-results_filter.csv")


## Volcano plot with "significant" genes labeled
volcanoplot <- function (res, lfcthresh=2, sigthresh=0.05, main="Volcano Plot", legendpos="bottomright", labelsig=TRUE, textcx=1, ...) {
  with(res, plot(log2FoldChange, -log10(pvalue), pch=20, main=main, ...))
  with(subset(res, padj<sigthresh ), points(log2FoldChange, -log10(pvalue), pch=20, col="red", ...))
  with(subset(res, abs(log2FoldChange)>lfcthresh), points(log2FoldChange, -log10(pvalue), pch=20, col="orange", ...))
  with(subset(res, padj<sigthresh & abs(log2FoldChange)>lfcthresh), points(log2FoldChange, -log10(pvalue), pch=20, col="green", ...))
  if (labelsig) {
    require(calibrate)
    with(subset(res, padj<sigthresh & abs(log2FoldChange)>lfcthresh), textxy(log2FoldChange, -log10(pvalue), labs=Gene, cex=textcx, ...))
  }
  legend(legendpos, xjust=1, yjust=1, legend=c(paste("FDR<",sigthresh,sep=""), paste("|LogFC|>",lfcthresh,sep=""), "both"), pch=20, col=c("red","orange","green"))
}
png("diffexpr-volcanoplot.png", 1200, 1000, pointsize=20)
volcanoplot(resdata, lfcthresh=1, sigthresh=0.05, textcx=.8, xlim=c(-2.3, 2))
dev.off()

 

 

 

ADD COMMENTlink modified 14 months ago by Michael Love11k • written 14 months ago by prp2910
0
gravatar for Michael Love
14 months ago by
Michael Love11k
United States
Michael Love11k wrote:

For an experiment without replicates, you should just run DESeq() as normal.

I can't easily see the difference between the two scripts you posted. And make sure to read the section of the help file ?DESeq which starts "Experiments without replicates...". It's important to remember that this kind of analysis is solely for exploratory purposes, and that biological replicates are needed for a proper analysis.

ADD COMMENTlink written 14 months ago by Michael Love11k
Please log in to add an answer.

Help
Access

Use of this site constitutes acceptance of our User Agreement and Privacy Policy.
Powered by Biostar version 2.2.0
Traffic: 120 users visited in the last hour