Entering edit mode
I'm doing an RNA seq analysis and I'm trying to show my results using a volcano plot on R studio. I used the the follow script to make my volcano plot on R studio:
# Import count table
countdata <- read.table("family_13_01_RNA-seq.counts_fixed.txt", header=TRUE, row.names=1)
# Convert to matrix
countdata <- as.matrix(countdata)
head(countdata)
# Assign condition (first four are controls, second four and third four contain two different experiments)
condition <- factor(c("unaffected","unaffected","unaffected","affected","affected","affected"),levels=c("unaffected","affected"))
subject <- factor(c("1","2","3","3","2","1"))
library(DESeq2)
# Create a coldata frame and instantiate the DESeqDataSet
coldata <- data.frame(row.names=colnames(countdata), subject, condition)
dds <- DESeqDataSetFromMatrix(countData=countdata, colData=coldata, design=~ subject + condition)
dds
#pre-filtering to keep only rows that have at least 1 reads total
keep <- rowSums(counts(dds)) > 1
dds <- dds[keep,]
# Run the DESeq
dds <- DESeq(dds)
# Regularized log transformation for clustering/heatmaps
rld <- rlogTransformation(dds)
head(assay(rld))
hist(assay(rld))
# Colors for plots below
library(RColorBrewer)
(mycols <- brewer.pal(8, "Dark2")[1:length(unique(condition))])
# Sample distance heatmap
sampleDists <- as.matrix(dist(t(assay(rld))))
library(gplots)
png("qc-heatmap_baker.png", w=1000, h=1000, pointsize=20)
heatmap.2(as.matrix(sampleDists), key=F, trace="none",
col=colorpanel(100, "black", "white"),
ColSideColors=mycols[condition], RowSideColors=mycols[condition],
margin=c(10, 10), main="Sample Distance Matrix")
dev.off()
# Principal components analysis
rld_pca <- function (rld, intgroup = "condition", ntop = 500, colors=NULL, legendpos="bottomleft", main="PCA Biplot", textcx=1, ...) {
require(genefilter)
require(calibrate)
require(RColorBrewer)
rv = rowVars(assay(rld))
select = order(rv, decreasing = TRUE)[seq_len(min(ntop, length(rv)))]
pca = prcomp(t(assay(rld)[select, ]))
fac = factor(apply(as.data.frame(colData(rld)[, intgroup, drop = FALSE]), 1, paste, collapse = " : "))
if (is.null(colors)) {
if (nlevels(fac) >= 3) {
colors = brewer.pal(nlevels(fac), "Paired")
} else {
colors = c("black", "red")
}
}
pc1var <- round(summary(pca)$importance[2,1]*100, digits=1)
pc2var <- round(summary(pca)$importance[2,2]*100, digits=1)
pc1lab <- paste0("PC1 (",as.character(pc1var),"%)")
pc2lab <- paste0("PC2 (",as.character(pc2var),"%)")
plot(PC2~PC1, data=as.data.frame(pca$x), bg=colors[fac], pch=21, xlab=pc1lab, ylab=pc2lab, main=main, ...)
with(as.data.frame(pca$x), textxy(PC1, PC2, labs=rownames(as.data.frame(pca$x)), cex=textcx))
legend(legendpos, legend=levels(fac), col=colors, pch=20)
png("qc-pca.png", 1000, 1000, pointsize=20)
rld_pca(rld, colors=mycols, intgroup="condition", xlim=c(-20, 20))
dev.off()
# Get differential expression results
res <- results(dds)
table(res$padj<0.05)
## Order by adjusted p-value
res <- res[order(res$padj), ]
## Merge with normalized count data
resdata <- merge(as.data.frame(res), as.data.frame(counts(dds, normalized=TRUE)), by="row.names", sort=FALSE)
names(resdata)[1] <- "Gene"
head(resdata)
#get significant results (FDR<0.05)
## Write results
write.csv(resdata, file="sig_diffexpr-results.csv")
## Volcano plot with significant DE genes
volcanoplot <- function (res, lfcthresh=2, sigthresh=0.05, main="Volcano Plot", legendpos="bottomright", labelsig=TRUE, textcx=1, ...) {
with(res, plot(log2FoldChange, -log10(padj), pch=20, main=main, ...))
with(subset(res, padj<sigthresh ), points(log2FoldChange, -log10(padj), pch=20, col="red", ...))
with(subset(res, abs(log2FoldChange)>lfcthresh), points(log2FoldChange, -log10(padj), pch=20, col="orange", ...))
with(subset(res, padj<sigthresh & abs(log2FoldChange)>lfcthresh), points(log2FoldChange, -log10(padj), pch=20, col="green", ...))
if (labelsig) {
require(calibrate)
with(subset(res, padj<sigthresh & abs(log2FoldChange)>lfcthresh), textxy(log2FoldChange, -log10(padj), labs=Gene, cex=textcx, ...))
}
legend(legendpos, xjust=1, yjust=1, legend=c(paste("FDR<",sigthresh,sep=""), paste("|LogFC|>",lfcthresh,sep=""), "Both"), pch=20, col=c("red","orange","green"))
}
png("diffexpr-volcanoplot.png", 1200, 1000, pointsize=20)
volcanoplot(resdata, lfcthresh=1, sigthresh=0.05, textcx=.8, xlim=c(-3, 3))
dev.off()
I am novice R studio user , the issue I'm having is that the gene name labels displayed on my Volcano plot overlap, making them unreadable how can I prevent this overlap of the gene labels?
This isn’t a question about how to use DESeq2 functions so I removed the tag.
Cross-posted https://www.biostars.org/p/396170/