extract fold change of a list of gene from EdgeR?
1
0
Entering edit mode
najib • 0
@59ce8241
Last seen 7 weeks ago
South Korea

hello, I have a list of gene of a specific gene ontology and I would like to extract the fold change from EdgeR object based on the list of genes. how is it possible to be done? is it possible to use EdgeR for it? thank you for your help.

for calculation the different expressed gene, i used the following code and extracted the gene of GO i am focusing on:

#Load the required libraries
library(edgeR)
library(limma)
library(RColorBrewer)
library(mixOmics)
library(HTSFilter)
#Before starting modify the data you have by keeping only gene name and samples with replicates (chromosomes and gene length are not required)
#make csv file called (design.csv) contain information of files and condition in two columns to fit with the data matrix
#importing files
nrow(rawCountTable)
#Create a DGEList data object
dgeFull <- DGEList(rawCountTable, group=sampleInfo$condition) dgeFull #Data exploration and quality assessment pseudoCounts <- log2(dgeFull$counts+1) #Extract pseudo-counts (ie log2(K+1)log2a(K+1))
hist(pseudoCounts[,"PH6.rep.1"]) #Histogram for pseudo-counts (sample Cond.WT.Rep.1)
boxplot(pseudoCounts, col="gray", las=3) #Boxplot for pseudo-counts
#MA-plots between PH6 or PH7 samples
par(mfrow=c(1,2))
avalues <- (pseudoCounts[,1] + pseudoCounts[,2])/2
mvalues <- (pseudoCounts[,1] - pseudoCounts[,2])
plot(avalues, mvalues, xlab="A", ylab="M", pch=19, main="pH7")
abline(h=0, col="red")
avalues <- (pseudoCounts[,4] + pseudoCounts[,5])/2
mvalues <- (pseudoCounts[,4] - pseudoCounts[,5])
plot(avalues, mvalues, xlab="A", ylab="M", pch=19, main="pH6")
abline(h=0, col="red")
#MDS for pseudo-counts (using limma package)
plotMDS(pseudoCounts)
sampleDists <- as.matrix(dist(t(pseudoCounts)))
sampleDists
#heatmap for pseudo-counts (using mixOmics package)
cimColor <- colorRampPalette(rev(brewer.pal(9, "Blues")))(16)
cim(sampleDists, color=cimColor, symkey=FALSE)
#remove genes with zero counts for all samples
dgeFull <- DGEList(dgeFull$counts[apply(dgeFull$counts, 1, sum) != 0, ],group=dgeFull$samples$group)
head(dgeFull$counts) #estimate the normalization factors dgeFull <- calcNormFactors(dgeFull, method="TMM") dgeFull$samples
head(dgeFull$counts) #From the normalization factors and the original count table, find the normalized counts and use the log2log2-transformation to inspect them with boxplots and a MDS. Normalized counts can be extracted from dgeFull using the function cpm: eff.lib.size <- dgeFull$samples$lib.size*dgeFull$samples$norm.factors normCounts <- cpm(dgeFull) pseudoNormCounts <- log2(normCounts + 1) boxplot(pseudoNormCounts, col="gray", las=3) plotMDS(pseudoNormCounts) #estimate common and tagwise dispersion dgeFull <- estimateCommonDisp(dgeFull) dgeFull <- estimateTagwiseDisp(dgeFull) dgeFull #perform an exact test for the difference in expression between the two conditions ¡°WT¡± and ¡°Mt¡± dgeTest <- exactTest(dgeFull) dgeTest #Independant filtering filtData <- HTSFilter(dgeFull)$filteredData #remove low expressed genes
dgeTestFilt <- exactTest(filtData)
dgeTestFilt
#Diagnostic plot for multiple testing
hist(dgeTestFilt$table[,"PValue"], breaks=50) #plot an histogram of unadjusted p-values after filtering #Inspecting the results resNoFilt <- topTags(dgeTest, n=nrow(dgeTest$table)) #extract a summary of the differential expression statistics
resFilt <- topTags(dgeTestFilt, n=nrow(dgeTest$table)) head(resFilt) #compare the number of differentially expressed genes with and without filtering (risk: 1%) sum(resNoFilt$table$FDR < 0.05) #Before filtering sum(resFilt$table$FDR < 0.05) #After filtering #extract and sort differentially expressed genes sigDownReg <- resFilt$table[resFilt$table$FDR<0.05,]
sigDownReg <- sigDownReg[order(sigDownReg$logFC),] head(sigDownReg) sigUpReg <- sigDownReg[order(sigDownReg$logFC, decreasing=TRUE),]
#write the results in csv files
write.csv(sigDownReg, file="JJ8PH7-3-FDR_0.05_sigDownReg.csv")
write.csv(sigUpReg, file="JJ8PH7-3-FDR_0.05_sigUpReg.csv")
#Interpreting the DE analysis results
plotSmear(dgeTestFilt,de.tags = rownames(resFilt$table)[which(resFilt$table$FDR<0.01)]) abline(h=c(-2,2), col="blue") plotMD(dgeTestFilt,de.tags = rownames(resFilt$table)[which(resFilt$table$FDR<0.05)])
abline(h=c(-2,2), col="black")
#select 1% differentially expressed genes and produce a heatmap
y <- cpm(dgeFull, log=TRUE, prior.count = 1)
selY <- y[rownames(resFilt$table)[resFilt$table$FDR<0.01 & abs(resFilt$table$logFC)>1.5],] head(selY) cimColor <- colorRampPalette(rev(brewer.pal(9, "Blues")))(255)[255:1] finalHM <- cim(t(selY), color=cimColor, symkey=FALSE) plot(finalHM$ddc, leaflab="none")
finalHM <- cim(t(selY), color=cimColor, symkey=FALSE)


then i used the TopGo package as fellow:

#Note before analysis
#make sure that sequence names in the gene of interest list is similar to that of the annotation file
library(topGO)
library(Rgraphviz)
#mapping of GO annotation file
#Defining your list of genes of interest in te annotation file
geneUniverse <- names(geneID2GO)
genesOfInterest <- as.character(genesOfInterest$V1) #Then we need to tell TopGO where these interesting genes appear in the 'geneUniverse' vector geneList <- factor(as.integer(geneUniverse %in% genesOfInterest)) names(geneList) <- geneUniverse #Putting the data together into an R object #We need to put our data in an object of type 'topGOdata'. This will contain the list of genes of interest, the GO annotations, and the GO hierarchy itself. myGOdataBP <- new("topGOdata", description="My project", ontology="BP", allGenes=geneList, annot = annFUN.gene2GO, gene2GO = geneID2GO) #for Biological process myGOdataCC <- new("topGOdata", description="My project", ontology="CC", allGenes=geneList, annot = annFUN.gene2GO, gene2GO = geneID2GO) #for cellular component myGOdataMF <- new("topGOdata", description="My project", ontology="MF", allGenes=geneList, annot = annFUN.gene2GO, gene2GO = geneID2GO) #for molecular function #The list of genes of interest can be accessed using the method sigGenes(): sgBP <- sigGenes(myGOdataBP) str(sgBP) sgCC <- sigGenes(myGOdataCC) str(sgCC) sgMF <- sigGenes(myGOdataMF) str(sgMF) #Performing enrichment tests resultBPClassic <- runTest(myGOdataBP, algorithm="classic", statistic="fisher") resultBPElim <- runTest(myGOdataBP, algorithm="elim", statistic="fisher") resultBPTopgo <- runTest(myGOdataBP, algorithm="weight01", statistic="fisher") resultFisherCC <- runTest(myGOdataCC, algorithm="classic", statistic="fisher") resultFisherCC resultFisherMF <- runTest(myGOdataMF, algorithm="classic", statistic="fisher") resultFisherMF #We can list the top ten significant results found allResBP <- GenTable(myGOdataBP, classicFisher = resultBPClassic, elimFisher = resultBPElim, topgoFisher = resultBPTopgo, orderBy = "topgoFisher", ranksOf = "classicFisher", topNodes = 10) allResCC <- GenTable(myGOdataCC, classicFisher = resultFisherCC, orderBy = "resultFisherCC", ranksOf = "classicFisher", topNodes = 10) allResMF <- GenTable(myGOdataMF, classicFisher = resultFisherMF, orderBy = "resultFisherMF", ranksOf = "classicFisher", topNodes = 10) #to save the tables write.table(allResBP,"PH1-PH6-7-GO-BP.txt",sep="\t") write.table(allResCC,"PH1-PH6-7-GO-BP.txt",sep="\t") write.table(allResMF,"PH1-PH6-7-GO-BP.txt",sep="\t") #Making graphes showSigOfNodes(myGOdataBP, score(resultBPTopgo), firstSigNodes = 5, useInfo ='all') showSigOfNodes(myGOdataCC, score(resultFisher), firstSigNodes = 5, useInfo ='all') showSigOfNodes(myGOdataMF, score(resultFisher), firstSigNodes = 5, useInfo ='all') printGraph(myGOdataBP, resultFisher, firstSigNodes = 5, fn.prefix = "tGO", useInfo = "all", pdfSW = TRUE) printGraph(myGOdataCC, resultFisher, firstSigNodes = 5, fn.prefix = "tGO", useInfo = "all", pdfSW = TRUE) printGraph(myGOdataMF, resultFisher, firstSigNodes = 5, fn.prefix = "tGO", useInfo = "all", pdfSW = TRUE) #find the annotated genes in GO enrichment length(usedGO(myGOdata)) myterms <- allResBP$GO.ID
mygenes <- genesInTerm(myGOdata, myterms)
for (i in 1:length(myterms))
{
myterm <- myterms[i]
mygenesforterm <- mygenes[myterm][[1]]
myfactor <- mygenesforterm %in% genesOfInterest # find the genes that are in the list of genes of interest
mygenesforterm2 <- mygenesforterm[myfactor == TRUE]
mygenesforterm2 <- paste(mygenesforterm2, collapse=',')
print(paste("Term",myterm,"genes:",mygenesforterm2))}

EdgeR RNAseq Foldchange • 578 views
0
Entering edit mode

What have you tried?

0
Entering edit mode

hello i didn’t try anything yet.

0
Entering edit mode

Why do you think other people want to put in more effort to solve your problems than you do?

1
Entering edit mode
@gordon-smyth
Last seen 9 hours ago
WEHI, Melbourne, Australia

The topTags function collates results from edgeR test objects into an easy-to-read data.frame format, including log2-fold-changes.

0
Entering edit mode

how is possible to use topTags for that? as i knoe toptags extracf the top genes not based on a list of genes.

0
Entering edit mode

how is possible to use topTags for that? as i knoe toptags extracf the top genes not based on a list of genes.

1
Entering edit mode

It is easy to subset objects in R, and your code shows that you are already familiar with subsetting. Just use topTags to extract all genes and then subset the data.frame to your gene list, or else subset the edgeR object to your list of genes before running topTags.

BTW, can I say that ranking genes by fold-change is not usually a good idea. Rather than prioritizing biologically important genes, it will instead prioritize genes at low expression levels.

0
Entering edit mode

thank you for your help. we just extract the genes with a certain fold change. then, I annotated their gene ontology and used TopGO to extract the genes related to catalytic gene ontology. then I was planning make heat map based on their fold change. we are interested in the metabolic activity and more precisely the cellulase activity as our strain of interest showed a high cellulase activity compared to reference strain.

0
Entering edit mode

The code looks like it came from here, so OP might not understand much at all;

http://www.nathalievialaneix.eu/doc/html/solution_edgeR-tomato-withcode.html