hello, I have a list of gene of a specific gene ontology and I would like to extract the fold change from EdgeR object based on the list of genes. how is it possible to be done? is it possible to use EdgeR for it? thank you for your help.
for calculation the different expressed gene, i used the following code and extracted the gene of GO i am focusing on:
#Load the required libraries
library(edgeR)
library(limma)
library(RColorBrewer)
library(mixOmics)
library(HTSFilter)
#Before starting modify the data you have by keeping only gene name and samples with replicates (chromosomes and gene length are not required)
#make csv file called (design.csv) contain information of files and condition in two columns to fit with the data matrix
#importing files
setwd("C:/Users/Admin/Project05")
rawCountTable <- read.table("JJ8pH_mRNA_pH7-3.txt", header=TRUE, sep="\t", row.names=1)
sampleInfo <- read.table("design.csv", header=TRUE, sep=",", row.names=1)
head(rawCountTable)
nrow(rawCountTable)
#Create a DGEList data object
dgeFull <- DGEList(rawCountTable, group=sampleInfo$condition)
dgeFull
#Data exploration and quality assessment
pseudoCounts <- log2(dgeFull$counts+1) #Extract pseudo-counts (ie log2(K+1)log2a(K+1))
head(pseudoCounts)
hist(pseudoCounts[,"PH6.rep.1"]) #Histogram for pseudo-counts (sample Cond.WT.Rep.1)
boxplot(pseudoCounts, col="gray", las=3) #Boxplot for pseudo-counts
#MA-plots between PH6 or PH7 samples
par(mfrow=c(1,2))
avalues <- (pseudoCounts[,1] + pseudoCounts[,2])/2
mvalues <- (pseudoCounts[,1] - pseudoCounts[,2])
plot(avalues, mvalues, xlab="A", ylab="M", pch=19, main="pH7")
abline(h=0, col="red")
avalues <- (pseudoCounts[,4] + pseudoCounts[,5])/2
mvalues <- (pseudoCounts[,4] - pseudoCounts[,5])
plot(avalues, mvalues, xlab="A", ylab="M", pch=19, main="pH6")
abline(h=0, col="red")
#MDS for pseudo-counts (using limma package)
plotMDS(pseudoCounts)
sampleDists <- as.matrix(dist(t(pseudoCounts)))
sampleDists
#heatmap for pseudo-counts (using mixOmics package)
cimColor <- colorRampPalette(rev(brewer.pal(9, "Blues")))(16)
cim(sampleDists, color=cimColor, symkey=FALSE)
#remove genes with zero counts for all samples
dgeFull <- DGEList(dgeFull$counts[apply(dgeFull$counts, 1, sum) != 0, ],group=dgeFull$samples$group)
head(dgeFull$counts)
#estimate the normalization factors
dgeFull <- calcNormFactors(dgeFull, method="TMM")
dgeFull$samples
head(dgeFull$counts)
#From the normalization factors and the original count table, find the normalized counts and use the log2log2-transformation to inspect them with boxplots and a MDS. Normalized counts can be extracted from dgeFull using the function cpm:
eff.lib.size <- dgeFull$samples$lib.size*dgeFull$samples$norm.factors
normCounts <- cpm(dgeFull)
pseudoNormCounts <- log2(normCounts + 1)
boxplot(pseudoNormCounts, col="gray", las=3)
plotMDS(pseudoNormCounts)
#estimate common and tagwise dispersion
dgeFull <- estimateCommonDisp(dgeFull)
dgeFull <- estimateTagwiseDisp(dgeFull)
dgeFull
#perform an exact test for the difference in expression between the two conditions ¡°WT¡± and ¡°Mt¡±
dgeTest <- exactTest(dgeFull)
dgeTest
#Independant filtering
filtData <- HTSFilter(dgeFull)$filteredData #remove low expressed genes
dgeTestFilt <- exactTest(filtData)
dgeTestFilt
#Diagnostic plot for multiple testing
hist(dgeTestFilt$table[,"PValue"], breaks=50) #plot an histogram of unadjusted p-values after filtering
#Inspecting the results
resNoFilt <- topTags(dgeTest, n=nrow(dgeTest$table)) #extract a summary of the differential expression statistics
head(resNoFilt)
resFilt <- topTags(dgeTestFilt, n=nrow(dgeTest$table))
head(resFilt)
#compare the number of differentially expressed genes with and without filtering (risk: 1%)
sum(resNoFilt$table$FDR < 0.05) #Before filtering
sum(resFilt$table$FDR < 0.05) #After filtering
#extract and sort differentially expressed genes
sigDownReg <- resFilt$table[resFilt$table$FDR<0.05,]
sigDownReg <- sigDownReg[order(sigDownReg$logFC),]
head(sigDownReg)
sigUpReg <- sigDownReg[order(sigDownReg$logFC, decreasing=TRUE),]
head(sigUpReg)
#write the results in csv files
write.csv(sigDownReg, file="JJ8PH7-3-FDR_0.05_sigDownReg.csv")
write.csv(sigUpReg, file="JJ8PH7-3-FDR_0.05_sigUpReg.csv")
#Interpreting the DE analysis results
plotSmear(dgeTestFilt,de.tags = rownames(resFilt$table)[which(resFilt$table$FDR<0.01)])
abline(h=c(-2,2), col="blue")
plotMD(dgeTestFilt,de.tags = rownames(resFilt$table)[which(resFilt$table$FDR<0.05)])
abline(h=c(-2,2), col="black")
#select 1% differentially expressed genes and produce a heatmap
y <- cpm(dgeFull, log=TRUE, prior.count = 1)
head(y)
selY <- y[rownames(resFilt$table)[resFilt$table$FDR<0.01 & abs(resFilt$table$logFC)>1.5],]
head(selY)
cimColor <- colorRampPalette(rev(brewer.pal(9, "Blues")))(255)[255:1]
finalHM <- cim(t(selY), color=cimColor, symkey=FALSE)
plot(finalHM$ddc, leaflab="none")
finalHM <- cim(t(selY), color=cimColor, symkey=FALSE)
then i used the TopGo package as fellow:
#Note before analysis
#make sure that sequence names in the gene of interest list is similar to that of the annotation file
library(topGO)
library(Rgraphviz)
#mapping of GO annotation file
geneID2GO <- readMappings(file = "PH1_interpro_GOann.txt")
#Defining your list of genes of interest in te annotation file
geneUniverse <- names(geneID2GO)
genesOfInterest <- read.table("PH1-Ph6-7-upregulated GO.txt",header=FALSE)
genesOfInterest <- as.character(genesOfInterest$V1)
#Then we need to tell TopGO where these interesting genes appear in the 'geneUniverse' vector
geneList <- factor(as.integer(geneUniverse %in% genesOfInterest))
names(geneList) <- geneUniverse
#Putting the data together into an R object
#We need to put our data in an object of type 'topGOdata'. This will contain the list of genes of interest, the GO annotations, and the GO hierarchy itself.
myGOdataBP <- new("topGOdata", description="My project", ontology="BP", allGenes=geneList, annot = annFUN.gene2GO, gene2GO = geneID2GO) #for Biological process
myGOdataCC <- new("topGOdata", description="My project", ontology="CC", allGenes=geneList, annot = annFUN.gene2GO, gene2GO = geneID2GO) #for cellular component
myGOdataMF <- new("topGOdata", description="My project", ontology="MF", allGenes=geneList, annot = annFUN.gene2GO, gene2GO = geneID2GO) #for molecular function
#The list of genes of interest can be accessed using the method sigGenes():
sgBP <- sigGenes(myGOdataBP)
str(sgBP)
sgCC <- sigGenes(myGOdataCC)
str(sgCC)
sgMF <- sigGenes(myGOdataMF)
str(sgMF)
#Performing enrichment tests
resultBPClassic <- runTest(myGOdataBP, algorithm="classic", statistic="fisher")
resultBPElim <- runTest(myGOdataBP, algorithm="elim", statistic="fisher")
resultBPTopgo <- runTest(myGOdataBP, algorithm="weight01", statistic="fisher")
resultFisherCC <- runTest(myGOdataCC, algorithm="classic", statistic="fisher")
resultFisherCC
resultFisherMF <- runTest(myGOdataMF, algorithm="classic", statistic="fisher")
resultFisherMF
#We can list the top ten significant results found
allResBP <- GenTable(myGOdataBP, classicFisher = resultBPClassic, elimFisher = resultBPElim, topgoFisher = resultBPTopgo, orderBy = "topgoFisher", ranksOf = "classicFisher", topNodes = 10)
allResCC <- GenTable(myGOdataCC, classicFisher = resultFisherCC, orderBy = "resultFisherCC", ranksOf = "classicFisher", topNodes = 10)
allResMF <- GenTable(myGOdataMF, classicFisher = resultFisherMF, orderBy = "resultFisherMF", ranksOf = "classicFisher", topNodes = 10)
#to save the tables
write.table(allResBP,"PH1-PH6-7-GO-BP.txt",sep="\t")
write.table(allResCC,"PH1-PH6-7-GO-BP.txt",sep="\t")
write.table(allResMF,"PH1-PH6-7-GO-BP.txt",sep="\t")
#Making graphes
showSigOfNodes(myGOdataBP, score(resultBPTopgo), firstSigNodes = 5, useInfo ='all')
showSigOfNodes(myGOdataCC, score(resultFisher), firstSigNodes = 5, useInfo ='all')
showSigOfNodes(myGOdataMF, score(resultFisher), firstSigNodes = 5, useInfo ='all')
printGraph(myGOdataBP, resultFisher, firstSigNodes = 5, fn.prefix = "tGO", useInfo = "all", pdfSW = TRUE)
printGraph(myGOdataCC, resultFisher, firstSigNodes = 5, fn.prefix = "tGO", useInfo = "all", pdfSW = TRUE)
printGraph(myGOdataMF, resultFisher, firstSigNodes = 5, fn.prefix = "tGO", useInfo = "all", pdfSW = TRUE)
#find the annotated genes in GO enrichment
length(usedGO(myGOdata))
myterms <- allResBP$GO.ID
mygenes <- genesInTerm(myGOdata, myterms)
for (i in 1:length(myterms))
{
myterm <- myterms[i]
mygenesforterm <- mygenes[myterm][[1]]
myfactor <- mygenesforterm %in% genesOfInterest # find the genes that are in the list of genes of interest
mygenesforterm2 <- mygenesforterm[myfactor == TRUE]
mygenesforterm2 <- paste(mygenesforterm2, collapse=',')
print(paste("Term",myterm,"genes:",mygenesforterm2))}
What have you tried?
hello i didn’t try anything yet.
Why do you think other people want to put in more effort to solve your problems than you do?