Entering edit mode
Hello,
I am doing Differential Gene Expression analysis for illumina's WG-
DASL
data using R Bioconductor and GenomeStudio.
I am using Quantile normalisation and BH-FDR on both platform for
finding
DEgenes. But the result is not comparable from both platform.
Please find the attached file for R Script and final result from both
platform and suggest me for any corrections.
Sincerely thanks,
Abdul Rawoof,
-------------- next part --------------
> BSData <-
readBeadSummaryData(dataFile="SampleProbeProfile.txt",sep="\t",
skip=0, ProbeID="ProbeID",
columns = list(exprs
="AVG_Signal",se.exprs="BEAD_STDERR",nObservations = "Avg_NBEADS",
Detection="Detection Pval"))
> slotNames(BSData)
> pData(BSData)
> dim(BSData)
> names(assayData(BSData))
> boxplot(as.data.frame(exprs(BSData)),las=2,outline=FALSE,xlab="Sampl
es",ylab="Average Signals",main="Boxplot for Non-Normalised
Avg_Signals")
> boxplot(as.data.frame(log2(exprs(BSData))),las=2,outline=FALSE,xlab=
"Samples",ylab="Log2Average Signals",main="Boxplot for Non-Normalised
Log2Avg_Signals")
> boxplot(as.data.frame(nObservations(BSData)),las=2,outline=FALSE,xla
b="Samples",ylab="No of BEADS",main="Boxplot for No.of BEADS per
Sample")
> clust <- dist(t(exprs(BSData)))
> hclust <- hclust(clust,method="complete")
> plot(hclust,xlab="SAMPLES",ylab="Heights",main="Cluster Dendogram
Before Normalisation")
> g <- rownames(exprs(BSData))[1:10]
> plotMAXY(log2(exprs(BSData)), arrays =c(1,7), genesToLabel =
g,labelCol="red", labelpch=20,foldLine=2.46,log=FALSE)
> plotMAXY(log2(exprs(BSData)), arrays
=c(1,7),labelpch=20,foldLine=2.46,log=FALSE)
> plotMAXY(log2(exprs(BSData)), arrays =c(6,8,9), genesToLabel =
g,labelCol="red", labelpch=20,foldLine=2.46,log=FALSE)
> plotMAXY(log2(exprs(BSData)), arrays
=c(6,8,9),labelpch=20,foldLine=2.46,log=FALSE)
> plotMAXY(log2(exprs(BSData)), arrays =c(2,4,5), genesToLabel =
g,labelCol="red", labelpch=20,foldLine=2.46,log=FALSE)
> plotMAXY(log2(exprs(BSData)), arrays
=c(2,4,5),labelpch=20,foldLine=2.46,log=FALSE)
> plotMAXY(log2(exprs(BSData)), arrays =c(10,11,12), genesToLabel =
g,labelCol="red", labelpch=20,foldLine=2.46,log=FALSE)
> plotMAXY(log2(exprs(BSData)), arrays
=c(10,11,12),labelpch=20,foldLine=2.46,log=FALSE)
> plotMAXY(log2(exprs(BSData)), arrays =c(2,3,4), genesToLabel =
g,labelCol="red", labelpch=20,foldLine=2.46,log=FALSE)
> plotMAXY(log2(exprs(BSData)), arrays
=c(2,3,4),labelpch=20,foldLine=2.46,log=FALSE)
**********************************************************************
*****************************************
> BSNorm <- normaliseIllumina(BSData,
method="quantile",transform="log2")
> write.table(exprs(BSNorm),file="QNormLog2AvgSingal.txt",sep="\t")
> boxplot(exprs(BSNorm),las=3,outline=F, col=colors,xlab="Samples",
ylab="Log2_Avg_Signals", main="Boxplot For Normalised
Log2AVG_SIGNALS",cex.axis=0.9)
**********************************************************************
*****************************************
> clust <- dist(t(BSNorm))
> hc <-hclust(clust,method="complete")
> plot(hc,xlab="Samples",main="Cluster Dendogram After Normalisation")
**********************************************************************
*****************************************
> plotMAXY(BSNorm, arrays =c(1,7),
labelpch=20,foldLine=2.46,log=FALSE)
> plotMAXY(BSNorm, arrays =c(2,4,5),
labelpch=20,foldLine=2.46,log=FALSE)
> plotMAXY(BSNorm, arrays =c(10,11,12),
labelpch=20,foldLine=2.46,log=FALSE)
> plotMAXY(BSNorm, arrays =c(6,8,9),
labelpch=20,foldLine=2.46,log=FALSE)
> plotMAXY(BSNorm, arrays =c(2,3,4),
labelpch=20,foldLine=2.46,log=FALSE)
**********************************************************************
******************************************
> ###Differential Expression Analysis ###
> design <- matrix(nrow=12,ncol=5,0)
> colnames(design)=c("D1_R","D1_NR","D2_R","D2_NR","Control")
> design[which(strtrim(colnames(exprs(BSNorm)),4)=="D1_R",),1]=1
> design[which(strtrim(colnames(exprs(BSNorm)),5)=="D1_NR",),2]=1
> design[which(strtrim(colnames(exprs(BSNorm)),4)=="D2_R",),3]=1
> design[which(strtrim(colnames(exprs(BSNorm)),5)=="D2_NR",),4]=1
> design[which(strtrim(colnames(exprs(BSNorm)),7)=="Control",),5]=1
**********************************************************************
*******************************************
> fit <-lmFit(exprs(BSNorm),design)
> cont.matrix <- makeContrasts(D1RvsC= D1_R-Control,D1NRvsC=D1_NR-
Control,D2RvsC=D2_R-Control,D2NRvsC=D2_NR-Control,levels=design)
> fit1 <- contrasts.fit(fit,cont.matrix)
> ebfit <- eBayes(fit1)
**********************************************************************
*******************************************
> dTest <- decideTests(ebfit,p.value=0.05,lfc=0)
> summary(dTest)
D1RvsC D1NRvsC D2RvsC D2NRvsC
-1 1 0 3 2
0 29373 29377 29326 29374
1 3 0 48 1
**********************************************************************
*******************************************
>topD1R=toptable(ebfit[,1],coef="D1RvsC",number=Inf,genelist=fit1$gene
s,adjust.method="BH",p.value=1,lfc=0)
> topD1NR =toptable(ebfit[,2],coef="D1NRvsC",number=Inf,genelist=fit1$
genes,adjust.method="BH",p.value=1,lfc=0)
> topD2R =toptable(ebfit[,3],coef="D2RvsC",number=Inf,genelist=fit1$ge
nes,adjust.method="BH",p.value=1,lfc=0)
> topD2NR =toptable(ebfit[,4],coef="D2NRvsC",number=Inf,genelist=fit1$
genes,adjust.method="BH",p.value=1,lfc=0)
**********************************************************************
*******************************************
> vennDiagram(dTest[,1:3], main= " VennDiagram for Differentially
Expressed Genes",include=c("up","down"),counts.col=c("red","green"))
> vennDiagram(dTest[,c(1,3,4)], main= " VennDiagram for Differentially
Expressed Genes",include=c("up","down"),counts.col=c("red","green"))
> vennDiagram(dTest[,3:4], main= " VennDiagram for Differentially
Expressed Genes",include=c("up","down"),counts.col=c("red","green"))
> vennDiagram(dTest[,1:2], main= " VennDiagram for Differentially
Expressed Genes",include=c("up","down"),counts.col=c("red","green"))
**********************************************************************
*******************************************
> smoothScatter(ebfit[,1]$Amean,ebfit[,1]$coefficients,xlab="Average
Intensity",ylab="Log_Ratio",main="SmoothScatter plot for
D1R_vs_Control",pch=1.8,cex=.30)
> smoothScatter(ebfit[,2]$Amean,ebfit[,2]$coefficients,xlab="Average
Intensity",ylab="Log_Ratio",main="SmoothScatter plot for
D1NR_vs_Control",pch=1.8,cex=.30)
> smoothScatter(ebfit[,3]$Amean,ebfit[,3]$coefficients,xlab="Average
Intensity",ylab="Log_Ratio",main="SmoothScatter plot for
D2R_vs_Control",pch=1.8,cex=.30)
> smoothScatter(ebfit[,4]$Amean,ebfit[,4]$coefficients,xlab="Average
Intensity",ylab="Log_Ratio",main="SmoothScatter plot for
D2NR_vs_Control",pch=1.8,cex=.30)
**********************************************************************
*******************************************
> par(mfrow=c(2,2))
> volcanoplot(ebfit[,1],coef=1,highlight=5,names=fit1$genes$TargetID,x
lab="Log_Fold_Change",
ylab="Log_Odds",main="VolcanoPlots of DEgenes for
D1_R",pch=1,cex=.25)
> volcanoplot(ebfit[,2],coef=1,highlight=5,names=fit1$genes$TargetID,x
lab="Log_Fold_Change",
ylab="Log_Odds",main="VolcanoPlots of DEgenes for
D1_NR",pch=1,cex=.25)
> volcanoplot(ebfit[,3],coef=1,highlight=5,names=fit1$genes$TargetID,x
lab="Log_Fold_Change",
ylab="Log_Odds",main="VolcanoPlots of DEgenes for
D2_R",pch=1,cex=.25)
> volcanoplot(ebfit[,4],coef=1,highlight=5,names=fit1$genes$TargetID,x
lab="Log_Fold_Change",
ylab="Log_Odds",main="VolcanoPlots of DEgenes for
D2_NR",pch=1,cex=.25)
**********************************************************************
*******************************************
>library(Heatplus)
> rnD1R <- rownames(topD1R)[topD1R$P.Value<0.05]
> rnD1R <- as.numeric(rnD1R)
> DEgeneD1R <-BSNorm[rnD1R,c(1,7,10,11,12)]
> regD1R =regHeatmap(exprs(DEgeneD1R))
> plot(regD1R)
**********************************************************************
*******************************************
> rnD1NR <- rownames(topD1NR)[topD1NR$P.Value<0.05]
> rnD1NR <- as.numeric(rnD1NR)
> DEgeneD1NR <-BSNorm[rnD1NR,c(6,8,9,10,11,12)]
> regD1NR=regHeatmap(exprs(DEgeneD1NR) )
> plot(regD1NR)
**********************************************************************
*******************************************
> rnD2R <- rownames(topD2R)[topD2R$P.Value<0.05]
> rnD2R <- as.numeric(rnD2R)
> DEgeneD2R <-BSNorm[rnD2R,c(3,10,11,12)]
> regD2R=regHeatmap(exprs(DEgeneD2R) )
> plot(regD2R)
**********************************************************************
*******************************************
> rnD2NR <- rownames(topD2NR)[topD2NR$P.Value<0.05]
> rnD2NR <- as.numeric(rnD2NR)
> DEgeneD2NR <-BSNorm[rnD2NR,c(2,4,5,10,11,12)]
> regD2NR=regHeatmap(exprs(DEgeneD2NR))
> plot(regD2NR)
#######XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXX##############