Deleted:EdgeR, Please help me review my R script in glm approach
0
0
Entering edit mode
RR • 0
@7e31e763
Last seen 23 months ago
Thailand

Hello all, I am doing gene expression profiling of 3 structures(BB, 79A, 28z). I have 9 samples from 3 donors. Each donor has 3 samples, representing 3 structures. For example Donor 1 has structure BB, 79A, 28z , Donor 2 has structure BB, 79A, 28z Donor 3 has structure BB, 79A, 28z. I would like to see the differentially expressed gene of structure 79A. I compare structure 79A vs 28z, 79A vs BB and 28z vs BB. I have attached the pheno column in the picture. Currently I got only 8 samples as another 1 is still in the process of sequencing but I want to try analyze these 8 samples first. I am trying to write my script in glm approach. I am a beginner and quite new to edgeR and I would like to ask for your help to review and confirm whether what I wrote is correct. I have got DEGs from the scripts below however I am not quite reliable with the result. Thank you in advance!!

library(edgeR)

counts <- read.table('NewCounting_file.txt')
head(counts)

str(counts)
#create gene name and gene id variable
gene_name <- counts[,2]
gene_id <- counts[,1]
class(counts)
head(gene_id)
head(gene_name)
#Delete gene id and gene name column on counts 
counts[1:2] <- list(NULL)
head(counts)
#create gene count numeric matrix 
counts_mat <- apply(as.matrix.noquote(counts),2,as.numeric)
head(counts_mat)
class(counts_mat)
#Assign gene name as row name 
row.names(counts_mat) <- gene_name
head(counts_mat)
#check data type before processing
#Make sure data is numeric matrix containing gene name as row and sample name as column
str(counts_mat)
counts_mat
#Assign group as column name 
sample_name <- c("MD7_BB","MD7_79A","MD7_28","BD7_BB","BD7_79A","BD7_28","JD7_79A","JD7_28")
colnames(counts_mat) <- sample_name
head(counts_mat)
## read phenotype data in 
group <- c("MD7_BB","MD7_79A","MD7_28","BD7_BB","BD7_79A","BD7_28","JD7_79A","JD7_28")
Pheno <- read.csv("mypheno.csv", stringsAsFactors = TRUE)![enter image description here][1]
dglist <- DGEList(counts=counts_mat, samples = Pheno, group = group)
str(dglist)
keep <-filterByExpr(dglist)
summary(keep)
dglist <- dglist[keep, ,keep.lib.sizes=FALSE]
dim(dglist)

dglist <- calcNormFactors(dglist, method="TMM")
plotMDS(dglist)
dglist$samples
dglist
#Design matrix
donors <- factor(dglist$sample$Donors)
groups <- factor(dglist$sample$Structures)
design <- model.matrix(~0 + donors + groups)
design

dglist<- estimateDisp(dglist, design, robust=TRUE)
dglist$common.dispersion
plotBCV(dglist)
#Fitting
fit <- glmQLFit(dglist,design, robust=TRUE)
head(fit$coefficients)
colnames(fit)
plotQLDisp(fit)

##############DGE
##To detect genes that are differentially expressed btw any of three treatments
qlf <- glmQLFTest(fit, coef=4:5)
topTags(qlf)
FDR <- p.adjust(qlf$table$PValue, method='fdr')
sum(FDR < 0.05)
summary(decideTests(qlf))
#closer look at individual counts per million for the top gens
top <- rownames(topTags(qlf))
cpm(dglist)[top,]
#total num of genes significantly up-regulated or down regulated at 5%FDR 
summary(decideTests(qlf))
#plot all the logFCs against average count size, highlighting the DE genes
plotMD(qlf)
# the blue line indicate 2 fold up or down
abline(h=c(-1,1), col="blue")
###############################

#detect gene that are differential expressed in 79A vs BBz
qlf_79A_BBz <- glmQLFTest(fit, coef=4)
# Show top 10 differentially expressed genes (DEG)
topTags(qlf_79A_BBz)
#total num of genes significantly up-regulated or down regulated at 5%FDR 
summary(decideTests(qlf_79A_BBz))
#closer look at individual counts per million for the top gens
top <- rownames(topTags(qlf_79A_BBz))
cpm(dglist)[top,]
#plot all the logFCs against average count size, highlighting the DE genes
plotMD(qlf_79A_BBz)
# the blue line indicate 2 fold up or down
abline(h=c(-1,1), col="blue")
#Constructing a table showing differential expression with edge R, trimming out those p-value>=0.05
qlf_79A_BBz$table <- cbind(qlf_79A_BBz$table, FDR=p.adjust(qlf_79A_BBz$table$PValue, method ='fdr'))
head(qlf_79A_BBz)
str(qlf_79A_BBz)

gsign_79A_BBz <- qlf_79A_BBz$table[qlf_79A_BBz$table$FDR<0.05,]
gsign_79A_BBz <- gsign_79A_BBz[order(gsign_79A_BBz$FDR),]
dim(gsign_79A_BBz)
head(gsign_79A_BBz)
head(gsign_79A_BBz,30)
str(gsign_79A_BBz)
summary(gsign_79A_BBz)

##convert to df

gsign_79A_BBz_df <-data.frame(gsign_79A_BBz)
head(gsign_79A_BBz_df)
view(gsign_79A_BBz_df)
new_gsign_79A_BBz <- cbind(rownames(gsign_79A_BBz_df),gsign_79A_BBz_df)
view(gsign_79A_BBz_df)
rownames(new_gsign_79A_BBz)<-NULL 
view(new_gsign_79A_BBz)
colnames(new_gsign_79A_BBz)[1]<-"genename"
View(new_gsign_79A_BBz)
head(new_gsign_79A_BBz)
#save file
write.csv(new_gsign_79A_BBz, file="new_gsign_79A_BBz.csv")
###############################
###############################
#detect gene that are differential expressed in BB vs 28z
qlf_BB_28z <- glmQLFTest(fit, coef=5)
topTags(qlf_BB_28z)
#closer look at individual counts per million for the top gens
top <- rownames(topTags(qlf_BB_28z))
topFDR <- top <- rownames(topTags(qlf_BB_28z))
cpm(dglist)[top,]
#total num of genes significantly up-regulated or down regulated at 5%FDR 
summary(decideTests(qlf_BB_28z))
#plot all the logFCs against average count size, highlighting the DE genes
plotMD(qlf_BB_28z)
# the blue line indicate 2 fold up or down
abline(h=c(-1,1), col="blue")
#Constructing a table showing differential expression with edge R, trimming out those p-value>=0.05
qlf_BB_28z$table <- cbind(qlf_BB_28z$table, FDR=p.adjust(qlf_BB_28z$table$PValue, method ='fdr'))
head(qlf_BB_28z)
str(qlf_BB_28z)

gsign_BB_28z <- qlf_BB_28z$table[qlf_BB_28z$table$FDR<0.05,]
gsign_BB_28z <- gsign_BB_28z[order(gsign_BB_28z$FDR),]
dim(gsign_BB_28z)
head(gsign_BB_28z)
head(gsign_BB_28z,30)
gsign_BB_28z
head(gsign_BB_28z)
str(gsign_BB_28z)
summary(gsign_BB_28z)
##convert to df
gsign_BB_28z_df <-data.frame(gsign_BB_28z)
head(gsign_BB_28z_df)
view(gsign_BB_28z_df)
new_gsign_BB_28z <- cbind(rownames(gsign_BB_28z_df),gsign_BB_28z_df)
view(gsign_BB_28z_df)
rownames(new_gsign_BB_28z)<-NULL 
view(new_gsign_BB_28z)
colnames(new_gsign_BB_28z)[1]<-"genename"
View(new_gsign_BB_28z)
head(new_gsign_BB_28z)
##save file
write.csv(new_gsign_BB_28z, file="new_gsign_BB_28z.csv")

######################################
######################################
#detect gene that are differential expressed in 28z vs 79A
qlf28z_79A <- glmQLFTest(fit, contrast=c(0,0,0,-1,1))
topTags(qlf28z_79A)
#total num of genes significantly up-regulated or down regulated at 5%FDR 
summary(decideTests(qlf28z_79A))
#closer look at individual counts per million for the top gens
top <- rownames(topTags(qlf28z_79A))
cpm(dglist)[top,]
#plot all the logFCs against average count size, highlighting the DE genes
plotMD(qlf_BB_28z)
# the blue line indicate 2 fold up or down
abline(h=c(-1,1), col="blue")
#Constructing a table showing differential expression with edge R, trimming out those p-value>=0.05
qlf28z_79A$table <- cbind(qlf28z_79A$table, FDR=p.adjust(qlf28z_79A$table$PValue, method ='fdr'))
head(qlf28z_79A)
str(qlf28z_79A)

gsign_28z_79A <- qlf28z_79A$table[qlf28z_79A$table$FDR<0.05,]
gsign_28z_79A <- gsign_28z_79A[order(gsign_28z_79A$FDR),]
dim(gsign_28z_79A)
head(gsign_28z_79A)
head(gsign_28z_79A,30)
str(gsign_28z_79A)
summary(gsign_28z_79A)
##convert to data frame
gsign_28z_79A_df <-data.frame(gsign_28z_79A)
head(gsign_28z_79A_df)
view(gsign_28z_79A_df)
new_gsign_28z_79A <- cbind(rownames(gsign_28z_79A_df),gsign_28z_79A_df)
view(gsign_28z_79A_df)
rownames(new_gsign_28z_79A)<-NULL 
view(new_gsign_28z_79A)
colnames(new_gsign_28z_79A)[1]<-"genename"
View(new_gsign_28z_79A)
head(new_gsign_28z_79A)
write.csv(new_gsign_28z_79A, file="new_gsign_28z_79A.csv")
######################################
edgeR DEGs • 614 views
ADD COMMENT
This thread is not open. No new answers may be added
Traffic: 740 users visited in the last hour
Help About
FAQ
Access RSS
API
Stats

Use of this site constitutes acceptance of our User Agreement and Privacy Policy.

Powered by the version 2.3.6