design matrix edge R pairwise comparison at different timepoints after infection with replicates
1
0
Entering edit mode
@kaat-de-cremer-5346
Last seen 10.4 years ago
Dear edgeR patrons, I am using edgeR to find genes differentially expressed between infected and mock-infected control plants, at 3 time points after infection. I have RNAseq data for 3 independent tests, so for every single test I have 6 libraries (control + infected at 3 time points). Having three replicates this makes 18 libraries in total. What I did until now is look at each time point separate and calculate DEgenes at that time point as shown in this script: > head(x) C1 C2 C3 T1 T2 T3 1 0 1 2 0 0 0 2 13 6 4 10 8 12 3 17 16 9 10 8 11 4 2 1 2 2 3 2 5. 1 3 1 2 1 3 0 6 958 457 438 565 429 518 > treatment<-factor(c("C","C","C","T","T","T")) > test<-factor(c(1,2,3,1,2,3)) > y<-DGEList(counts=x,group=treatment) Calculating library sizes from column totals. > cpm.y<-cpm(y) > y<-y[rowSums(cpm.y>2)>=3,] > y<-calcNormFactors(y) > design<-model.matrix(~test+treat) > y<-estimateGLMCommonDisp(y,design,verbose=TRUE) Disp = 0.0265 , BCV = 0.1628 > y<-estimateGLMTrendedDisp(y,design) Loading required package: splines > y<-estimateGLMTagwiseDisp(y,design) > fit<-glmFit(y,design) > lrt<-glmLRT(y,fit) This works fine but I wonder if I should do the analysis of the different time points all at once? Will this make a difference? Unfortunately I cannot figure out how to design the matrix. I hope someone can help me, Kaat [[alternative HTML version deleted]]
RNASeq edgeR RNASeq edgeR • 2.6k views
ADD COMMENT
0
Entering edit mode
jribeiro • 0
@jribeiro-8589
Last seen 9.4 years ago
United States

Hi Kaat,

 

I have a similar problem, exactly same design, two treatments (M/R) and 3 time points, 24,48 and 72 h. Here is the script.

Regards,

Jose Ribeiro

 

#Note, input is READ count not RPKM, but I filtered for contigs having RPKM=> 10
#I redirect output below, check path for your machine
x <- read.delim("all-rpkm10.txt",row.names="Symbol")
head(x)
#                            M24_1  M24_2  M24_3  M48_1  M48_2  M48_3  M72_1
#Ir-SigP-5025_FR5_171-225    73300 437360 549622 602856 659898 505265 461383
#Ir-SigP-261154_FR6_668-747  91394 597844 766361 871606 904314 664936 592187
#Ir-SigP-15814_FR2_11-76    151729 466056 589993 669327 713620 555262 499542
#Ir-SigP-238894_FR6_217-303 109401 548717 706688 759050 819945 630981 556457
#Ir-273528                  513723 477842 625334 243488 436690 317665 363292
#Ir-261740                  450616 404725 529356 212800 377810 276754 307546
#                            M72_2  M72_3  R24_1  R24_2   R24_3   R48_1  R48_2
#Ir-SigP-5025_FR5_171-225   578678 492101 636488 689458  848344 1088458 718386
#Ir-SigP-261154_FR6_668-747 775722 606867 918875 982119 1069844 1473023 993263
#Ir-SigP-15814_FR2_11-76    641585 509077 671750 746265  885797 1066416 785909
#Ir-SigP-238894_FR6_217-303 716663 579172 807450 882311 1078659 1270305 906349
#Ir-273528                  392772 208231 529106 693211  481182  415799 362500
#Ir-261740                  350966 194612 444408 572590  399377  350478 314461
#                             R48_3   R72_1  R72_2  R72_3
#Ir-SigP-5025_FR5_171-225    779567  770357 107456 633257
#Ir-SigP-261154_FR6_668-747 1094015 1097808 131793 863420
#Ir-SigP-15814_FR2_11-76     786518  816780 164134 667976
#Ir-SigP-238894_FR6_217-303  926835  966339 132874 781580
#Ir-273528                   577026  316752 316714 394431
#Ir-261740                   493309  268045 275242 346463

targets = read.delim("dge.list.txt")
#

head(targets)
#  Symbol Treat Time lib.size norm.factor
#1  M24_1     M   24 25418308           1
#2  M24_2     M   24 20485372           1
#3  M24_3     M   24 24126994           1
#4  M48_1     M   48 20712009           1
#5  M48_2     M   48 23378038           1
#6  M48_3     M   48 21433471           1
#etc

#load experimental design
#
Time <- factor(c(24,24,24,48,48,48,72,72,72,24,24,24,48,48,48,72,72,72))
Treat <- factor(c("M","M","M","M","M","M","M","M","M","R","R","R","R","R","R","R","R","R"))
Group <- c(1,1,1,2,2,2,3,3,3,4,4,4,5,5,5,6,6,6)
y <- DGEList(counts=x,group=Group)
#
#normalize data
#
y <- calcNormFactors(y)
y$samples
#
# output
#      group lib.size norm.factors
#M24_1     1 24840841    1.1143623
#M24_2     1 20057068    1.0036531
#M24_3     1 23604356    1.0823090
#M48_1     2 20328600    0.9721955
#M48_2     2 22938720    0.9353933
#M48_3     2 20941356    1.1539243
#M72_1     3 21952451    1.0681859
#M72_2     3 24321079    1.2883963
#M72_3     3 16778586    1.4357941
#R24_1     4 22747712    0.7207162
#R24_2     4 23904095    0.6871886
#R24_3     4 24595971    0.9272390
#R48_1     5 28033832    0.9396293
#R48_2     5 24341516    0.9390415
#R48_3     5 26004057    0.8688401
#R72_1     6 23384795    0.9517134
#R72_2     6 22311379    1.1321000
#R72_3     6 24086436    1.0503182
#
data.frame(Sample=colnames(y),Treat,Time)
#   Sample Treat Time
#1   M24_1     M   24
#2   M24_2     M   24
#3   M24_3     M   24
#4   M48_1     M   48
#5   M48_2     M   48
#6   M48_3     M   48
#7   M72_1     M   72
#8   M72_2     M   72
#9   M72_3     M   72
#10  R24_1     R   24
#11  R24_2     R   24
#12  R24_3     R   24
#13  R48_1     R   48
#14  R48_2     R   48
#15  R48_3     R   48
#16  R72_1     R   72
#17  R72_2     R   72
#18  R72_3     R   72
design <- model.matrix(~Time+Treat)
rownames(design) <- colnames(y)
design
# output
#      (Intercept) Time48 Time72 TreatR
#M24_1           1      0      0      0
#M24_2           1      0      0      0
#M24_3           1      0      0      0
#M48_2           1      1      0      0
#M48_3           1      1      0      0
#M72_1           1      0      1      0
#M72_2           1      0      1      0
#M72_3           1      0      1      0
#R24_1           1      0      0      1
#R24_2           1      0      0      1
#R24_3           1      0      0      1
#R48_1           1      1      0      1
#R48_2           1      1      0      1
#R48_3           1      1      0      1
#R72_1           1      0      1      1
#R72_2           1      0      1      1
#R72_3           1      0      1      1
#attr(,"assign")
#[1] 0 1 1 2
#attr(,"contrasts")
#attr(,"contrasts")$Time
#[1] "contr.treatment"
#
#attr(,"contrasts")$Treat
#[1] "contr.treatment"
#Plots multi-dimensional scaling plot (same concept, but different from, PCA)
colors<-c("black","black","black","red","red","red","blue","blue","blue")
plotMDS(y,col=colors,main = "MDS Plot for Count Data RPKM > 10")
#
#predFC {edgeR} Computes estimated coefficients for a NB glm (Negative Binomial Generalized Linear Model) in such a way that the log-fold-changes are shrunk towards zero.
#The higher prior.n, the closer the estimates will be to the common dispersion. The recommended value is the nearest integer to 50/(#samples - #groups).
#
dim(x)
y <- estimateCommonDisp(y)
names(y)
#[1] 20773    18 #rows and samples
y$common.dispersion
#[1] 0.369717
dispersion <- y$common.dispersion
#The higher prior.n, the closer the estimates will be to the common dispersion. The recommended value is the nearest integer to 50/(#samples - #groups).
#In our case #samples = 18, #groups = 6 or 50/(18-6) = 4
logFC <- predFC(y,design,prior.count=4,dispersion=dispersion)
sink("N:/temp/logFC.txt")
logFC
sink()
cor(logFC[,1:4])
# output
#            (Intercept)     Time48      Time72      TreatR
#(Intercept)  1.00000000 -0.197490299 -0.28046068 -0.044456142
#Time48      -0.19749030  1.000000000  0.73210204 -0.009613076
#Time72      -0.28046068  0.732102039  1.00000000 -0.026014629
#TreatR      -0.04445614 -0.009613076 -0.02601463  1.000000000

#Now proceed to determine differentially expressed genes. Fit genewise glms:

y <- estimateGLMCommonDisp(y, design, verbose=TRUE)
#Disp = 0.43976 , BCV = 0.6631 
y <- estimateGLMTrendedDisp(y, design)
y <- estimateGLMTagwiseDisp(y, design)
jpeg("n:/temp/BCV.jpg")
plotBCV(y)
dev.off()
fit <- glmFit(y, design)
#
#get list of FDR<0.05 genes for all data
#
lrt <- glmLRT(fit,coef=2:4)
FDR <- p.adjust(lrt$table$PValue, method="BH")
sum(FDR < 0.05)
#output
#[1] 1279
sink("n:/temp/edgeR-all-time-treatment.txt")
topTags(lrt,n=sum(FDR < 0.05))
sink()
#
#get list of FDR<0.05 genes 48h
#
lrt <- glmLRT(fit,coef=,2)
FDR <- p.adjust(lrt$table$PValue, method="BH")
sink("N:/temp/edgeR-48h.txt")
topTags(lrt,n=sum(FDR < 0.05))
sink()
#
#get list of FDR<0.05 genes 72h
#
lrt <- glmLRT(fit,coef=,3)
FDR <- p.adjust(lrt$table$PValue, method="BH")
sink("N:/temp/edgeR-72h.txt")
topTags(lrt,n=sum(FDR < 0.05))
sink()
#
#get list of FDR<0.05 genes treatment
lrt <- glmLRT(fit,coef=,4)
FDR <- p.adjust(lrt$table$PValue, method="BH")
sink("N:/temp/edgeR-Treat.txt")
topTags(lrt,n=sum(FDR < 0.05))
sink()
#
#========================================DEG PLOTS (similar in purpose to volcano plots=================================
#obtain DEG plots
lrt <- glmLRT(fit,coef=,4)
FDR <- p.adjust(lrt$table$PValue, method="BH")
sum(FDR < 0.05)
top <- rownames(topTags(lrt))
cpm(y)[top,]
summary(dt <- decideTestsDGE(lrt))
isDE <- as.logical(dt)
DEnames <- rownames(y)[isDE]
jpeg("N:/temp/DEG-treat.jpg")
plotSmear(lrt, de.tags=DEnames,main="Treatment")
abline(h=c(-1,1), col="blue")
dev.off()
#time 48h
lrt <- glmLRT(fit,coef=,2)
FDR <- p.adjust(lrt$table$PValue, method="BH")
sum(FDR < 0.05)
top <- rownames(topTags(lrt))
cpm(y)[top,]
summary(dt <- decideTestsDGE(lrt))
isDE <- as.logical(dt)
DEnames <- rownames(y)[isDE]
jpeg("N:/temp/DEG-48h.jpg")
plotSmear(lrt, de.tags=DEnames,main="48 h")
abline(h=c(-1,1), col="blue")
dev.off()
#time 72h
lrt <- glmLRT(fit,coef=,3)
FDR <- p.adjust(lrt$table$PValue, method="BH")
sum(FDR < 0.05)
top <- rownames(topTags(lrt))
cpm(y)[top,]
summary(dt <- decideTestsDGE(lrt))
isDE <- as.logical(dt)
DEnames <- rownames(y)[isDE]
jpeg("N:/temp/DEG-72h.jpg")
plotSmear(lrt, de.tags=DEnames,main="72 h")
abline(h=c(-1,1), col="blue")
dev.off()

 

 

ADD COMMENT
0
Entering edit mode

Hi,

  1. Did you realize you are replying to a post that's > 3 years old?
  2. Is there a question you're asking here? If so, could you please clarify your question and post a more succint/clear version of it as its own/new question (not a reply to a previous question)?
ADD REPLY

Login before adding your answer.

Traffic: 603 users visited in the last hour
Help About
FAQ
Access RSS
API
Stats

Use of this site constitutes acceptance of our User Agreement and Privacy Policy.

Powered by the version 2.3.6