Hi All,
I need help to resolve this problem with deseq.
I have two cultivars (`R` and `S`), two time points (`5d` and `30d`) and four treatments (`Scn`, `Aph`, `ScnAph`, `Ctr`). There are two sets of controls (`Ctr`) for each cultivar and I have a total of 48 samples.
I need to compare:
1. Scn Vs Ctr within resistance within 5 day
2. Scn Vs Aph within susceptible within 5 day
3. Scn Vs Ctr within resistance within 30 day
4. Scn Vs Aph within susceptible within 30 day
5. R Vs S in whole dataset
Can someone please guide me through this. I have tried this model which looks like this, but I am not sure if this is the correct way of doing it. Thanks for your help in advance.
dds <- DESeqDataSetFromMatrix(countData=count.mat,colData=cond,design= ~cultivar+time+cultivar:time)
##you relevel 5d so you'll do "cultivarR.time30d"
dds$time <- relevel(dds$time, ref = "5d")
##you relevel S so you'll do "cultivarR.time5d"
dds$cultivar <- relevel(dds$cultivar, ref = "S")
dds = DESeq(dds, test="LRT", reduced= ~cultivar + time)
#5d vs 30d within resistance
##in case where dds$time <- relevel(dds$time, ref = "S")
resultsNames(dds)
# > resultsNames(dds)
# [1] "Intercept" "cultivar_R_vs_S" "time_5d_vs_30d" "cultivarR.time5d"
#5d vs 30d within resistance
tt<- results(dds, contrast=list(c("time_5d_vs_30d", "cultivarR.time5d")), test="Wald")
My data:
my `count.mat` looks like this:
count.mat <- structure(list(sam5_4a_counts = c(26, 28, 1, 137, 3, 0), sam5_4b_counts = c(13,
6, 0, 95, 3, 0), sam5_4c_counts = c(1, 7, 0, 41, 2, 0), sam5_5a_counts = c(28,
15, 0, 84, 0, 0), sam5_5b_counts = c(12, 29, 1, 97, 2, 1), sam5_5c_counts = c(10,
11, 0, 77, 2, 0), sam5_6a_counts = c(42, 24, 0, 139, 4, 0), sam5_6b_counts = c(29,
28, 1, 166, 1, 1), sam5_6c_counts = c(29, 46, 0, 112, 5, 3),
sam5_7a_counts = c(7, 7, 1, 65, 0, 0), sam5_7b_counts = c(37,
10, 0, 108, 4, 0), sam5_7c_counts = c(7, 4, 0, 47, 0, 0),
sam5_1a_counts = c(44, 56, 4, 107, 2, 0), sam5_1b_counts = c(13,
11, 3, 44, 1, 0), sam5_1c_counts = c(39, 55, 1, 166, 1, 0
), sam5_2a_counts = c(4, 8, 1, 75, 1, 0), sam5_2b_counts = c(126,
160, 10, 414, 5, 1), sam5_2c_counts = c(28, 37, 1, 209, 3,
1), sam5_3a_counts = c(38, 70, 5, 138, 3, 1), sam5_3b_counts = c(132,
218, 6, 390, 14, 5), sam5_3c_counts = c(10, 2, 0, 39, 0,
1), sam5_8a_counts = c(19, 37, 1, 140, 1, 1), sam5_8b_counts = c(5,
12, 0, 63, 1, 0), sam5_8c_counts = c(27, 14, 1, 90, 0, 0),
sam30_4a_counts = c(29, 31, 0, 68, 2, 0), sam30_4b_counts = c(32,
24, 0, 70, 1, 0), sam30_4c_counts = c(13, 8, 0, 38, 2, 1),
sam30_5a_counts = c(22, 14, 0, 104, 2, 0), sam30_5b_counts = c(37,
49, 2, 88, 1, 0), sam30_5c_counts = c(37, 84, 1, 106, 0,
1), sam30_6a_counts = c(74, 58, 3, 110, 2, 1), sam30_6b_counts = c(68,
183, 3, 150, 2, 1), sam30_6c_counts = c(38, 86, 1, 161, 1,
0), sam30_7a_counts = c(21, 27, 0, 93, 2, 0), sam30_7b_counts = c(27,
20, 0, 89, 0, 1), sam30_7c_counts = c(24L, 23L, 0L, 91L,
1L, 0L), sam30_1a_counts = c(5, 7, 0, 16, 0, 0), sam30_1b_counts = c(38,
35, 3, 102, 0, 0), sam30_1c_counts = c(55, 26, 0, 136, 2,
0), sam30_2a_counts = c(20, 6, 0, 65, 2, 0), sam30_2b_counts = c(10,
5, 0, 43, 0, 0), sam30_2c_counts = c(86, 88, 7, 167, 6, 0
), sam30_3a_counts = c(39, 19, 1, 132, 3, 0), sam30_3b_counts = c(30,
31, 0, 113, 2, 0), sam30_3c_counts = c(59, 35, 0, 104, 3,
0), sam30_8a_counts = c(37, 22, 0, 133, 1, 0), sam30_8b_counts = c(28,
20, 0, 150, 2, 0), sam30_8c_counts = c(13, 20, 1, 104, 0,
0)), row.names = c("Glyma.01G000100", "Glyma.01G000200",
"Glyma.01G000300", "Glyma.01G000400", "Glyma.01G000500", "Glyma.01G000600"
), class = "data.frame")
my condition dataframe `cond`.
cond <- structure(list(cultivar = structure(c(1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("R", "S"), class = "factor"),
time = structure(c(2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("30d", "5d"), class = "factor"),
treatment = structure(c(3L, 3L, 3L, 1L, 1L, 1L, 4L, 4L, 4L,
2L, 2L, 2L, 3L, 3L, 3L, 1L, 1L, 1L, 4L, 4L, 4L, 2L, 2L, 2L,
3L, 3L, 3L, 1L, 1L, 1L, 4L, 4L, 4L, 2L, 2L, 2L, 3L, 3L, 3L,
1L, 1L, 1L, 4L, 4L, 4L, 2L, 2L, 2L), .Label = c("Aph", "Ctr",
"Scn", "ScnAph"), class = "factor")), row.names = c(NA, -48L
), class = "data.frame")
Thanks, Michael. I got these results below. Now, how do I run contrast for 1 to 4 ? Could you please confirm if it's correct what I have below? Yes for 5, I just want to see the overall effect.
If you combine all three factors it will be easier to form your contrasts.
Thanks Michael. I merged `cond` dataframe as you suggested, but got the following error below. How do I reduce for one column factor? Could you please confirm if I am doing it correctly?
cond$TimeTreatmentCultivar<- as.factor(paste(cond$cultivar,cond$time,cond$treatment, sep = "_"))
dds <- DESeqDataSetFromMatrix(countData=count.mat,colData=cond,design= ~TimeTreatmentCultivar)
dds = DESeq(dds, test="LRT", reduced= ~ TimeTreatmentCultivar)
Error:
estimating size factors
t orderestimating dispersions
gene-wise dispersion estimates
mean-dispersion relationship
final dispersion estimates
fitting model and testing
Error in nbinomLRT(object, full = full, reduced = reduced, quiet = quiet, :
less than one degree of freedom, perhaps full and reduced models are not in the correc
Your LRT doesn’t make sense here. Just use DESeq(dds) and then results(dds, ...)