Hi! While re-analyzing GSE65106 from GEO, I have constructed a table containing a subset of samples, as follows:
> a # see below for dput(a)
accession cond donor batch pair
A1.r1 GSM1587362 ASD AA1 1a 1
A1.r2 GSM1587363 ASD AA1 2a 1
A2.r1 GSM1587364 ASD AA2 1a 2
A2.r2 GSM1587365 ASD AA2 2a 2
A3.r1 GSM1587366 ASD AA3 1a 3
A3.r2 GSM1587367 ASD AA3 2a 3
SC1.r1 GSM1587368 Normal AN1 3a 1
SC1.r2 GSM1587369 Normal AN1 3a 1
SC2.r1 GSM1587370 Normal AN2 3a 2
SC2.r2 GSM1587371 Normal AN2 3b 2
SC3.r1 GSM1587372 Normal AN3 3a 3
SC3.r2 GSM1587373 Normal AN3 3b 3
ASC1.r1 GSM1587374 Normal NN1 4a 1
ASC1.r2 GSM1587375 Normal NN1 4a 1
ASC2.r1 GSM1587376 Normal NN2 4a 2
ASC2.r2 GSM1587377 Normal NN3 4a 2
Here, r1/r2 are the replications of experiment on the same subject,
Rows named A1\A2 are autistic (diseased) subjects
Rows named SC1\SC2 are sibling controls; the number denoting to which A(diseased subject) they are paired,
Rows named ASC1\2 are age- and sex-matched controls to ADF.
I want to calculate DEGs between ASD & Normal conditions:
a=structure(list(accession = structure(1:16, .Names = c("ADF.AA1.1",
"ADF.AA1.2", "ADF.AA2.1", "ADF.AA2.2", "ADF.AA3.1", "ADF.AA3.2",
"USCf.AN1.1", "USCf.AN1.2", "USCf.AN2.1", "USCf.AN2.2", "USCf.AN3.1",
"USCf.AN3.2", "ASCF.NN1.1", "ASCF.NN1.2", "ASCF.NN2.1", "ASCF.NN2.2"
), .Label = c("GSM1587362", "GSM1587363", "GSM1587364", "GSM1587365",
"GSM1587366", "GSM1587367", "GSM1587368", "GSM1587369", "GSM1587370",
"GSM1587371", "GSM1587372", "GSM1587373", "GSM1587374", "GSM1587375",
"GSM1587376", "GSM1587377"), class = "factor"), cond = structure(c(ADF.AA1.1 = 1L,
ADF.AA1.2 = 1L, ADF.AA2.1 = 1L, ADF.AA2.2 = 1L, ADF.AA3.1 = 1L,
ADF.AA3.2 = 1L, USCf.AN1.1 = 2L, USCf.AN1.2 = 2L, USCf.AN2.1 = 2L,
USCf.AN2.2 = 2L, USCf.AN3.1 = 2L, USCf.AN3.2 = 2L, ASCF.NN1.1 = 2L,
ASCF.NN1.2 = 2L, ASCF.NN2.1 = 2L, ASCF.NN2.2 = 2L), .Label = c("ASD",
"Normal"), class = "factor"), donor = structure(c(ADF.AA1.1 = 1L,
ADF.AA1.2 = 1L, ADF.AA2.1 = 2L, ADF.AA2.2 = 2L, ADF.AA3.1 = 3L,
ADF.AA3.2 = 3L, USCf.AN1.1 = 4L, USCf.AN1.2 = 4L, USCf.AN2.1 = 5L,
USCf.AN2.2 = 5L, USCf.AN3.1 = 6L, USCf.AN3.2 = 6L, ASCF.NN1.1 = 7L,
ASCF.NN1.2 = 7L, ASCF.NN2.1 = 8L, ASCF.NN2.2 = 9L), .Label = c("AA1",
"AA2", "AA3", "AN1", "AN2", "AN3", "NN1", "NN2", "NN3"), class = "factor"),
batch = structure(c(ADF.AA1.1 = 1L, ADF.AA1.2 = 2L, ADF.AA2.1 = 1L,
ADF.AA2.2 = 2L, ADF.AA3.1 = 1L, ADF.AA3.2 = 2L, USCf.AN1.1 = 3L,
USCf.AN1.2 = 3L, USCf.AN2.1 = 3L, USCf.AN2.2 = 4L, USCf.AN3.1 = 3L,
USCf.AN3.2 = 4L, ASCF.NN1.1 = 5L, ASCF.NN1.2 = 5L, ASCF.NN2.1 = 5L,
ASCF.NN2.2 = 5L), .Label = c("1a", "2a", "3a", "3b", "4a"
), class = "factor"), pair = structure(c(1L, 1L, 2L, 2L,
3L, 3L, 1L, 1L, 2L, 2L, 3L, 3L, 1L, 1L, 2L, 2L), .Label = c("1",
"2", "3"), class = "factor")), class = "data.frame", row.names = c("A1.r1",
"A1.r2", "A2.r1", "A2.r2", "A3.r1", "A3.r2", "SC1.r1", "SC1.r2",
"SC2.r1", "SC2.r2", "SC3.r1", "SC3.r2", "ASC1.r1", "ASC1.r2",
"ASC2.r1", "ASC2.r2"))
eset= matrix (rnorm(80),5)*10# test dataset
library (limma)
mm = model.matrix (~0+cond+donor+batch+pair,a)
mc= makeContrasts ('condASD-condNormal', levels=mm)
lf= lmFit (eset, mm )
cf=contrasts.fit (lf, mc)
eb=eBayes (cf)
tt=topTable(eb)
tt
I wondered: 0) Is it a (biologically) common/accepted practice to treat unaffected siblings and age-and sex-matched controls equally?
1) Whether what I coded correctly answer the problem?
2) What is the "practical" meaning of following warning: (I am very uncomfortable with "pair2 pair3" being part of the message! )
Coefficients not estimable: donorNN3 batch3b batch4a pair2 pair3
Warning message:
Partial NA coefficients for 5 probe(s)
3) Is it passable to neglect certain factors - here, pair for example- and proceed with the "less accurate" analysis?
For the benefit of other readers, this is what OP's data.frame looks like:
Thank you. I updated the original question according to your post. PS. I also edited the title.