We have quite complex model in the analysis and was wondering about the RUVseq argument "k". Should it be set to 1 or how many in this case below (the vignette isn't really helpful in modifying the example code) ? And is the below right way to implement the W_1 in the model of differential expression analysis? Or how should it be done?
#Ruvseq code
design <- model.matrix(~ time + condition + time:subject.nested + condition:time, data=pData(set))
y <- DGEList(counts=counts(set1), group=coldata$CASE_CONTROL)
y <- calcNormFactors(y, method="upperquartile")
y <- estimateGLMCommonDisp(y, design)
y <- estimateGLMTagwiseDisp(y, design)
fit <- glmFit(y, design)
lrt <- glmLRT(fit, coef=35:36)
top <- topTags(lrt, n=nrow(set1))$table
empirical <- rownames(set1)[which(!(rownames(set1) %in% rownames(top)[1:5000]))]
set2 <- RUVg(set, empirical, k=1)
#Code for differential expression
full <- model.matrix(~ W_1+ time + condition + time:subject.nested + condition:time, pData(set2))
reduced <- model.matrix(~ W_1 + time + condition + time:subject.nested, pData(set2))
ddsFullCountTable <- DESeqDataSetFromMatrix(countData = counts(set2),colData = pData(set2), design = ~ 1 )
ddsFullCountTable <- ddsFullCountTable[ rowSums(counts(ddsFullCountTable)) > 1, ]
keep <- rowSums(counts(ddsFullCountTable) >= 5) >= 5
ddsFullCountTable <- ddsFullCountTable[keep,]
dds <- DESeq(ddsFullCountTable, test="LRT", full=full, reduced=reduced)