**0**wrote:

I'm trying to use DESeq2 to test if hybrid > max(parent1, parent2) | hybrid < min(parent1, parent2), where parent1, parent2, and hybrid are three genetic varieties. The DESeq2 framework doesn't directly do this kind of hypothesis test, so I test a bunch of contrasts and then pick the p-value for the appropriate one for each gene. I don't really need an exact p-value, I just need some way to rank the genes.

The problem is that all the adjusted p-values returned by DESeq are 1 and I don't know why. I include a minimal working example below.

library(DESeq2)

# Toy setting with 1000 genes, 3 groups, and 4 replicates per group.

genes = 1000

groups = 3

replicates = 4

# Generate the count table. This is a terrible simulation in

# general, but all I'm trying to do is check my usage of DESeq2.

lambda = rgamma(genes*groups*replicates, 200, 5)

counts = matrix(rpois(genes*groups*replicates, lambda), nrow = genes, ncol = groups*replicates)

# The colData objecct has the experimental design. The three groups (genetic varieties)

# are parent1, parent2, and hybrid. My goal is to find out if hybrid > max(parent1, parent2)

# or hybrid < min(parent1, parent2) in terms of gene expression.

colData = DataFrame(row.names=LETTERS[1:(3*replicates)],

Treatment = factor(rep(c("parent1", "parent2", "hybrid"), each = replicates))

)

# Fit the DESeq model.

se = SummarizedExperiment(assays = SimpleList(counts = counts), colData = colData)

dds <- DESeqDataSet(se = se, design = ~ Treatment - 1)

dds <- DESeq(dds, betaPrior = F)

# For each gene, I extract the DESeq adjusted p-values for the four hypothesis

# tests below and then choose one based on the DESeq2 model coefficients.

# p-values for testing if parent1 > hybrid

p1.g.h = results(dds, lfcThreshold = dim(counts)[1], altHypothesis = "greater",

contrast = c(Treatmenthybrid = -1, Treatmentparent1 = 1, Treatmentparent2 = 0))$padj

# p-values for testing if hybrid > parent2

h.g.p2 = results(dds, lfcThreshold = dim(counts)[1], altHypothesis = "less",

contrast = c(Treatmenthybrid = -1, Treatmentparent1 = 0, Treatmentparent2 = 1))$padj

# p-values for testing if parent2 > hybrid

p2.g.h = results(dds, lfcThreshold = dim(counts)[1], altHypothesis = "greater",

contrast = c(Treatmenthybrid = -1, Treatmentparent1 = 0, Treatmentparent2 = 1))$padj

# p-values for testing if hybrid > parent1

h.g.p1 = results(dds, lfcThreshold = dim(counts)[1], altHypothesis = "less",

contrast = c(Treatmenthybrid = -1, Treatmentparent1 = 1, Treatmentparent2 = 0))$padj

# DESeq2 model coefficients.

cf = coef(dds)

p1 = cf[, "Treatmentparent1"]

p2 = cf[, "Treatmentparent2"]

h = cf[, "Treatmenthybrid"]

# For each gene, I use the coefficients below to pick which p-value to use.

df = cbind(p1, p2, h, p1.g.h, p2.g.h, h.g.p1, h.g.p2)

my_pvals = apply(df, 1, function(x){

if((min(x["p1"], x["p2"]) <= x["h"]) && (x["h"] <= max(x["p1"], x["p2"])))

return(0)

else if(x["h"] > x["p1"] && (x["p1"] >= x["p2"]))

return(x["h.g.p1"])

else if(x["h"] > x["p2"] && (x["p2"] >= x["p1"]))

return(x["h.g.p2"])

else if(x["h"] < x["p1"] && (x["p1"] <= x["p2"]))

return(x["p1.g.h"])

else if(x["h"] < x["p2"] && (x["p2"] <= x["p1"]))

return(x["p2.g.h"])

else

return(1)

})

# All p-values are either 0 or 1. Why is that?

table(my_pvals)

# And all the adjusted p-values are 1

table(p1.g.h)

table(p2.g.h)

table(h.g.p1)

table(h.g.p2)