Hi! I did the code for exactTest and for glmQLFTest for my 153 samples. I used raw counts (DGE) for the DE analysis. Resuts from the exactTest: 10 genes with FDR<0.05 and 1651 genes with p-value< 0.05. With glmQLFTest results, all the p-values are >= 0.99 and all the FDR values are =1. Why are the results so different?
glm approach
library(edgeR)
library(tidyverse)
library(readxl)
library(writexl)
setwd("C:/Users/catel/Documents/DBT_DEG/Analysis_CC")
# read the phenotype features
data <- read.csv("C:/Users/catel/Documents/DBT_DEG/Analysis_CC/Data/Case_Control_Phenotype_Features_NA_BMI_excluded_cellCounts.csv")
group <- as.factor(data$T2D)
age <-as.numeric(data$Age_exact)
sex <-as.factor(data$Sex)
BMI <-as.numeric(gsub(",",".",data$BMI))
cd8t <- as.numeric(data$CD8T)
cd4t <- as.numeric(data$CD4T)
nk <- as.numeric(data$NK)
bcell <- as.numeric(data$Bcell)
mono <- as.numeric(data$Mono)
gran <- as.numeric(data$Gran)
x <-read.csv("C:/Users/catel/Documents/DBT_DEG/Analysis_CC/Data/case_control_gene_counts_filtered_gene_name_type_chr_NA_BMI_excluded_cellCounts.csv")
# gene counts is a csv file obtained from FeatureCounts: first column contains the ENSids (from row 2), column two has in row 1 the sample ID and the rest are the counts for each ENSid.
y <- DGEList(counts=x,group=group)
y <- normLibSizes(y)
keep <- filterByExpr(y, group = group, min.prop = 0.8, min.count = 1)
table(keep)
y <- y[keep, , keep.lib.sizes=FALSE]
y <- normLibSizes(y)
design <- model.matrix(~group + sex + age + BMI + cd8t + cd4t + nk + bcell + mono+ gran)
y <- estimateDisp(y, design)
fit <- glmQLFit(y,design)
qlf <- glmQLFTest(fit,coef=2)
p_values <- qlf$table$PValue
FDR_values <- p.adjust(p_values, method = "BH")
FDR_results <- data.frame(FDR = FDR_values)
all_results_with_FDR <- cbind(qlf, FDR_results)
classical approach
setwd("C:/Users/catel/Documents/DBT_DEG/Analysis_CC")
library("edgeR")
library(tidyr)
data <- read.csv("C:/Users/catel/Documents/DBT_DEG/Analysis_CC/Data/Case_Control_Phenotype_Features_NA_BMI_excluded_cellCounts.csv")
#group include individuals with T2D (YES) and non-diabetics (NO)
group <- as.factor(data$T2D)
age <-as.numeric(data$Age_exact)
sex <-as.factor(data$Sex)
BMI <-as.numeric(gsub(",",".",data$BMI))
cd8t <- as.numeric(data$CD8T)
cd4t <- as.numeric(data$CD4T)
nk <- as.numeric(data$NK)
bcell <- as.numeric(data$Bcell)
mono <- as.numeric(data$Mono)
gran <- as.numeric(data$Gran)
x <-read.csv("C:/Users/catel/Documents/DBT_DEG/Analysis_CC/Data/case_control_gene_counts_filtered_gene_name_type_chr_NA_BMI_excluded_cellCounts.csv")
# gene counts is a csv file obtained from FeatureCounts: first column contains the ENSids (from row 2), column two has in row 1 the sample ID and the rest are the counts for each ENSid.
y <- DGEList(counts=x,group=group)
# Normalizes libraries
y <- calcNormFactors(y)
#Filtration
# this calculates number of genes that fulfill the conditions of cpm >=1 and >= 80% of samples
keep <- filterByExpr(y, group = group, min.prop = 0.8, min.count = 1)
table(keep)
# Apply filtration, add the logical vectors and sort out everything that is false
y <- y[keep, , keep.lib.sizes=FALSE]
#Recalculate library sizes after values are filtered out
y <- calcNormFactors(y)
#Estimate the dispersion to model the biological and technical variability in your data
design <- model.matrix(~group+sex+age+BMI+cd8t+cd4t+nk+bcell+mono+gran)
y <- estimateDisp(y, design)
# Perform exact testing for differential expression
et <- exactTest(y)
# Differential Expression Analysis:
write.table(topTags(et, 65000L), file = "C:/Users/catel/Documents/DBT_DEG/Analysis_CC/Results/Age_sex_BMI_cellCounts_filtrerGene_c_approach/outputfile_A1_BMI_DEG.csv")
results <-read.table("C:/Users/catel/Documents/DBT_DEG/Analysis_CC/Results/Age_sex_BMI_cellCounts_filtrerGene_c_approach/outputfile_A1_BMI_DEG.csv")
write.table(topTags(et, 65000L), file = "C:/Users/catel/Documents/DBT_DEG/Analysis_CC/Results/Age_sex_BMI_cellCounts_filtrerGene_c_approach/outputfile_A1_DEG.txt", row.names = TRUE, col.names=TRUE)
results <- read.table("C:/Users/catel/Documents/DBT_DEG/Analysis_CC/Results/Age_sex_BMI_cellCounts_filtrerGene_c_approach/outputfile_A1_DEG.txt", header = TRUE)
results$FDR<-p.adjust(results$PValue,"fdr")
write.table(results$FDR, "C:/Users/catel/Documents/DBT_DEG/Analysis_CC/Results/Age_sex_BMI_cellCounts_filtrerGene_c_approach/outputfile_A1_BMI_FDR.csv")
Best Regards Francesca