Question

Pairwise differential expression using limma

0

Entering edit mode

Melissa • 0

@33edf2a1

Last seen 9 months ago

Malaysia

I want to perform pairwise comparisons between the different groups and retrieve only the top/bottom 5 for each comparison. The output (heatmap) did not show any visible demarcation between the groups. Is there something wrong with my code? The input is raw RNA-seq count.

group <- factor(clin.info$subtype, levels=c("1a", "1b", "1c", "2a",
"2b", "2c")) design <- model.matrix(~group) rownames(design) <-
rownames(clin.info) colnames(design) <- c("KIRP1a", "KIRP1b",
"KIRP1c", "KIRP2a", "KIRP2b", "KIRP2c")

mrna.dge <- DGEList(counts=mat, group=group, remove.zeros=T) keep <-
filterByExpr(mrna.dge, design, min.count=50) mrna.dge <-
mrna.dge[keep,,keep.lib.sizes=FALSE] mrna.dge <-
calcNormFactors(mrna.dge)

v <- voom(mrna.dge, design, plot=TRUE) fit <- lmFit(v, design)
contrasts.matrix <- makeContrasts("KIRP1a-KIRP1b", "KIRP1a-KIRP1c","KIRP1a-KIRP2a", "KIRP1a-KIRP2b", "KIRP1a-KIRP2c","KIRP1b-KIRP1c","KIRP1b-KIRP2a", "KIRP1b-KIRP2b", "KIRP1b-KIRP2c","KIRP1c-KIRP2a","KIRP1c-KIRP2b", "KIRP1c-KIRP2c","KIRP2a-KIRP2b","KIRP2a-KIRP2c","KIRP2b-KIRP2c",levels=colnames(design)) 
fit2 <- contrasts.fit(fit, contrasts.matrix) efit <- eBayes(fit2)

Pairwise comparison:

KIRP1a.v.KIRP1b.DEG <- topTable(efit, coef=1, adjust.method="BH", sort.by="p", p.value=0.05)
KIRP1a.v.KIRP1c.DEG <- topTable(efit, coef=2, adjust.method="BH", sort.by="p", p.value=0.05)
KIRP1a.v.KIRP2a.DEG <- topTable(efit, coef=3, adjust.method="BH", sort.by="p", p.value=0.05)
KIRP1a.v.KIRP2b.DEG <- topTable(efit, coef=4, adjust.method="BH", sort.by="p", p.value=0.05)
KIRP1a.v.KIRP2c.DEG <- topTable(efit, coef=5, adjust.method="BH", sort.by="p", p.value=0.05)
KIRP1b.v.KIRP1c.DEG <- topTable(efit, coef=6, adjust.method="BH", sort.by="p", p.value=0.05)
KIRP1b.v.KIRP2a.DEG <- topTable(efit, coef=7, adjust.method="BH", sort.by="p", p.value=0.05)
KIRP1b.v.KIRP2b.DEG <- topTable(efit, coef=8, adjust.method="BH", sort.by="p", p.value=0.05)
KIRP1b.v.KIRP2c.DEG <- topTable(efit, coef=9, adjust.method="BH", sort.by="p", p.value=0.05)
KIRP1c.v.KIRP2a.DEG <- topTable(efit, coef=10, adjust.method="BH", sort.by="p", p.value=0.05)
KIRP1c.v.KIRP2b.DEG <- topTable(efit, coef=11, adjust.method="BH", sort.by="p", p.value=0.05)
KIRP1c.v.KIRP2c.DEG <- topTable(efit, coef=12, adjust.method="BH", sort.by="p", p.value=0.05)
KIRP2a.v.KIRP2b.DEG <- topTable(efit, coef=13, adjust.method="BH", sort.by="p", p.value=0.05)
KIRP2a.v.KIRP2c.DEG <- topTable(efit, coef=14, adjust.method="BH", sort.by="p", p.value=0.05)
KIRP2b.v.KIRP2c.DEG <- topTable(efit, coef=15, adjust.method="BH", sort.by="p", p.value=0.05)

Combine and reorder DEGs:

top.ranked.mrna <- rbind(KIRP1a.v.KIRP1b.DEG, KIRP1a.v.KIRP1c.DEG, KIRP1a.v.KIRP2a.DEG, KIRP1a.v.KIRP2b.DEG, KIRP1a.v.KIRP2c.DEG,KIRP1b.v.KIRP1c.DEG, KIRP1b.v.KIRP2a.DEG, KIRP1b.v.KIRP2b.DEG, KIRP1b.v.KIRP2c.DEG,KIRP1c.v.KIRP2a.DEG, KIRP1c.v.KIRP2b.DEG, KIRP1c.v.KIRP2c.DEG, KIRP2a.v.KIRP2b.DEG, KIRP2a.v.KIRP2c.DEG)
top.ranked.mrna.ordered <- top.ranked.mrna[order(top.ranked.mrna$adj.P.Val),]

Up- and down-regulated genes:

# Upregulated DEGs
deg.up <- top.ranked.mrna[top.ranked.mrna$adj.P.Val < 0.05 & top.ranked.mrna$logFC > 0, ]
deg.up <- deg.up[rownames(deg.up) %in% rownames(mat),]
deg.up <- deg.up[order(deg.up$adj.P.Val),]
deg.up.top <- deg.up[1:10,]

# Downregulated DEGs
deg.down <- top.ranked.mrna[top.ranked.mrna$adj.P.Val < 0.05 & top.ranked.mrna$logFC < 0, ]
deg.down <- deg.down[rownames(deg.down) %in% rownames(mat),]
deg.down <- deg.down[order(deg.down$adj.P.Val),]
deg.down.top <- deg.down[1:10,]

# All DEGs 
all.deg <- rbind(deg.up, deg.down)
all.deg <- mat[rownames(mat) %in% rownames(all.deg),]
all.deg <- cpm(all.deg, log = TRUE)[rownames(all.deg) %in% rownames(top.ranked.mrna.ordered),]
all.deg <- all.deg[order(match(rownames(all.deg),rownames(top.ranked.mrna.ordered))),]

# Subset of DEGs for heatmap (top and bottom 5 of each group)

top.ranked.subset.hyper <- rbind(KIRP1a.v.KIRP1b.DEG[1:5,], KIRP1a.v.KIRP1c.DEG[1:5,], KIRP1a.v.KIRP2a.DEG[1:5,], KIRP1a.v.KIRP2b.DEG[1:5,], KIRP1a.v.KIRP2c.DEG[1:5,],KIRP1b.v.KIRP1c.DEG[1:5,], KIRP1b.v.KIRP2a.DEG[1:5,], KIRP1b.v.KIRP2b.DEG[1:5,], KIRP1b.v.KIRP2c.DEG[1:5,],KIRP1c.v.KIRP2a.DEG[1:5,], KIRP1c.v.KIRP2b.DEG[1:5,], KIRP1c.v.KIRP2c.DEG[1:5,], KIRP2a.v.KIRP2b.DEG[1:5,], KIRP2a.v.KIRP2c.DEG[1:5,])
top.ranked.subset.hyper <-  top.ranked.subset.hyper[order(top.ranked.subset.hyper$adj.P.Val),]


top.ranked.subset.hypo <- rbind(bottom_n(KIRP1a.v.KIRP1b.DEG,5), bottom_n(KIRP1a.v.KIRP1c.DEG,5), bottom_n(KIRP1a.v.KIRP2a.DEG,5), bottom_n(KIRP1a.v.KIRP2b.DEG,5),bottom_n(KIRP1a.v.KIRP2c.DEG,5),bottom_n(KIRP1b.v.KIRP1c.DEG,5), bottom_n(KIRP1b.v.KIRP2a.DEG,5), bottom_n(KIRP1b.v.KIRP2b.DEG,5), bottom_n(KIRP1b.v.KIRP2c.DEG,5),bottom_n(KIRP1c.v.KIRP2a.DEG,5), bottom_n(KIRP1c.v.KIRP2b.DEG,5), bottom_n(KIRP1c.v.KIRP2c.DEG,5), bottom_n(KIRP2a.v.KIRP2b.DEG,5), bottom_n(KIRP2a.v.KIRP2c.DEG,5))
top.ranked.subset.hyper <-  top.ranked.subset.hyper[order(top.ranked.subset.hyper$adj.P.Val),]

top.deg <- rbind(top.ranked.subset.hyper, top.ranked.subset.hypo)
top.deg <- all.deg[rownames(all.deg) %in% rownames(top.deg),]

# Remove duplicate rows
top.deg <- top.deg[!duplicated(top.deg), ]

# Normalize for plotting
norm.deg <- zscore(top.deg, dist="chisq")

Input:

dput(mat[1:20,1:20])
structure(c(2583, 1360, 1722, 2590, 4478, 1564, 463, 2347, 2467, 
7020.91, 3227, 4716, 5240, 2279.04, 2240, 2618, 13148, 2202, 
6013, 1524, 4720, 2013, 2615, 2562, 4518, 1300, 636, 2504, 1937, 
2328.95, 5165, 6354, 4381, 957.21, 3274, 1999, 1467, 10893, 1729, 
1184, 2768, 2388, 3338, 2715, 5936, 2405, 1010, 4546, 3300, 1941, 
7047, 7309, 58, 1509.53, 5776, 2627, 26281, 4210, 2979, 772, 
1689, 1366, 1407, 3010, 4139, 672, 333, 1772, 872, 2005, 1704, 
4440, 1690, 914.06, 1779, 1266, 4715, 1801, 1760, 1228, 3618, 
1928, 2144, 3096, 4825, 1257, 1701, 2586, 4340, 2491.91, 2022, 
3102, 4289, 875.24, 2096, 1918, 1443, 2344, 2843, 4531, 2936, 
1338, 1957, 1180, 5419, 785, 1685, 2939, 1220, 1251.95, 8810, 
4346, 4017, 2240.73, 3202, 1269, 5575, 3407, 1276, 306, 2889, 
2877, 4046, 10246, 10136, 3848.9, 1638, 3368, 4955, 6752.8, 2340, 
2882, 8117, 11895.08, 3060, 8352, 42026, 5959, 2547, 11283, 3441, 
1990, 2177, 3708, 4412, 2445, 1296, 2567, 7109, 2101.96, 3357, 
3033, 23338, 773.54, 2634, 1730, 4076, 12475, 440, 3734, 2354, 
1971, 1954, 3901, 7452, 1621, 1663, 6607, 7042, 2396.97, 2750, 
8182, 32687, 1392.2, 1825, 5934, 5520, 4346, 4304, 3562, 9455, 
3503, 2853, 3180, 7491, 2593, 1132, 3667, 1569, 1982.91, 9183, 
11425, 101, 2704.9, 2063, 4284, 83201, 3977, 1541, 41, 1495, 
2128, 2410, 2823, 4972, 1885, 498, 2198, 1382, 2375.96, 2041, 
2392, 671, 2106.5, 2228, 3549, 1022, 3825, 1466, 530, 1776, 539, 
1314, 1323, 1405, 324, 617, 1799, 267, 607, 508, 3166, 1316, 
609, 1334, 977, 543, 1114, 698, 211, 5089, 1167, 1984, 2649, 
4904, 1999, 1415, 3950, 1005, 2335, 4258, 2551, 5704, 1262.89, 
2473, 1704, 13564, 5657, 1941, 1031, 409, 1626, 2055, 1867, 9169, 
2193, 1051, 5418, 10556, 1471.95, 1919, 2634, 2855, 2077.01, 
3088, 2278, 29390, 5413, 906, 4104, 1908, 1086, 1769, 2562, 5130, 
1483, 340, 2856, 1447, 5589.95, 2492, 3121, 2546, 1867.33, 1807, 
4363, 6303, 1725, 3403, 115, 1980, 3042, 2040, 6031, 8613, 2544, 
2492, 3244, 14574, 5972.64, 4345, 6637, 15937, 6983.94, 3255, 
1764, 20080, 3549, 948, 4593, 3357, 899, 1182, 2393, 3589, 1003, 
3797, 2896, 7163, 1927.93, 2049, 2486, 12389, 1235.82, 2010, 
2006, 1026, 2592, 2966, 5589, 3749, 2139, 2756, 5537, 5421, 2216, 
1638, 3535, 4765, 2116, 2186, 7265, 4608, 3230.19, 4147, 4101, 
5052, 6825, 1134, 2077, 1198, 1942, 3534, 4988, 6122, 4154, 3954, 
3603, 47622, 6097.76, 7273, 11667, 33619, 3647.39, 4561, 4815, 
23369, 12155, 1497, 3239, 4801, 1821, 1992, 5675, 6114, 2866, 
631, 2112, 1125, 7291.51, 7144, 13832, 12439, 730.17, 2610, 6179, 
9147, 13289, 2301, 7482), dim = c(20L, 20L), dimnames = list(
    c("A4GALT", "AACS", "AAGAB", "AAK1", "AAMP", "AASDHPPT", 
    "AASS", "AATF", "ABAT", "ABCA1", "ABCA2", "ABCA3", "ABCB1", 
    "ABCB6", "ABCB8", "ABCC1", "ABCC3", "ABCC4", "ABCC5", "ABCC6"
    ), c("TCGA.2K.A9WE.01", "TCGA.2Z.A9J1.01", "TCGA.2Z.A9J3.01", 
    "TCGA.2Z.A9J5.01", "TCGA.2Z.A9J6.01", "TCGA.2Z.A9J7.01", 
    "TCGA.2Z.A9J8.01", "TCGA.2Z.A9JD.01", "TCGA.2Z.A9JI.01", 
    "TCGA.2Z.A9JJ.01", "TCGA.2Z.A9JO.01", "TCGA.2Z.A9JQ.01", 
    "TCGA.4A.A93W.01", "TCGA.4A.A93X.01", "TCGA.4A.A93Y.01", 
    "TCGA.5P.A9JU.01", "TCGA.5P.A9JY.01", "TCGA.5P.A9KE.01", 
    "TCGA.A4.7288.01", "TCGA.A4.7583.01")))

dput(clin.info[,c("survival", "subtype")][1:20,])
structure(list(survival = c("lts", "lts", "lts", "lts", "lts", 
"lts", "lts", "lts", "lts", "lts", "lts", "lts", "lts", "lts", 
"lts", "non-lts", "lts", "lts", "lts", "lts"), subtype = c("2a", 
"1a", "1b", "1a", "1a", "1b", "2b", "2b", "2a", "1a", "2b", "1b", 
"1c", "2b", "2a", "2b", "1b", "2b", "2b", "1c")), row.names = c("TCGA.Y8.A8S1.01", 
"TCGA.Y8.A8S0.01", "TCGA.Y8.A8RZ.01", "TCGA.Y8.A8RY.01", "TCGA.Y8.A897.01", 
"TCGA.Y8.A896.01", "TCGA.Y8.A895.01", "TCGA.Y8.A894.01", "TCGA.WN.A9G9.01", 
"TCGA.V9.A7HT.01", "TCGA.UZ.A9Q1.01", "TCGA.UZ.A9Q0.01", "TCGA.UZ.A9PZ.01", 
"TCGA.UZ.A9PX.01", "TCGA.UZ.A9PV.01", "TCGA.UZ.A9PU.01", "TCGA.UZ.A9PS.01", 
"TCGA.UZ.A9PR.01", "TCGA.UZ.A9PP.01", "TCGA.UZ.A9PO.01"), class = "data.frame")

Plot:

enter image description here

limma • 722 views

ADD COMMENT • link 10 months ago Melissa • 0

score 1 · Answer 1 · 2023-06-30

1

Entering edit mode

James W. MacDonald 65k

@james-w-macdonald-5106

Last seen 2 hours ago

United States

I didn't read through all that code. But what you are doing doesn't make sense to me. By combining all the results and then filtering on the p-values you are selecting genes that are significant in just one contrast. To be clear, I'm not saying they all come from the same contrast, but instead each gene selected is based on the results from one contrast. Let's say gene X is highly significant in one contrast, and not at all in the others. That won't necessarily show up in your heatmap because it's buried in all those non-significant results.

You could just as easily do what you have done by using an F-test anyway, and it's just one line.

top.genes <- topTable(efit)

ADD COMMENT • link 10 months ago James W. MacDonald 65k

0

Entering edit mode

Hello James, Thank you for your comments. How do I obtain the top 5 up- or down-regulated DEGs for each contrast?

ADD REPLY • link 10 months ago Melissa • 0

0

Entering edit mode

top5 <- lapply(1:ncol(contrasts.matrix), topTable, n = 5)

ADD REPLY • link 10 months ago James W. MacDonald 65k

0

Entering edit mode

Would this make sense?

group <- factor(clin.info$subtype, levels=c("1a", "1b", "1c", "2a", "2b", "2c")) 
batch <- clin.info$admin.batch_number
design <- model.matrix(~0+group+batch) 
rownames(design) <- rownames(clin.info) 

mrna.dge <- DGEList(counts=mat, group=group, remove.zeros=T) 
keep <- filterByExpr(mrna.dge, design, min.count=50) 
mrna.dge <- mrna.dge[keep,,keep.lib.sizes=FALSE] 
mrna.dge <- calcNormFactors(mrna.dge)

v <- voom(mrna.dge, design, plot=TRUE) 
fit <- lmFit(v, design)
contrasts.matrix <- makeContrasts("KIRP1a-KIRP1b", "KIRP1a-KIRP1c","KIRP1a-KIRP2a", "KIRP1a-KIRP2b", "KIRP1a-KIRP2c",
                                  "KIRP1b-KIRP1c","KIRP1b-KIRP2a", "KIRP1b-KIRP2b", "KIRP1b-KIRP2c",
                                  "KIRP1c-KIRP2a","KIRP1c-KIRP2b", "KIRP1c-KIRP2c",
                                  "KIRP2a-KIRP2b","KIRP2a-KIRP2c",
                                  "KIRP2b-KIRP2c",
                                  levels=colnames(design)) 
fit2 <- contrasts.fit(fit, contrasts.matrix) 
efit <- eBayes(fit2)

# Top DEGs
top.genes.all <- topTable(efit, number=Inf, adjust.method="BH", sort.by="F", p.value=0.05)
top.genes.subset <- topTable(efit, number=100, adjust.method="BH", sort.by="F", p.value=0.05)

# All DEGs 
all.deg <- top.genes.all[rownames(top.genes.all) %in% rownames(mat),]
all.deg <- cpm(mat, log = TRUE)[rownames(mat) %in% rownames(top.genes.all),]
all.deg <- all.deg[order(match(rownames(all.deg),rownames(top.genes.all))),]

# Subset of DEGs for plotting
top.deg <- all.deg[rownames(all.deg) %in% rownames(top.genes.subset),]

ADD REPLY • link 10 months ago Melissa • 0