Question

ribosome profiling and rna seq data with Deseq2 package

2

Entering edit mode

ecekartal89 ▴ 20

@ecekartal89-9834

Last seen 2.2 years ago

Germany

Hi all,

I have a dataset and people who worked with that dataset used Deseq and now I want to use Deseq2 and compare the results. However I am a bit confused. I have RNA seq and ribosome profiling data from same tissue. How I can use Deseq2 for ribosome profiling data. Do I need to add some normalisation steps to combine RNA-seq and ribosome profiling data?

> exprTable <- data.frame(L061ribo=L061_ribo$V3, L062ribo=L062_ribo$V3, L063ribo=L063_ribo$V3,
+                         L241ribo=L241_ribo$V3, L242ribo=L242_ribo$V3, L243ribo=L243_ribo$V3,
+                         L061mrna=L061_mRNA$V3, L062mrna=L062_mRNA$V3, L063mrna=L063_mRNA$V3,
+                         L241mrna=L241_mRNA$V3, L242mrna=L242_mRNA$V3, L243mrna=L243_mRNA$V3,
+                         row.names=L061_mRNA$V1)
> 
> allSamples = c("L061mrna","L061ribo", "L062mrna", "L062ribo", "L063mrna",
+                "L063ribo", "L241mrna", "L241ribo", "L242mrna", "L242ribo",
+                "L243mrna", "L243ribo")
> 
> mrnaSamples = c("L061mrna", "L062mrna", "L063mrna", "L241mrna", "L242mrna", "L243mrna")
> 
> conditions<-data.frame(
+   biol	=factor(c("mrna", "ribo", "mrna", "ribo","mrna", "ribo", "mrna", "ribo", "mrna", "ribo","mrna", "ribo")),
+   age		=factor(c("06", "06","06", "24", "24", "24","06", "06","06", "24", "24", "24")),
+   row.names=allSamples
+ )
> 
> 
> #1. countData           # read counts for all the samples
> #2. SampleTable                  #  sample names and their condition status
> exprAll<-exprTable[,allSamples]
> means<-rowMeans(exprAll[,mrnaSamples])
> countData<-exprAll[means>=100,]
> head(countData)
                   L061mrna L061ribo L062mrna L062ribo L063mrna L063ribo L241mrna L241ribo L242mrna L242ribo L243mrna
ENSRNOT00000045657      910      228      904      340      890      422     1005      208      918      220     1166
ENSRNOT00000061299      920      757     1635     1503     1501     1956     1392      719     1273     1036     1620
ENSRNOT00000057034     1436     1234     1392     1400     1485     1753     1895      986     1855     1309     2335
ENSRNOT00000047071      189      327      169      510      172      446      266      263      214      301      535
ENSRNOT00000005653      325      296      332      443      322      503      414      309      341      307      412
ENSRNOT00000025255      478      236      380      239      349      370      589      280      509      383      580
                   L243ribo
ENSRNOT00000045657      344
ENSRNOT00000061299     1174
ENSRNOT00000057034     1803
ENSRNOT00000047071      569
ENSRNOT00000005653      448
ENSRNOT00000025255      501
> ```
Error: attempt to use zero-length variable name
> 
> ```{r DeSeq2 analysis}
Error: attempt to use zero-length variable name
> dds <- DESeqDataSetFromMatrix(countData = countData, colData = conditions, design = ~ biol)
> dds <- DESeq(dds)
estimating size factors
estimating dispersions
gene-wise dispersion estimates
mean-dispersion relationship
final dispersion estimates
fitting model and testing
> result_age <- results(dds) ## contrast specifies conditions to be tested
> res<- result_age[complete.cases(result_age),] ## to remove rows with NA
>   mcols(res, use.names=TRUE)
DataFrame with 6 rows and 2 columns
                       type                               description
                <character>                               <character>
baseMean       intermediate mean of normalized counts for all samples
log2FoldChange      results log2 fold change (MAP): biol ribo vs mrna
lfcSE               results         standard error: biol ribo vs mrna
stat                results         Wald statistic: biol ribo vs mrna
pvalue              results      Wald test p-value: biol ribo vs mrna
padj                results                      BH adjusted p-values
>   summary(res)

out of 8908 with nonzero total read count
adjusted p-value < 0.1
LFC > 0 (up)     : 3391, 38% 
LFC < 0 (down)   : 3330, 37% 
outliers [1]     : 0, 0% 
low counts [2]   : 0, 0% 
(mean count < 35)
[1] see 'cooksCutoff' argument of ?results
[2] see 'independentFiltering' argument of ?results

> ####################################################################################
>   #lower the false discovery rate threshold (the threshold on padj in the results table)
>   #raise the log2 fold change threshold from 0 using the lfcThreshold argument of results
>   res.05 <- results(dds, alpha=.05)
>   table(res.05$padj < .05)

FALSE  TRUE 
 2526  6382 
>   
>   # genes that show significant effects of treatment on gene counts more than doubling or less than halving
>   resLFC1 <- results(dds, lfcThreshold=1)
>   table(resLFC1$padj < 0.1)

FALSE  TRUE 
 8003   905 
>   
>   #Multiple testing
>   sum(res$pvalue < 0.05, na.rm=TRUE)
[1] 6534
>   sum(!is.na(res$pvalue))
[1] 8908
>   
>   sum(res$padj < 0.1, na.rm=TRUE)
[1] 6721
>   
>   #subset the results table to these genes and then sort it by the log2 fold change estimate to get the significant genes with the strongest down-regulation:
>   resSig <- subset(res, padj < 0.1)
>   head(resSig[ order(resSig$log2FoldChange), ])
log2 fold change (MAP): biol ribo vs mrna 
Wald test p-value: biol ribo vs mrna 
DataFrame with 6 rows and 6 columns
                    baseMean log2FoldChange     lfcSE      stat       pvalue         padj
                   <numeric>      <numeric> <numeric> <numeric>    <numeric>    <numeric>
ENSRNOT00000022122  375.8005      -8.617070 0.4611512 -18.68600 6.436621e-78 9.885762e-76
ENSRNOT00000041959  221.1546      -8.029477 0.4685828 -17.13566 8.043061e-66 7.541851e-64
ENSRNOT00000048151  202.3702      -7.880453 0.4742099 -16.61807 5.156761e-62 4.029511e-60
ENSRNOT00000008094  389.2988      -7.872536 0.3874021 -20.32136 8.324137e-92 2.180924e-89
ENSRNOT00000037432  270.4202      -7.456887 0.3836682 -19.43577 3.846162e-84 7.704694e-82
ENSRNOT00000050722  148.3404      -7.266285 0.5013328 -14.49393 1.323404e-47 5.953983e-46
>   
>   #significant genes with the strongest up-regulation:
>   head(resSig[ order(resSig$log2FoldChange, decreasing=TRUE), ])
log2 fold change (MAP): biol ribo vs mrna 
Wald test p-value: biol ribo vs mrna 
DataFrame with 6 rows and 6 columns
                    baseMean log2FoldChange      lfcSE      stat        pvalue          padj
                   <numeric>      <numeric>  <numeric> <numeric>     <numeric>     <numeric>
ENSRNOT00000050750 1376.2906       4.755635 0.18104828  26.26722 4.544604e-152 5.783333e-149
ENSRNOT00000034797 1358.0869       4.671882 0.22111888  21.12837  4.363183e-99  1.439527e-96
ENSRNOT00000065232  705.1629       4.053485 0.26129942  15.51280  2.842252e-54  1.622999e-52
ENSRNOT00000056004 1128.9684       3.453405 0.09453835  36.52914 3.823170e-292 3.405679e-288
ENSRNOT00000063903 1383.1973       3.443192 0.11967516  28.77115 4.925776e-182 7.313135e-179
ENSRNOT00000058371 1128.5519       3.424241 0.10957817  31.24930 2.281797e-214 5.081561e-211

deseq2 • 2.8k views

ADD COMMENT • link updated 8.7 years ago by Bernd Klaus ▴ 610 • written 8.7 years ago by ecekartal89 ▴ 20

score 2 · Answer 1 · 2016-03-08

2

Entering edit mode

Bernd Klaus ▴ 610

@bernd-klaus-6281

Last seen 6.1 years ago

Germany

Your interested in a ratio of ratios, so you need to test the interaction, not the main effects.

I think this post by mike describes exactly what you need:

DESeq2 testing ratio of ratios (RIP-Seq, CLIP-Seq)

where "condition" would be age, and "assay" would be "biol" in your case.

ADD COMMENT • link 8.7 years ago Bernd Klaus ▴ 610

score 1 · Answer 2 · 2016-03-02

hi,

So the way you've set it up is to compare ribosomal profiled samples to mRNA-seq directly. This will generate many differences obviously, but I don't know how informative this is given the likely large number of differences attributable to technical artifacts as well.

The typical experimental design I see with ribosomal profiling is to have ribosomal profiling + mRNA-seq sample both for control and treated groups, and then to ask, for which genes is the RP/mRNA ratio different across treatment.

This gives you more precise information than just: RP vs mRNA. Does that make sense? Is this the kind of results you are interested in here (e.g. across age)?

score 0 · Answer 3 · 2016-03-08

Hi Michael,

Thank you for your answer, yes, I want to see which genes with RP/mRNA ratio different across age. I have tried to use two conditions design from DeSeq2. Does it make sense?

> dds<-DESeqDataSetFromMatrix(countData=countData,colData=conditions,design = ~ biol + age + biol:age)
> dds <- DESeq(dds) 
estimating size factors
estimating dispersions
gene-wise dispersion estimates
mean-dispersion relationship
final dispersion estimates
fitting model and testing
> resultsNames(dds)
[1] "Intercept"         "biol_ribo_vs_mrna" "age_24_vs_06"      "biolribo.age24"   
> results(dds, contrast=c("age","06","24"))
log2 fold change (MLE): age 06 vs 24 
Wald test p-value: age 06 vs 24 
DataFrame with 8915 rows and 6 columns
                       baseMean log2FoldChange     lfcSE        stat     pvalue      padj
                      <numeric>      <numeric> <numeric>   <numeric>  <numeric> <numeric>
ENSRNOT00000045657     537.9098      0.1928019 0.1009639    1.909613 0.05618307 0.2223802
ENSRNOT00000061299    1304.8366      0.3117206 0.2438912    1.278113 0.20120958 0.4704225
ENSRNOT00000057034    1586.0504     -0.1120806 0.1030448   -1.087688 0.27673280 0.5569152
ENSRNOT00000047071     365.6962     -0.5205676 0.2675443   -1.945725 0.05168783 0.2125153
ENSRNOT00000005653     393.5829      0.1302588 0.1174897    1.108682 0.26756724 0.5467488
...                         ...            ...       ...         ...        ...       ...
ENSRNOT00000008637     89.05830    -0.25725925 0.3354751 -0.76685046  0.4431704 0.7022955
ENSRNOT00000023902    623.09212    -0.26311254 0.1634052 -1.61018472  0.1073575 0.3304994
ENSRNOT00000009726     95.18830     0.03859076 0.1986072  0.19430691  0.8459356 0.9350161
ENSRNOT00000046811 312142.02826    -0.10422536 0.2091783 -0.49826090  0.6183002 0.8204988
ENSRNOT00000061269     96.52924     0.02154499 0.3459115  0.06228468  0.9503361 0.9803039
> results(dds, contrast=c("biol","mrna","ribo"))
log2 fold change (MLE): biol mrna vs ribo 
Wald test p-value: biol mrna vs ribo 
DataFrame with 8915 rows and 6 columns
                       baseMean log2FoldChange     lfcSE       stat       pvalue         padj
                      <numeric>      <numeric> <numeric>  <numeric>    <numeric>    <numeric>
ENSRNOT00000045657     537.9098      0.6693836 0.1080685   6.194071 5.862998e-10 2.607306e-09
ENSRNOT00000061299    1304.8366     -0.7947959 0.2439649  -3.257829 1.122682e-03 2.147666e-03
ENSRNOT00000057034    1586.0504     -0.8235605 0.1037262  -7.939756 2.025794e-15 1.606023e-14
ENSRNOT00000047071     365.6962     -2.0798270 0.2666884  -7.798715 6.254090e-15 4.738737e-14
ENSRNOT00000005653     393.5829     -1.1285925 0.1171884  -9.630585 5.939017e-22 8.300639e-21
...                         ...            ...       ...        ...          ...          ...
ENSRNOT00000008637     89.05830     2.02859540 0.3765742  5.3869743 7.165366e-08 2.474525e-07
ENSRNOT00000023902    623.09212     0.04871822 0.1667360  0.2921877 7.701431e-01 8.071318e-01
ENSRNOT00000009726     95.18830     0.05892242 0.2145894  0.2745822 7.836373e-01 8.188592e-01
ENSRNOT00000046811 312142.02826     0.47050435 0.2091847  2.2492298 2.449788e-02 3.736396e-02
ENSRNOT00000061269     96.52924    -0.07780012 0.3529177 -0.2204484 8.255220e-01 8.556021e-01
> results(dds, list( c("age_24_vs_06","biolribo.age24") ))
log2 fold change (MLE): age_24_vs_06+biolribo.age24 effect 
Wald test p-value: age_24_vs_06+biolribo.age24 effect 
DataFrame with 8915 rows and 6 columns
                       baseMean log2FoldChange     lfcSE       stat      pvalue       padj
                      <numeric>      <numeric> <numeric>  <numeric>   <numeric>  <numeric>
ENSRNOT00000045657     537.9098    -0.25897003 0.1169584 -2.2142063  0.02681459  0.1955290
ENSRNOT00000061299    1304.8366    -0.38045511 0.2444221 -1.5565494  0.11957750  0.4184528
ENSRNOT00000057034    1586.0504    -0.02413684 0.1039528 -0.2321904  0.81639017  0.9425913
ENSRNOT00000047071     365.6962    -0.13695779 0.2629600 -0.5208313  0.60248426  0.8469789
ENSRNOT00000005653     393.5829    -0.11819660 0.1166615 -1.0131586  0.31098443  0.6438824
...                         ...            ...       ...        ...         ...        ...
ENSRNOT00000008637     89.05830     0.93810175 0.3942162  2.3796630 0.017328475 0.15896157
ENSRNOT00000023902    623.09212     0.07080414 0.1691422  0.4186072 0.675503233 0.88363287
ENSRNOT00000009726     95.18830     0.62994877 0.2185952  2.8818055 0.003954038 0.07097338
ENSRNOT00000046811 312142.02826    -0.10766766 0.2091914 -0.5146848 0.606773265 0.85043373
ENSRNOT00000061269     96.52924    -0.40859841 0.3634968 -1.1240772 0.260980289 0.59863824
> results(dds, name = "biolribo.age24")
log2 fold change (MLE): biolribo.age24 
Wald test p-value: biolribo.age24 
DataFrame with 8915 rows and 6 columns
                       baseMean log2FoldChange     lfcSE        stat     pvalue      padj
                      <numeric>      <numeric> <numeric>   <numeric>  <numeric> <numeric>
ENSRNOT00000045657     537.9098    -0.06616808 0.1545088 -0.42824794 0.66847062 0.9234757
ENSRNOT00000061299    1304.8366    -0.06873455 0.3452899 -0.19906334 0.84221320 0.9712126
ENSRNOT00000057034    1586.0504    -0.13621747 0.1463708 -0.93063257 0.35204366 0.7725624
ENSRNOT00000047071     365.6962    -0.65752537 0.3751372 -1.75275993 0.07964322 0.4566813
ENSRNOT00000005653     393.5829     0.01206220 0.1655710  0.07285215 0.94192377 0.9920722
...                         ...            ...       ...         ...        ...       ...
ENSRNOT00000008637     89.05830      0.6808425 0.5176388   1.3152850 0.18841413 0.6238519
ENSRNOT00000023902    623.09212     -0.1923084 0.2351815  -0.8177021 0.41352734 0.8157842
ENSRNOT00000009726     95.18830      0.6685395 0.2953450   2.2635884 0.02359944 0.2797681
ENSRNOT00000046811 312142.02826     -0.2118930 0.2958320  -0.7162612 0.47383007 0.8555158
ENSRNOT00000061269     96.52924     -0.3870534 0.5017814  -0.7713586 0.44049439 0.8322839
> result_age <- results(dds) ## contrast specifies conditions to be tested
> res<- result_age[complete.cases(result_age),] ## to remove rows with NA
> mcols(res, use.names=TRUE)
DataFrame with 6 rows and 2 columns
                       type                               description
                <character>                               <character>
baseMean       intermediate mean of normalized counts for all samples
log2FoldChange      results    log2 fold change (MLE): biolribo.age24
lfcSE               results            standard error: biolribo.age24
stat                results            Wald statistic: biolribo.age24
pvalue              results         Wald test p-value: biolribo.age24
padj                results                      BH adjusted p-values
> summary(res)

out of 8903 with nonzero total read count
adjusted p-value < 0.1
LFC > 0 (up)     : 177, 2% 
LFC < 0 (down)   : 70, 0.79% 
outliers [1]     : 0, 0% 
low counts [2]   : 0, 0% 
(mean count < 35)
[1] see 'cooksCutoff' argument of ?results
[2] see 'independentFiltering' argument of ?results