Hi,
I have RNAseq from two cancer+normal cases. I used salmon and proceeded with DESeq2. I do get the ./results/ directory out but the PDF figures are broken. The error message for example in boxplot.ENSG00000077454.15.pdf states: "Error using packet 1 arguments must have same length". The X-axis is labeled "factor", Y-axis is labeled "ENSG.... normalized counts".
$ cat samples.txt
pop center assay sample type patient experiment topic quant_file
1 lab KAPA_stranded_RNAseq PleioAdenVz1 cancer patient1 PleiomorphicAdenoma1_S1 patient1_cancer ./PleioAdenVz1/PleiomorphicAdenoma1_S1.gencode_v28.salmon_quant/quant.sf
2 lab KAPA_stranded_RNAseq PleioAdenVz2 normal patient1 PleiomorphicAdenoma2_S2 patient1_normal ./PleioAdenVz2/PleiomorphicAdenoma2_S2.gencode_v28.salmon_quant/quant.sf
3 lab KAPA_stranded_RNAseq PleioAdenVz9 cancer patient2 PleiomorphicAdenoma9_S3 patient2_cancer ./PleioAdenVz9/PleiomorphicAdenoma9_S3.gencode_v28.salmon_quant/quant.sf
4 lab KAPA_stranded_RNAseq PleioAdenVz10 normal patient2 PleiomorphicAdenoma10_S4 patient2_normal ./PleioAdenVz10/PleiomorphicAdenoma10_S4.gencode_v28.salmon_quant/quant.sf
$
I did something like the following:
$ for d in PleioAdenVz1 PleioAdenVz2 PleioAdenVz9 PleioAdenVz10; do
pushd $d
salmon quant --numThreads $threads --validateMappings --writeUnmappedNames -i ftp.ensembl.org/pub/release-92/fasta/homo_sapiens/cdna/Homo_sapiens.GRCh38.cdna.all -l ISF -1 <(gunzip -c *.fwd_reads.gz) -2 <(gunzip -c *.rev_reads.gz) -o ./${d}.salmon_quant
popd
done
$
I see I should have used --gencode argument because later tximport broke on the extra bars delimiting gene aliases:
[ENST00000456328.2|ENSG00000223972.5|OTTHUMG00000000961.2|OTTHUMT00000362751.1|RP11-34P13.1-002|DDX11L1|1657|processed_transcript|, ENST00000450305.2|ENSG00000223972.5|OTTHUMG00000000961.2|OTTHUMT00000002844.2|RP11-34P13.1-001|DDX11L1|632|transcribed_unprocessed_pseudogene|, ENST00000488147.1|ENSG00000227232.5|OTTHUMG00000000958.1|OTTHUMT00000002839.1|RP11-34P13.2-001|WASH7P|1351|unprocessed_pseudogene|, ...]
# if somebody forgot to run salmon with --gencode here is a workaround
txi <- tximport(files, type = "salmon", tx2gene = tx2gene, ignoreAfterBar = TRUE)
I think I am still doing the right thing so far.
library(tximportData)
library(GenomicFeatures)
txdb <- makeTxDbFromGFF(file="ftp.ebi.ac.uk/pub/databases/gencode/Gencode_human/release_28/gencode.v28.annotation.gff3.gz")
txdb
saveDb(x=txdb, file = "gencode.v28.annotation.TxDb")
k <- keys(txdb, keytype = "TXNAME")
tx2gene <- select(txdb, k, "GENEID", "TXNAME")
head(k)
head(tx2gene)
dim(tx2gene)
length(k)
dir <- '.'
samples <- read.table(file.path(dir, "samples.txt"), header = TRUE)
samples
files <- file.path(samples$quant_file)
names(files) <- samples$run
all(file.exists(files))
files
library(tximport)
# work around
# [ENST00000456328.2|ENSG00000223972.5|OTTHUMG00000000961.2|OTTHUMT00000362751.1|RP11-34P13.1-002|DDX11L1|1657|processed_transcript|, ENST00000450305.2|ENSG00000223972.5|OTTHUMG00000000961.2|OTTHUMT00000002844.2|RP11-34P13.1-001|DDX11L1|632|transcribed_unprocessed_pseudogene|, ENST00000488147.1|ENSG00000227232.5|OTTHUMG00000000958.1|OTTHUMT00000002839.1|RP11-34P13.2-001|WASH7P|1351|unprocessed_pseudogene|, ...]
# if somebody forgot to run salmon with --gencode
txi <- tximport(files, type = "salmon", tx2gene = tx2gene, ignoreAfterBar = TRUE)
names(txi)
library(DESeq2)
dds <- DESeqDataSetFromTximport(txi, colData = samples, design = ~ type + patient)
library(RColorBrewer)
dds <- DESeq(dds)
resultsNames(dds)
# now the shaky part
# dds <- DESeqDataSetFromTximport(txi, colData = samples, design = ~ type + patient + type:patient)
#dds <- DESeqDataSetFromTximport(txi, colData = samples, design = ~ type)
dds$type <- relevel(dds$type, ref="normal")
dds$IndividualN <- factor(c(1,1,2,2))
dds <- DESeq(dds)
#res <- results(dds,contrast=list("type","cancer","normal"))
res <- results(dds,contrast=c("type","cancer","normal"))
library(ReportingTools)
desReport <- HTMLReport(shortName = "RNAseq_analysis_with_DESeq",
title = "Results of differential expression analysis using DESeq2 (cancer vs. normal)",
reportDirectory = "./reports")
publish(res, desReport, DataSet = dds, n = 50, pvalueCutoff = 0.01, lfc = 1,
make.plots = TRUE, factor = dds$dex)
finish(desReport)
As you can see I tried several DESeqDataSetFromTximport alternatives but I always ended up with no result. Well, the page ./reports/RNAseq_analysis_with_DESeq.html does show p-values but can I trust that if elsewhere is silently broke? I haven't seen an error from DESeq2 in my R session.
ENSG00000077454.15 | mini.ENSG00000077454.15.png | -7.08 | 1.23e-38 | 4.09e-35 |
ENSG00000114098.17 | mini.ENSG00000114098.17.png | -3.58 | 8.59e-13 | 5.72e-10 |
ENSG00000119321.8 | mini.ENSG00000119321.8.png | -5.37 | 1.56e-47 |
1.30e-43
|
Thank you for your help.