Hi,
I am trying to analyse and RNASeq data set using DeSeq2. I would like to have the annotated genes in heat maps and also for making a HTML report using ReportingTools, but I am not sure how to pass an already annotated matrix to DESeqDataSetFromMatrix() or how to annotate the results after I call results() on the DeSeq object.
Here are my code and the output:
conddata<-read.csv("Pheno_data_clean.csv", row.names = 1)
conddata
count_matrix<-read.table('Summary/Raw_data_for_genes.csv', sep=',', header=TRUE, row.names = 1)
rawsamples<-c("JK1","JK2","JK3","neg1","neg2","neg3","pos1","pos2","pos3")
count_matrix<-count_matrix[,rawsamples]
annot_matrix<-count_matrixt[,c('EntrezGene','JK1', 'JK2', 'JK3', 'neg1', 'neg2', 'neg3', 'pos1', 'pos2', 'pos3' )]
data_counts<-DESeqDataSetFromMatrix(countData = count_matrix,
colData = conddata,
design = ~Treatment)
data_counts$EntrezGene<-annot_matrix$EntrezGene
data_counts$Treatment <- relevel(data_counts$Treatment, ref="Control")
head(data_counts)
#Perform differential expression analysis
library("AnnotationDbi")
library("org.Ce.eg.db")
data_counts<- DESeq(data_counts)
res<-results(data_counts)
columns(org.Ce.eg.db)
res$symbol <- mapIds(org.Ce.eg.db,
keys=row.names(res),
column="SYMBOL",
keytype="ENSEMBLTRANS",
multiVals="first")
res$entrez <- mapIds(org.Ce.eg.db,
keys=row.names(res),
column="ENTREZID",
keytype="ENSEMBL",
multiVals="first")
rownames(data_counts)
> conddata<-read.csv("Pheno_data_clean.csv", row.names = 1)
> conddata
Strain Treatment
JK1 JK1 PDXJK
JK2 JK2 PDXJK
JK3 JK3 PDXJK
neg1 neg1 Control
neg2 neg2 Control
neg3 neg3 Control
pos1 pos1 UV
pos2 pos2 UV
pos3 pos3 UV
> count_matrix<-read.table('Summary/Raw_data_for_genes.csv', sep=',', header=TRUE, row.names = 1)
> rawsamples<-c("JK1","JK2","JK3","neg1","neg2","neg3","pos1","pos2","pos3")
> count_matrix<-count_matrix[,rawsamples]
> annot_matrix<-count_matrixt[,c('EntrezGene','JK1', 'JK2', 'JK3', 'neg1', 'neg2', 'neg3', 'pos1', 'pos2', 'pos3' )]
> data_counts<-DESeqDataSetFromMatrix(countData = count_matrix,
+ colData = conddata,
+ design = ~Treatment)
> data_counts$Treatment <- relevel(data_counts$Treatment, ref="Control")
> library("AnnotationDbi")
> library("org.Ce.eg.db")
> data_counts<- DESeq(data_counts)
estimating size factors
estimating dispersions
gene-wise dispersion estimates
mean-dispersion relationship
final dispersion estimates
fitting model and testing
> res<-results(data_counts)
> columns(org.Ce.eg.db)
[1] "ACCNUM" "ALIAS" "ENSEMBL" "ENSEMBLPROT"
[5] "ENSEMBLTRANS" "ENTREZID" "ENZYME" "EVIDENCE"
[9] "EVIDENCEALL" "GENENAME" "GO" "GOALL"
[13] "ONTOLOGY" "ONTOLOGYALL" "PATH" "PMID"
[17] "REFSEQ" "SYMBOL" "UNIGENE" "UNIPROT"
[21] "WORMBASE"
> res$symbol <- mapIds(org.Ce.eg.db,
+ keys=row.names(res),
+ column="SYMBOL",
+ keytype="ENSEMBLTRANS",
+ multiVals="first")
Error in .testForValidKeys(x, keys, keytype, fks) :
None of the keys entered are valid keys for 'ENSEMBLTRANS'. Please use the keys method to see a listing of valid arguments. #I guess my keys are in the wrong format, if I call row names() on data_counts I get info such as: transcript:F31C3.9
> res$entrez <- mapIds(org.Ce.eg.db,
+ keys=row.names(res),
+ column="ENTREZID",
+ keytype="ENSEMBL",
+ multiVals="first")
Error in .testForValidKeys(x, keys, keytype, fks) :
None of the keys entered are valid keys for 'ENSEMBL'. Please use the keys method to see a listing of valid arguments.
> biocLite("org.Ce.eg.db")
BioC_mirror: https://bioconductor.org
Using Bioconductor 3.5 (BiocInstaller 1.26.0), R 3.4.1 (2017-06-30).
Installing package(s) 'org.Ce.eg.db'
installing the source package 'org.Ce.eg.db'
trying URL 'https://bioconductor.org/packages/3.5/data/annotation/src/contrib/org.Ce.eg.db_3.4.1.tar.gz'
Content type 'application/x-gzip' length 19307427 bytes (18.4 MB)
==================================================
downloaded 18.4 MB
* installing *source* package ‘org.Ce.eg.db’ ...
** R
** inst
** preparing package for lazy loading
** help
*** installing help indices
** building package indices
** testing if installed package can be loaded
* DONE (org.Ce.eg.db)
The downloaded source packages are in
'/private/var/folders/d4/yy86_b6109d2pm8pvznv6qrh0000gn/T/RtmpJ6dj0i/downloaded_packages'
> library("org.Ce.eg.db")
> desReport<-HTMLReport(shortName='RNASeqAnalysis', title = 'RNASeq_VitB6', reportDirectory = "./reports")
> publish(res, desReport, name="df", countTable=data_counts, pvalueCutoff=0.05, conditions=Treatment, annotation.db="org.Ce.eg", expName="deseq", reportDir="./reports", .modifyDF=makeDESeqDF)
Error: Ids do not appear to be Entrez Ids for the specified species. #I understand that the error here is because res does not have Entrez IDs, but I am really not sure how to pass them
> rownames(data_counts)
[1] "transcript:F31E3.5.1" "transcript:F31C3.9"
[3] "transcript:Y82E9BR.3.3" "transcript:K02B2.5"
[5] "transcript:F02A9.3.1" "transcript:F54D7.7"
R version 3.4.1 (2017-06-30)
Platform: x86_64-apple-darwin15.6.0 (64-bit)
Running under: macOS Sierra 10.12.5
Matrix products: default
BLAS: /System/Library/Frameworks/Accelerate.framework/Versions/A/Frameworks/vecLib.framework/Versions/A/libBLAS.dylib
LAPACK: /Library/Frameworks/R.framework/Versions/3.4/Resources/lib/libRlapack.dylib
Thank you very much, let me know if there is anything I missed. I just need to know how to have the annotations in all the heat maps/graphs/reports.
Best,
Alex

