I am analyzing my Clariom D Human WT Assay data, which measures the expression levels of mRNA, lncRNA, miRNA and circRNA at the same time. However, I find that the official .csv annotation file contains information from many different sources (RefSeq, Ensembl, AceView, lncRNAWiki, etc.) at the same time, which makes it too complex to extract the gene IDs. I did use the clariomdhumantranscriptcluster.db
package from James W. MacDonald before; however, I found there are still many NA values, and even after I deleted them the matched probesets number only about 25000. Besides, I did DEG analysis on the whole expression matrix and only got about 1000 DEGs, and found almost nothing enriched in the subsequent enrichment analysis. Emily from the Ensembl team told me there is no annotation package for my assay.
So I have two questions now. 1) Does the clariomdhumantranscriptcluster.db
package still work? If not, how can I extract the gene IDs from the official annotation file? 2) Do you think I should annotate first and then do DEG analysis separately for each type of RNA? I previously did DEG analysis on the whole expression matrix before annotation and found almost nothing significant. Besides, I read some papers in which the authors separated lncRNA and mRNA first, did DEG analysis on each separately, and got good results. Since I am a freshman, I am unsure how to approach my DEG analysis. Should I analyze the different RNA types separately?
> # Load the Clariom D transcript-level annotation CSV. Text following "#" is
> # treated as a comment (comment.char = "#"), so the file's "#" header lines
> # are not parsed as data.
> ann.df <- read.csv(
+   "Clariom_D_Human.r1.na36.hg38.a1.transcript.csv",
+   header = TRUE,
+   sep = ",",
+   dec = ".",
+   fill = TRUE,
+   comment.char = "#"
+ )
> # Print the first annotation record to inspect the available columns.
> head(ann.df, 1)
transcript_cluster_id probeset_id seqname strand start stop total_probes
1 TC0100006432.hg.1 TC0100006432.hg.1 chr1 + 11869 14412 10
gene_assignment
1 NR_046018 // DDX11L1 // DEAD/H (Asp-Glu-Ala-Asp/His) box helicase 11 like 1 // 1p36.33 // 100287102 /// OTTHUMT00000002844 // DDX11L1 // DEAD/H (Asp-Glu-Ala-Asp/His) box helicase 11 like 1 // 1p36.33 // 100287102 /// OTTHUMT00000362751 // DDX11L1 // DEAD/H (Asp-Glu-Ala-Asp/His) box helicase 11 like 1 // 1p36.33 // 100287102
mrna_assignment
1 NR_046018 // RefSeq // Homo sapiens DEAD/H (Asp-Glu-Ala-Asp/His) box helicase 11 like 1 (DDX11L1), non-coding RNA. // chr1 // 100 // 100 // 0 // --- // 0 /// OTTHUMT00000002844 // Havana transcript // DEAD/H (Asp-Glu-Ala-Asp/His) box helicase 11 like 1[gene_biotype:transcribed_unprocessed_pseudogene transcript_biotype:transcribed_unprocessed_pseudogene] // chr1 // 100 // 100 // 0 // --- // 0 /// OTTHUMT00000362751 // Havana transcript // DEAD/H (Asp-Glu-Ala-Asp/His) box helicase 11 like 1[gene_biotype:transcribed_unprocessed_pseudogene transcript_biotype:processed_transcript] // chr1 // 100 // 100 // 0 // --- // 0 /// ENST00000450305 // ENSEMBL // DEAD/H (Asp-Glu-Ala-Asp/His) box helicase 11 like 1 [gene_biotype:transcribed_unprocessed_pseudogene transcript_biotype:transcribed_unprocessed_pseudogene] // chr1 // 100 // 100 // 0 // --- // 0 /// ENST00000456328 // ENSEMBL // DEAD/H (Asp-Glu-Ala-Asp/His) box helicase 11 like 1 [gene_biotype:transcribed_unprocessed_pseudogene transcript_biotype:processed_transcript] // chr1 // 100 // 100 // 0 // --- // 0 /// NONHSAT000001 // lncRNAWiki // Non-coding transcript identified by NONCODE // chr1 // 100 // 100 // 0 // --- // 0 /// NONHSAT000001 // NONCODE // Non-coding transcript identified by NONCODE: Linc // chr1 // 100 // 100 // 0 // --- // 0 /// NONHSAT000002 // lncRNAWiki // Non-coding transcript identified by NONCODE // chr1 // 100 // 100 // 0 // --- // 0 /// NONHSAT000002 // NONCODE // Non-coding transcript identified by NONCODE: Linc // chr1 // 100 // 100 // 0 // --- // 0 /// NONHSAT000003 // lncRNAWiki // Non-coding transcript identified by NONCODE // chr1 // 100 // 100 // 0 // --- // 0 /// NONHSAT000003 // NONCODE // Non-coding transcript identified by NONCODE: Linc // chr1 // 100 // 100 // 0 // --- // 0 /// NONHSAT000004 // lncRNAWiki // Non-coding transcript identified by NONCODE // chr1 // 100 // 100 // 0 // --- // 0 /// NONHSAT000004 // NONCODE // Non-coding transcript identified by NONCODE: Linc // chr1 // 100 
// 100 // 0 // --- // 0
swissprot
1 NR_046018 // B7ZGX0 /// NR_046018 // B7ZGX2 /// NR_046018 // B7ZGX7 /// NR_046018 // B7ZGX8 /// OTTHUMT00000002844 // B7ZGX0 /// OTTHUMT00000002844 // B7ZGX2 /// OTTHUMT00000002844 // B7ZGX7 /// OTTHUMT00000002844 // B7ZGX8 /// OTTHUMT00000362751 // B7ZGX0 /// OTTHUMT00000362751 // B7ZGX2 /// OTTHUMT00000362751 // B7ZGX7 /// OTTHUMT00000362751 // B7ZGX8 /// ENST00000450305 // B7ZGX0 /// ENST00000450305 // B7ZGX2 /// ENST00000450305 // B7ZGX7 /// ENST00000450305 // B7ZGX8 /// ENST00000450305 // B4E2Z4 /// ENST00000450305 // B7ZGW9 /// ENST00000450305 // Q6ZU42 /// ENST00000450305 // B7ZGX3 /// ENST00000450305 // B5WYT6 /// ENST00000456328 // B7ZGX0 /// ENST00000456328 // B7ZGX2 /// ENST00000456328 // B7ZGX7 /// ENST00000456328 // B7ZGX8 /// ENST00000456328 // B4E2Z4 /// ENST00000456328 // B7ZGW9 /// ENST00000456328 // Q6ZU42 /// ENST00000456328 // B7ZGX3 /// ENST00000456328 // B5WYT6
unigene
1 NR_046018 // Hs.714157 // testis| normal| adult /// OTTHUMT00000002844 // Hs.714157 // testis| normal| adult /// OTTHUMT00000362751 // Hs.714157 // testis| normal| adult /// ENST00000450305 // Hs.719844 // brain| testis| normal /// ENST00000450305 // Hs.714157 // testis| normal| adult /// ENST00000450305 // Hs.740212 // --- /// ENST00000450305 // Hs.712940 // bladder| bone marrow| brain| embryonic tissue| intestine| mammary gland| muscle| pharynx| placenta| prostate| skin| spleen| stomach| testis| thymus| breast (mammary gland) tumor| gastrointestinal tumor| glioma| non-neoplasia| normal| prostate cancer| skin tumor| soft tissue/muscle tissue tumor|embryoid body| adult /// ENST00000456328 // Hs.719844 // brain| testis| normal /// ENST00000456328 // Hs.714157 // testis| normal| adult /// ENST00000456328 // Hs.740212 // --- /// ENST00000456328 // Hs.712940 // bladder| bone marrow| brain| embryonic tissue| intestine| mammary gland| muscle| pharynx| placenta| prostate| skin| spleen| stomach| testis| thymus| breast (mammary gland) tumor| gastrointestinal tumor| glioma| non-neoplasia| normal| prostate cancer| skin tumor| soft tissue/muscle tissue tumor|embryoid body| adult
GO_biological_process
1 ENST00000450305 // GO:0006139 // nucleobase-containing compound metabolic process // inferred from electronic annotation /// ENST00000456328 // GO:0006139 // nucleobase-containing compound metabolic process // inferred from electronic annotation
GO_cellular_component
1 ---
GO_molecular_function
1 ENST00000450305 // GO:0003676 // nucleic acid binding // inferred from electronic annotation /// ENST00000450305 // GO:0005524 // ATP binding // inferred from electronic annotation /// ENST00000450305 // GO:0008026 // ATP-dependent helicase activity // inferred from electronic annotation /// ENST00000450305 // GO:0016818 // hydrolase activity, acting on acid anhydrides, in phosphorus-containing anhydrides // inferred from electronic annotation /// ENST00000456328 // GO:0003676 // nucleic acid binding // inferred from electronic annotation /// ENST00000456328 // GO:0005524 // ATP binding // inferred from electronic annotation /// ENST00000456328 // GO:0008026 // ATP-dependent helicase activity // inferred from electronic annotation /// ENST00000456328 // GO:0016818 // hydrolase activity, acting on acid anhydrides, in phosphorus-containing anhydrides // inferred from electronic annotation
pathway protein_domains category locus.type notes Best_Coverage_TaqMan_Assay
1 --- --- main Multiple_Complex --- TaqMan Probe Unavailable
Best_Coverage_TaqMan_Assay_HTML
1 TaqMan Probe Unavailable