Calculating dn/ds ratios
1
0
Entering edit mode
@38ec6314
Last seen 6 weeks ago
United Kingdom

Hi

I am using the attached biomart script for calculating the dn/ds ratio's between the human and model organisms.

However, in an output file, I am getting dn/ds ratios only for mouse and rat.

For the rest of the model organisms (worm, zebrafish, fruitfly), I am getting NA values

Input file = list of human Ensembl id's

library(biomaRt)

## This gives the full list of all marts, including archives
archives <- listEnsemblArchives()

## For the human data
 ensemblhsapiens = useMart(host = 'jan2020.archive.ensembl.org', biomart = 'ENSEMBL_MART_ENSEMBL', 
                 dataset = "hsapiens_gene_ensembl")

filters = listFilters(ensemblhsapiens)
attributes = listAttributes(ensemblhsapiens)

## Input human gene list
hsapiens_GFList <- scan("Human_GF_list.txt", what = character())

## human and Rat
hsapiens_rat <- getBM(attributes = c('ensembl_gene_id', 'rnorvegicus_homolog_ensembl_gene',  
'rnorvegicus_homolog_dn', 
                                    'rnorvegicus_homolog_ds', 'rnorvegicus_homolog_orthology_type'), 
                     filters = 'ensembl_gene_id', 
                     values = hsapiens_GFList, 
                     mart = ensemblhsapiens)

hsapiens_rat1 <- subset(hsapiens_rat, hsapiens_rat$rnorvegicus_homolog_orthology_type == 'ortholog_one2one')
hsapiens_rat1$dnds_ratios <- hsapiens_rat1$rnorvegicus_homolog_dn/hsapiens_rat1$rnorvegicus_homolog_ds
write.csv(hsapiens_rat1, file = "hsapiens_rat.csv")

## human and mouse
 hsapiens_mouse <- getBM(attributes = c('ensembl_gene_id', 'mmusculus_homolog_ensembl_gene',  
 'mmusculus_homolog_dn', 
                                    'mmusculus_homolog_ds', 'mmusculus_homolog_orthology_type'), 
                     filters = 'ensembl_gene_id', 
                     values = hsapiens_GFList, 
                     mart = ensemblhsapiens)

 hsapiens_mouse1 <- subset(hsapiens_mouse, hsapiens_mouse$mmusculus_homolog_orthology_type == 
 'ortholog_one2one')
 hsapiens_mouse1$dnds_ratios <- 
 hsapiens_mouse1$mmusculus_homolog_dn/hsapiens_mouse1$mmusculus_homolog_ds
 write.csv(hsapiens_mouse1, file = "hsapiens_mouse.csv")

## human and c.elegans

 hsapiens_celegans <- getBM(attributes = c('ensembl_gene_id', 'celegans_homolog_ensembl_gene',  
 'celegans_homolog_dn', 
                                    'celegans_homolog_ds', 'celegans_homolog_orthology_type'), 
                     filters = 'ensembl_gene_id', 
                     values = hsapiens_GFList, 
                     mart = ensemblhsapiens)

hsapiens_celegans1 <- subset(hsapiens_celegans, hsapiens_celegans$celegans_homolog_orthology_type == 
'ortholog_one2one')
hsapiens_celegans1$dnds_ratios <- 
hsapiens_celegans1$celegans_homolog_dn/hsapiens_celegans1$celegans_homolog_ds
write.csv(hsapiens_celegans1, file = "hsapiens_celegans.csv")

## human and Fruitfly

hsapiens_fly <- getBM(attributes = c('ensembl_gene_id', 'dmelanogaster_homolog_ensembl_gene',  
'dmelanogaster_homolog_dn', 
                                    'dmelanogaster_homolog_ds', 'dmelanogaster_homolog_orthology_type'), 
                     filters = 'ensembl_gene_id', 
                     values = hsapiens_GFList, 
                     mart = ensemblhsapiens)

hsapiens_fly1 <- subset(hsapiens_fly, hsapiens_fly$dmelanogaster_homolog_orthology_type == 'ortholog_one2one')
hsapiens_fly1$dnds_ratios <- 
hsapiens_fly1$dmelanogaster_homolog_dn/hsapiens_fly1$dmelanogaster_homolog_ds
write.csv(hsapiens_fly1, file = "hsapiens_fly.csv")

###human and Zebrafish

 hsapiens_fish <- getBM(attributes = c('ensembl_gene_id', 'drerio_homolog_ensembl_gene',  'drerio_homolog_dn', 
                                    'drerio_homolog_ds', 'drerio_homolog_orthology_type'), 
                     filters = 'ensembl_gene_id', 
                     values = hsapiens_GFList, 
                     mart = ensemblhsapiens)

 hsapiens_fish1 <- subset(hsapiens_fish, hsapiens_fish$drerio_homolog_orthology_type == 'ortholog_one2one')
 hsapiens_fish1$dnds_ratios <- hsapiens_fish1$drerio_homolog_dn/hsapiens_fish1$drerio_homolog_ds
 write.csv(hsapiens_fish1, file = "hsapiens_fish.csv")
biomaRt • 97 views
ADD COMMENT
0
Entering edit mode
Mike Smith ★ 5.3k
@mike-smith
Last seen 13 hours ago
EMBL Heidelberg / de.NBI

You're getting NA values for your ratios because Ensembl doesn't seem to have dN or dS values for the some of the homologs you look at e.g. C.elegans. Lets just take a look at 3 genes and the tables returned from Ensembl:

library(biomaRt)

## For the human data
ensemblhsapiens = useEnsembl(version = 99,
                             biomart = 'ENSEMBL_MART_ENSEMBL', 
                            dataset = 'hsapiens_gene_ensembl')

## Input human gene list
hsapiens_GFList <- c("ENSG00000010404", "ENSG00000277796", "ENSG00000198888")

## human and Rat
hsapiens_rat <- getBM(attributes = c('ensembl_gene_id', 
                                     'rnorvegicus_homolog_ensembl_gene',  
                                     'rnorvegicus_homolog_dn', 
                                     'rnorvegicus_homolog_ds',
                                     'rnorvegicus_homolog_orthology_type'), 
                      filters = 'ensembl_gene_id', 
                      values = hsapiens_GFList, 
                      mart = ensemblhsapiens)

## human and celegans
hsapiens_celegans <- getBM(attributes = c('ensembl_gene_id', 
                                     'celegans_homolog_ensembl_gene',  
                                     'celegans_homolog_dn', 
                                     'celegans_homolog_ds',
                                     'celegans_homolog_orthology_type'), 
                      filters = 'ensembl_gene_id', 
                      values = hsapiens_GFList, 
                      mart = ensemblhsapiens)

For the rat homologs, we see that there are some which do have dN and dS scores and some that don't even when a homolog exists. Your code would also filter out the top line because of the ortholog type.

hsapiens_rat
#>   ensembl_gene_id rnorvegicus_homolog_ensembl_gene rnorvegicus_homolog_dn rnorvegicus_homolog_ds rnorvegicus_homolog_orthology_type
#> 1 ENSG00000010404               ENSRNOG00000001465                 0.1061                 0.5588                  ortholog_one2many
#> 2 ENSG00000198888               ENSRNOG00000030644                     NA                     NA                   ortholog_one2one
#> 3 ENSG00000277796                                                      NA                     NA

With C.elegans it doesn't look like there are dN or dS scores for any homologs.

hsapiens_celegans
#>   ensembl_gene_id celegans_homolog_ensembl_gene celegans_homolog_dn celegans_homolog_ds celegans_homolog_orthology_type
#> 1 ENSG00000010404                                                NA                  NA                                
#> 2 ENSG00000198888                WBGene00010959                  NA                  NA                ortholog_one2one
#> 3 ENSG00000277796                                                NA                  NA

We can actually check that there are no dN scores for any C.elegans homologs with the code below. Running a query with no filters/values will search for all human genes, and we see that everything that is return is an NA.

tmp <- getBM(attributes = c('ensembl_gene_id', 
                      'celegans_homolog_ensembl_gene',  
                      'celegans_homolog_dn', 
                      'celegans_homolog_ds'), 
       mart = ensemblhsapiens)
all( is.na(tmp$celegans_homolog_dn) )
#> [1] TRUE

I'm afraid I don't know why these entries exist in Ensembl BioMart if there are zero entries to be returned, you'd have to ask Ensembl directly. However I think it's telling that support for the dN/dS analysis has been dropped for over a year now, from Ensembl 100 onwards (https://www.ebi.ac.uk/about/news/service-news/ensembl-100-has-been-released).

ADD COMMENT

Login before adding your answer.

Traffic: 242 users visited in the last hour
Help About
FAQ
Access RSS
API
Stats

Use of this site constitutes acceptance of our User Agreement and Privacy Policy.

Powered by the version 2.3.6