I have no idea what the issue with biomaRt might be, but there are other ways to do this. As an example:
## load stuff
> library(Homo.sapiens)
> library(TxDb.Hsapiens.UCSC.hg38.refGene)
> TxDb(Homo.sapiens) <- TxDb.Hsapiens.UCSC.hg38.refGene
> library(Mus.musculus)
> library(Orthology.eg.db)
## cobble together a function
# Fetch CDS coordinates for human genes and their mouse orthologs.
#
# genes: character vector of human gene symbols (e.g. "TP53").
#
# Returns a list with two data.frames, `humdat` (human) and `mousedat`
# (mouse), each holding the SYMBOL, CDSCHROM, CDSSTART, CDSEND and
# CDSSTRAND columns keyed by GENEID.
mapFun <- function(genes) {
  # Same annotation columns are requested for both species.
  cds_cols <- c("SYMBOL", "CDSCHROM", "CDSSTART", "CDSEND", "CDSSTRAND")

  # Human symbol -> human Entrez Gene ID.
  humeg <- mapIds(
    org.Hs.eg.db,
    keys = genes,
    column = "ENTREZID",
    keytype = "SYMBOL"
  )

  # Human Entrez Gene ID -> mouse ortholog Entrez Gene ID. Coerced to
  # character so the keys handed to select() have the same type as in
  # naiveFun(), which applies the same coercion.
  mouseg <- as.character(
    mapIds(
      Orthology.eg.db,
      keys = humeg,
      column = "Mus.musculus",
      keytype = "Homo.sapiens"
    )
  )

  humdat <- select(
    Homo.sapiens,
    keys = humeg,
    columns = cds_cols,
    keytype = "GENEID"
  )
  mousedat <- select(
    Mus.musculus,
    keys = mouseg,
    columns = cds_cols,
    keytype = "GENEID"
  )

  list(humdat = humdat, mousedat = mousedat)
}
## Test drive
> mapFun("TP53")
'select()' returned 1:1 mapping between keys and columns
'select()' returned 1:many mapping between keys and columns
'select()' returned 1:many mapping between keys and columns
$humdat
GENEID CDSCHROM CDSSTRAND CDSSTART CDSEND SYMBOL
1 7157 chr17 - 7675053 7675134 TP53
2 7157 chr17 - 7674859 7674971 TP53
3 7157 chr17 - 7674181 7674290 TP53
4 7157 chr17 - 7673701 7673837 TP53
5 7157 chr17 - 7673535 7673608 TP53
6 7157 chr17 - 7670609 7670715 TP53
7 7157 chr17 - 7669609 7669690 TP53
8 7157 chr17 - 7673307 7673339 TP53
9 7157 chr17 - 7673219 7673266 TP53
10 7157 chr17 - 7675053 7675215 TP53
11 7157 chr17 - 7675994 7676251 TP53
12 7157 chr17 - 7675053 7675236 TP53
13 7157 chr17 - 7676521 7676594 TP53
14 7157 chr17 - 7676382 7676403 TP53
15 7157 chr17 - 7675994 7676272 TP53
$mousedat
GENEID CDSCHROM CDSSTRAND CDSSTART CDSEND SYMBOL
1 22059 chr11 + 69586891 69586973 Trp53
2 22059 chr11 + 69587257 69587278 Trp53
3 22059 chr11 + 69587372 69587632 Trp53
4 22059 chr11 + 69588364 69588547 Trp53
5 22059 chr11 + 69588626 69588738 Trp53
6 22059 chr11 + 69589141 69589250 Trp53
7 22059 chr11 + 69589573 69589709 Trp53
8 22059 chr11 + 69589789 69589862 Trp53
9 22059 chr11 + 69590656 69590762 Trp53
10 22059 chr11 + 69591350 69591431 Trp53
11 22059 chr11 + 69591254 69591308 Trp53
12 22059 chr11 + 69586900 69586973 Trp53
But maybe you don't really want the TSS, and would rather have the overall genomic region of each gene:
# Look up whole-gene ranges for human genes and their mouse orthologs.
#
# genes: character vector of human gene symbols (e.g. "TP53").
#
# Returns a single data.frame: the human gene ranges first, followed by the
# mouse gene ranges, with SYMBOL and SPECIES columns appended.
naiveFun <- function(genes) {
  # Human symbol -> human Entrez Gene ID.
  humeg <- mapIds(
    org.Hs.eg.db,
    keys = genes,
    column = "ENTREZID",
    keytype = "SYMBOL"
  )

  # Human Entrez Gene ID -> mouse ortholog Entrez Gene ID.
  # NOTE(review): the as.character() is presumably needed so the IDs index
  # the gene ranges by name rather than by position below -- confirm.
  mouseg <- as.character(
    mapIds(
      Orthology.eg.db,
      keys = humeg,
      column = "Mus.musculus",
      keytype = "Homo.sapiens"
    )
  )
  mousesymb <- mapIds(
    org.Mm.eg.db,
    keys = mouseg,
    column = "SYMBOL",
    keytype = "ENTREZID"
  )

  # `genes(...)` still resolves to the genes() function even though the
  # argument is also called `genes`: R skips non-function bindings when
  # looking up a name in call position.
  humdat <- as(suppressMessages(genes(Homo.sapiens)[humeg]), "data.frame")
  mousedat <- as(suppressMessages(genes(Mus.musculus)[mouseg]), "data.frame")

  data.frame(
    rbind(humdat, mousedat),
    SYMBOL = c(genes, mousesymb),
    # Rows are stacked human-first, then mouse, so label each stacked group
    # explicitly. A bare length-2 vector would be recycled alternately and
    # mislabel rows whenever more than one gene is supplied.
    SPECIES = rep(
      c("Homo sapiens", "Mus musculus"),
      times = c(nrow(humdat), nrow(mousedat))
    )
  )
}
> naiveFun("TP53")
'select()' returned 1:1 mapping between keys and columns
'select()' returned 1:1 mapping between keys and columns
seqnames start end width strand GENEID SYMBOL SPECIES
7157 chr17 7668402 7687550 19149 - 7157 TP53 Homo sapiens
22059 chr11 69580359 69591873 11515 + 22059 Trp53 Mus musculus
thank you it solved my problem. 8 ball pool