Question: error in Bioconductor getBM
1
0
Entering edit mode
@nitandressa-24299
Last seen 4.2 years ago

Hello everyone!

So, I'm trying to retrieve some information from biomaRt and just found this error:

 > library(biomaRt)
 > ensembl = useMart(host='http://sep2019.archive.ensembl.org', biomart='ENSEMBL_MART_ENSEMBL', dataset='hsapiens_gene_ensembl')

 > unique(ad.b.wb.a3.3$groupID)
     [1] "ENSG00000168280" "ENSG00000144867" "ENSG00000091513" "ENSG00000242337"
     [5] "ENSG00000197971" "ENSG00000279811" "ENSG00000106125" "ENSG00000241644"
     [9] "ENSG00000240583" "ENSG00000250424" "ENSG00000254959" "ENSG00000273269"
    [13] "ENSG00000143933" "ENSG00000239605" "ENSG00000173786" "ENSG00000007237"


 > et = getBM(attributes=c("ensembl_gene_id","external_gene_name","gene_biotype"),
                 filters = "ensembl_gene_id", 
                 values = unique(ad.b.wb.a3.3$groupID), 
                 mart = ensembl)
        Error in readChar(con, 5L, useBytes = TRUE) : cannot open the connection
        In addition: Warning message:
        In readChar(con, 5L, useBytes = TRUE) :
          cannot open compressed file '/tmp/biomaRt/5ea3405a17cc_file5ea353305cde', probable reason 'No such file or directory'

I don't know why this error is happening. The object exists and the vector had the ids. Most curious, if I remove one of the ids, it works:

> et = getBM(attributes=c("ensembl_gene_id","external_gene_name","gene_biotype"), 
             filters = "ensembl_gene_id", 
             values = unique(ad.b.wb.a3.3$groupID)[1:15], 
             mart = ensembl) # Minus the last element
> et = getBM(attributes=c("ensembl_gene_id","external_gene_name","gene_biotype"), 
             filters = "ensembl_gene_id", 
             values = unique(ad.b.wb.a3.3$groupID)[2:16], 
             mart = ensembl) # Minus the 1st element
> et = getBM(attributes=c("ensembl_gene_id","external_gene_name","gene_biotype"), 
             filters = "ensembl_gene_id", 
             values = unique(ad.b.wb.a3.3$groupID)[c(1:6,8:16)], 
             mart = ensembl) #Minus the 7th element

I also tried saving it in a different vector, but the same error happened:

> aa=c("ENSG00000168280","ENSG00000144867","ENSG00000091513","ENSG00000242337","ENSG00000197971","ENSG00000279811","ENSG00000106125","ENSG00000241644","ENSG00000240583","ENSG00000250424","ENSG00000254959","ENSG00000273269","ENSG00000143933","ENSG00000239605","ENSG00000173786","ENSG00000007237")
> et = getBM(attributes=c("ensembl_gene_id","external_gene_name","gene_biotype"), filters = "ensembl_gene_id", values = aa, mart = ensembl)
Error in readChar(con, 5L, useBytes = TRUE) : cannot open the connection
In addition: Warning message:
In readChar(con, 5L, useBytes = TRUE) :
  cannot open compressed file '/tmp/biomaRt/5ea3405a17cc_file5ea353305cde', probable reason 'No such file or directory'

Here is the sessionInfo()

> sessionInfo()
R version 4.0.2 (2020-06-22)
Platform: x86_64-conda_cos6-linux-gnu (64-bit)
Running under: Fedora 30 (Workstation Edition)

Matrix products: default
BLAS/LAPACK: /homes/brauerei/natasha/miniconda2/envs/r4/lib/libopenblasp-r0.3.10.so

locale:
 [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
 [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
 [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
 [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
 [9] LC_ADDRESS=C               LC_TELEPHONE=C            
[11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       

attached base packages:
[1] stats4    parallel  stats     graphics  grDevices utils     datasets 
[8] methods   base     

other attached packages:
 [1] GenomicAlignments_1.24.0    Rsamtools_2.4.0            
 [3] Biostrings_2.56.0           XVector_0.28.0             
 [5] SummarizedExperiment_1.18.2 DelayedArray_0.14.1        
 [7] matrixStats_0.57.0          GenomicFeatures_1.40.1     
 [9] AnnotationDbi_1.50.3        Biobase_2.48.0             
[11] GenomicRanges_1.40.0        GenomeInfoDb_1.24.2        
[13] IRanges_2.22.2              S4Vectors_0.26.1           
[15] BiocGenerics_0.34.0         ComplexUpset_0.5.17        
[17] ggplot2_3.3.2               pathview_1.28.1            
[19] gageData_2.26.0             gage_2.38.3                
[21] dplyr_1.0.2                 biomaRt_2.44.1             
[23] tidyr_1.1.2                

loaded via a namespace (and not attached):
 [1] httr_1.4.2             bit64_4.0.5            assertthat_0.2.1      
 [4] askpass_1.1            BiocManager_1.30.10    BiocFileCache_1.12.1  
 [7] blob_1.2.1             GenomeInfoDbData_1.2.3 progress_1.2.2        
[10] pillar_1.4.6           RSQLite_2.2.1          lattice_0.20-41       
[13] glue_1.4.2             digest_0.6.25          colorspace_1.4-1      
[16] Matrix_1.2-18          XML_3.99-0.5           pkgconfig_2.0.3       
[19] zlibbioc_1.34.0        purrr_0.3.4            GO.db_3.11.4          
[22] patchwork_1.0.1        scales_1.1.1           BiocParallel_1.22.0   
[25] tibble_3.0.3           openssl_1.4.3          KEGGREST_1.28.0       
[28] generics_0.0.2         farver_2.0.3           ellipsis_0.3.1        
[31] withr_2.3.0            cli_2.0.2              magrittr_1.5          
[34] crayon_1.3.4           memoise_1.1.0          KEGGgraph_1.48.0      
[37] fansi_0.4.1            graph_1.66.0           tools_4.0.2           
[40] prettyunits_1.1.1      hms_0.5.3              org.Hs.eg.db_3.11.4   
[43] lifecycle_0.2.0        stringr_1.4.0          munsell_0.5.0         
[46] compiler_4.0.2         rlang_0.4.7            grid_4.0.2            
[49] RCurl_1.98-1.2         rappdirs_0.3.1         bitops_1.0-6          
[52] labeling_0.3           gtable_0.3.0           DBI_1.1.0             
[55] curl_4.3               R6_2.4.1               rtracklayer_1.48.0    
[58] bit_4.0.4              Rgraphviz_2.32.0       stringi_1.5.3         
[61] Rcpp_1.0.5             vctrs_0.3.4            png_0.1-7             
[64] dbplyr_1.4.4           tidyselect_1.1.0

Can anyone help me? Thanks in advance

Natasha

biomaRt getBM • 1.8k views
ADD COMMENT
0
Entering edit mode

I also tried not using the tidyr package:

(r4) [natasha@becherovka natasha]$ R

R version 4.0.2 (2020-06-22) -- "Taking Off Again"
Copyright (C) 2020 The R Foundation for Statistical Computing
Platform: x86_64-conda_cos6-linux-gnu (64-bit)

R is free software and comes with ABSOLUTELY NO WARRANTY.
You are welcome to redistribute it under certain conditions.
Type 'license()' or 'licence()' for distribution details.

  Natural language support but running in an English locale

R is a collaborative project with many contributors.
Type 'contributors()' for more information and
'citation()' on how to cite R or R packages in publications.

Type 'demo()' for some demos, 'help()' for on-line help, or
'help.start()' for an HTML browser interface to help.
Type 'q()' to quit R.

> library(biomaRt)
> ensembl = useMart(host='http://sep2019.archive.ensembl.org', biomart='ENSEMBL_MART_ENSEMBL', dataset='hsapiens_gene_ensembl')
> ad.b.wb.a3=read.table("AD_Whole.BrainxArea.III.b.tsv",header=T,sep='\t')
> ad.b.wb.a3.2=ad.b.wb.a3[ad.b.wb.a3$padj <= 0.05 & abs(ad.b.wb.a3$log2fold_Whole.Brain_Area.III) > 1.5,]
> aa=unique(unlist(strsplit(ad.b.wb.a3.2$groupID, "\\+")))
> et = getBM(attributes=c("ensembl_gene_id","external_gene_name","gene_biotype","entrezgene_id"), filters = "ensembl_gene_id", values = aa, mart = ensembl)
Error in readChar(con, 5L, useBytes = TRUE) : cannot open the connection
In addition: Warning message:
In readChar(con, 5L, useBytes = TRUE) :
  cannot open compressed file '/tmp/biomaRt/5ea3405a17cc_file5ea353305cde', probable reason 'No such file or directory'
> aa
 [1] "ENSG00000168280" "ENSG00000144867" "ENSG00000091513" "ENSG00000242337"
 [5] "ENSG00000197971" "ENSG00000279811" "ENSG00000106125" "ENSG00000241644"
 [9] "ENSG00000240583" "ENSG00000250424" "ENSG00000254959" "ENSG00000273269"
[13] "ENSG00000143933" "ENSG00000239605" "ENSG00000173786" "ENSG00000007237"
ADD REPLY
0
Entering edit mode

Does the folder /tmp/biomaRt exist? If so, did you create it yourself? If you didn't, can you delete it?

Under some circumstances biomaRt creates a set of temporary files, but that should only happen when you have a query with more than 500 values, and the files should be in an R temp folder, so I don't think that's what's going on here.

Maybe try running with the cache turned off. You can do that with the useCache argument to getBM() e.g.

getBM(..., useCache = FALSE)
ADD REPLY
0
Entering edit mode
@james-w-macdonald-5106
Last seen 2 days ago
United States

Might just be some weirdness in your session. I'd try restarting R and trying again.

> ids <-  c("ENSG00000168280","ENSG00000144867","ENSG00000091513","ENSG00000242337",
 "ENSG00000197971","ENSG00000279811","ENSG00000106125","ENSG00000241644",
 "ENSG00000240583","ENSG00000250424","ENSG00000254959","ENSG00000273269",
 "ENSG00000143933","ENSG00000239605","ENSG00000173786","ENSG00000007237")
ids <-  c("ENSG00000168280","ENSG00000144867","ENSG00000091513","ENSG00000242337",
+  "ENSG00000197971","ENSG00000279811","ENSG00000106125","ENSG00000241644",
+  "ENSG00000240583","ENSG00000250424","ENSG00000254959","ENSG00000273269",
+  "ENSG00000143933","ENSG00000239605","ENSG00000173786","ENSG00000007237")
> mart <- useMart(host='http://sep2019.archive.ensembl.org', biomart='ENSEMBL_MART_ENSEMBL', dataset='hsapiens_gene_ensembl')
> getBM(attributes=c("ensembl_gene_id","external_gene_name","gene_biotype"),
                 filters = "ensembl_gene_id", values = ids, mart = mart)
getBM(attributes=c("ensembl_gene_id","external_gene_name","gene_biotype"),
+                  filters = "ensembl_gene_id", values = ids, mart = mart)
   ensembl_gene_id external_gene_name                       gene_biotype
1  ENSG00000007237               GAS7                     protein_coding
2  ENSG00000091513                 TF                     protein_coding
3  ENSG00000106125             MINDY4                     protein_coding
4  ENSG00000143933              CALM2                     protein_coding
5  ENSG00000144867              SRPRB                     protein_coding
6  ENSG00000168280              KIF5C                     protein_coding
7  ENSG00000173786                CNP                     protein_coding
8  ENSG00000197971                MBP                     protein_coding
9  ENSG00000239605              STPG4                     protein_coding
10 ENSG00000240583               AQP1                     protein_coding
11 ENSG00000241644               INMT                     protein_coding
12 ENSG00000242337             INHCAP transcribed_unprocessed_pseudogene
13 ENSG00000250424         AC004691.2                     protein_coding
14 ENSG00000254959        INMT-MINDY4                     protein_coding
15 ENSG00000273269         AC073283.3                     protein_coding
16 ENSG00000279811         AC093330.2                                TEC
> aa=c("ENSG00000168280","ENSG00000144867","ENSG00000091513","ENSG00000242337","ENSG00000197971","ENSG00000279811","ENSG00000106125","ENSG00000241644","ENSG00000240583","ENSG00000250424","ENSG00000254959","ENSG00000273269","ENSG00000143933","ENSG00000239605","ENSG00000173786","ENSG00000007237")
> getBM(attributes=c("ensembl_gene_id","external_gene_name","gene_biotype"),
                 filters = "ensembl_gene_id", values = aa, mart = mart)
getBM(attributes=c("ensembl_gene_id","external_gene_name","gene_biotype"),
+                  filters = "ensembl_gene_id", values = aa, mart = mart)
   ensembl_gene_id external_gene_name                       gene_biotype
1  ENSG00000007237               GAS7                     protein_coding
2  ENSG00000091513                 TF                     protein_coding
3  ENSG00000106125             MINDY4                     protein_coding
4  ENSG00000143933              CALM2                     protein_coding
5  ENSG00000144867              SRPRB                     protein_coding
6  ENSG00000168280              KIF5C                     protein_coding
7  ENSG00000173786                CNP                     protein_coding
8  ENSG00000197971                MBP                     protein_coding
9  ENSG00000239605              STPG4                     protein_coding
10 ENSG00000240583               AQP1                     protein_coding
11 ENSG00000241644               INMT                     protein_coding
12 ENSG00000242337             INHCAP transcribed_unprocessed_pseudogene
13 ENSG00000250424         AC004691.2                     protein_coding
14 ENSG00000254959        INMT-MINDY4                     protein_coding
15 ENSG00000273269         AC073283.3                     protein_coding
16 ENSG00000279811         AC093330.2                                TEC
ADD COMMENT
0
Entering edit mode

Hi James,

thanks for your reply. I tried doing what you said, but the error continues:

(r4) [natasha@becherovka natasha]$ R

R version 4.0.2 (2020-06-22) -- "Taking Off Again"
Copyright (C) 2020 The R Foundation for Statistical Computing
Platform: x86_64-conda_cos6-linux-gnu (64-bit)

R is free software and comes with ABSOLUTELY NO WARRANTY.
You are welcome to redistribute it under certain conditions.
Type 'license()' or 'licence()' for distribution details.

  Natural language support but running in an English locale

R is a collaborative project with many contributors.
Type 'contributors()' for more information and
'citation()' on how to cite R or R packages in publications.

Type 'demo()' for some demos, 'help()' for on-line help, or
'help.start()' for an HTML browser interface to help.
Type 'q()' to quit R.

> library(tidyr)
> library(biomaRt)
> ensembl = useMart(host='http://sep2019.archive.ensembl.org', biomart='ENSEMBL_MART_ENSEMBL', dataset='hsapiens_gene_ensembl')
> ad.b.wb.a3=read.table("AD_Whole.BrainxArea.III.b.tsv",header=T,sep='\t')
> ad.b.wb.a3.2=ad.b.wb.a3[ad.b.wb.a3$padj <= 0.05 & abs(ad.b.wb.a3$log2fold_Whole.Brain_Area.III) > 1.5,]
> ad.b.wb.a3.3=data.frame(separate_rows(ad.b.wb.a3.2,groupID, sep="\\+"))
> ad.b.wb.a3.3$groupID
 [1] "ENSG00000168280" "ENSG00000144867" "ENSG00000091513" "ENSG00000242337"
 [5] "ENSG00000144867" "ENSG00000091513" "ENSG00000242337" "ENSG00000197971"
 [9] "ENSG00000279811" "ENSG00000144867" "ENSG00000091513" "ENSG00000242337"
[13] "ENSG00000197971" "ENSG00000279811" "ENSG00000197971" "ENSG00000279811"
[17] "ENSG00000168280" "ENSG00000144867" "ENSG00000091513" "ENSG00000242337"
[21] "ENSG00000106125" "ENSG00000241644" "ENSG00000240583" "ENSG00000250424"
[25] "ENSG00000254959" "ENSG00000273269" "ENSG00000143933" "ENSG00000239605"
[29] "ENSG00000273269" "ENSG00000143933" "ENSG00000239605" "ENSG00000197971"
[33] "ENSG00000279811" "ENSG00000173786" "ENSG00000106125" "ENSG00000241644"
[37] "ENSG00000240583" "ENSG00000250424" "ENSG00000254959" "ENSG00000144867"
[41] "ENSG00000091513" "ENSG00000242337" "ENSG00000173786" "ENSG00000106125"
[45] "ENSG00000241644" "ENSG00000240583" "ENSG00000250424" "ENSG00000254959"
[49] "ENSG00000106125" "ENSG00000241644" "ENSG00000240583" "ENSG00000250424"
[53] "ENSG00000254959" "ENSG00000007237" "ENSG00000106125" "ENSG00000241644"
[57] "ENSG00000240583" "ENSG00000250424" "ENSG00000254959"
> et = getBM(attributes=c("ensembl_gene_id","external_gene_name","gene_biotype"), filters = "ensembl_gene_id", values = unique(ad.b.wb.a3.3$groupID), mart = ensembl)
Error in readChar(con, 5L, useBytes = TRUE) : cannot open the connection
In addition: Warning message:
In readChar(con, 5L, useBytes = TRUE) :
  cannot open compressed file '/tmp/biomaRt/5ea3405a17cc_file5ea353305cde', probable reason 'No such file or directory'

Any tips?

ADD REPLY

Login before adding your answer.

Traffic: 704 users visited in the last hour
Help About
FAQ
Access RSS
API
Stats

Use of this site constitutes acceptance of our User Agreement and Privacy Policy.

Powered by the version 2.3.6