Branchpointer issues with chromosome names and getListElement(x, i, ...)
0
0
Entering edit mode
@3590d877
Last seen 7 months ago
France

I have been struggling with Branchpointer for a few hours, using either a .fa genome file or the BSgenome option. Below are the code and the errors reported.

require(data.table)

library(dplyr)

library(branchpointer)

library(BSgenome.Hsapiens.UCSC.hg38)

g <- BSgenome.Hsapiens.UCSC.hg38::BSgenome.Hsapiens.UCSC.hg38

>

exons <- gtfToExons("//home/data1/Genome/gencode.v38.primary_assembly.annotation.gtf")

setwd("/home/alex/RNAseq_data_et_analyses/Analyse_HeLa_v2/analyse1")

head(exons)

GRanges object with 6 ranges and 6 metadata columns:

seqnames ranges strand | gene_id gene_type transcript_id

<Rle> <IRanges> <Rle> | <character> <character> <character>

[1] chr1 11869-12227 + | ENSG00000223972.5 transcribed_unproces.. ENST00000456328.2

[2] chr1 12613-12721 + | ENSG00000223972.5 transcribed_unproces.. ENST00000456328.2

[3] chr1 13221-14409 + | ENSG00000223972.5 transcribed_unproces.. ENST00000456328.2

[4] chr1 12010-12057 + | ENSG00000223972.5 transcribed_unproces.. ENST00000450305.2

[5] chr1 12179-12227 + | ENSG00000223972.5 transcribed_unproces.. ENST00000450305.2

[6] chr1 12613-12697 + | ENSG00000223972.5 transcribed_unproces.. ENST00000450305.2

transcript_type exon_id exon_number

<character> <character> <character>

[1] processed_transcript ENSE00002234944.1 1

[2] processed_transcript ENSE00003582793.1 2

[3] processed_transcript ENSE00002312635.1 3

[4] transcribed_unproces.. ENSE00001948541.1 1

[5] transcribed_unproces.. ENSE00001671638.2 2

[6] transcribed_unproces.. ENSE00001758273.2 3


seqinfo: 47 sequences from an unspecified genome; no seqlengths

>

getwd()

[1] "/home/alex/RNAseq_data_et_analyses/Analyse_HeLa_v2/analyse1"

queryIntron <- readQueryFile("exC_bp1",

+ queryType = "region",

+ exons = exons)

>

head(queryIntron)

GRanges object with 6 ranges and 6 metadata columns:

seqnames ranges strand | id to_3prime to_5prime same_gene

<Rle> <IRanges> <Rle> | <character> <numeric> <numeric> <logical>

[1] chr17 66688907-66688933 + | chr17:66688951-66689.. 18 497 TRUE

[2] chr3 75432888-75432914 - | chr3:75432808-75432870 18 1837 TRUE

[3] chr3 48580362-48580388 - | chr3:48580300-48580344 18 219 TRUE

[4] chr4 103147664-103147690 - | chr4:103147356-10314.. 18 1180 TRUE

[5] chr7 100627992-100628018 - | chr7:100627907-10062.. 18 81 TRUE

[6] chr11 65183084-65183110 + | chr11:65183128-65183.. 18 142 TRUE

exon_3prime exon_5prime

<character> <character>

[1] ENSE00003551970.1 ENSE00003465619.1

[2] ENSE00002371856.1 ENSE00001958820.1

[3] ENSE00003548991.1 ENSE00003598438.1

[4] ENSE00000970107.1 ENSE00000970106.1

[5] ENSE00003651039.1 ENSE00003463524.1

[6] ENSE00003483377.1 ENSE00002150254.1


seqinfo: 25 sequences from an unspecified genome; no seqlengths

branchpointPredictionsIntron <- predictBranchpoints(queryIntron,

+ queryType = "region",

+ genome="//home/data1/Genome/GRCh38.primary_assembly.genome.fa",

+ bedtoolsLocation = "/usr/bin/bedtools" )

Error in getListElement(x, i, ...) :

GRanges objects don't support [[, as.list(), lapply(), or unlist() at the moment

>

branchpointPredictionsIntron <- predictBranchpoints(queryIntron,

+ queryType = "region",

+ rmChr = TRUE,

+ BSgenome = g)

Error in getBranchpointSequence(query, uniqueId = uniqueId, queryType = queryType, :

Chromosome names of query and genome do not match

head(BSgenome,100)

1 function (organism, common_name, genome, provider, provider_version,

2 release_date, release_name, source_url, seqnames, circ_seqs = NA,

3 mseqnames, seqs_pkgname, seqs_dirpath, species = NA_character_)

4 {

5 single_sequences <- OnDiskNamedSequences(seqs_dirpath, seqnames = seqnames)

6 if (missing(genome))

7 genome <- provider_version

8 seqinfo <- .make_BSgenome_seqinfo(single_sequences, circ_seqs,

9 genome, seqnames)

10 seqnames <- seqnames(seqinfo)

11 if (missing(common_name))

12 common_name <- species

13 metadata <- list(organism = organism, common_name = common_name,

14 genome = genome, provider = provider, release_date = release_date,

15 source_url = source_url)

16 if (is.null(mseqnames))

17 mseqnames <- character(0)

18 multiple_sequences <- RdaCollection(seqs_dirpath, mseqnames)

19 names(user_seqnames) <- user_seqnames <- seqnames

20 new("BSgenome", metadata = metadata, pkgname = seqs_pkgname,

21 single_sequences = single_sequences, multiple_sequences = multiple_sequences,

22 seqinfo = seqinfo, user_seqnames = user_seqnames, .seqs_cache = new.env(parent = emptyenv()),

23 .link_counts = new.env(parent = emptyenv()))

24 }

sessionInfo()

R version 4.1.2 (2021-11-01)

Platform: x86_64-pc-linux-gnu (64-bit)

Running under: Ubuntu 20.04.3 LTS

Matrix products: default

BLAS: /usr/local/lib/R/lib/libRblas.so

LAPACK: /usr/local/lib/R/lib/libRlapack.so

locale:

[1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C LC_TIME=en_US.UTF-8

[4] LC_COLLATE=en_US.UTF-8 LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8

[7] LC_PAPER=en_US.UTF-8 LC_NAME=C LC_ADDRESS=C

[10] LC_TELEPHONE=C LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C

attached base packages:

[1] stats4 stats graphics grDevices utils datasets methods base

other attached packages:

[1] BiocManager_1.30.16 BSgenome.Hsapiens.UCSC.hg38_1.4.4 BSgenome_1.62.0

[4] rtracklayer_1.54.0 Biostrings_2.62.0 XVector_0.34.0

[7] GenomicRanges_1.46.1 GenomeInfoDb_1.30.1 IRanges_2.28.0

[10] S4Vectors_0.32.3 BiocGenerics_0.40.0 branchpointer_1.20.0

[13] caret_6.0-90 lattice_0.20-45 ggplot2_3.3.5

[16] devtools_2.4.3 usethis_2.1.5 dplyr_1.0.8

[19] data.table_1.14.2

loaded via a namespace (and not attached):

[1] colorspace_2.0-2 rjson_0.2.21 ellipsis_0.3.2

[4] class_7.3-19 rprojroot_2.0.2 fs_1.5.2

[7] listenv_0.8.0 remotes_2.4.2 bit64_4.0.5

[10] AnnotationDbi_1.56.2 prodlim_2019.11.13 fansi_1.0.2

[13] lubridate_1.8.0 xml2_1.3.3 codetools_0.2-18

[16] splines_4.1.2 cachem_1.0.6 pkgload_1.2.4

[19] Rsamtools_2.10.0 pROC_1.18.0 kernlab_0.9-29

[22] dbplyr_2.1.1 png_0.1-7 compiler_4.1.2

[25] httr_1.4.2 assertthat_0.2.1 Matrix_1.3-4

[28] fastmap_1.1.0 cli_3.1.1 prettyunits_1.1.1

[31] tools_4.1.2 gtable_0.3.0 glue_1.6.1

[34] GenomeInfoDbData_1.2.7 reshape2_1.4.4 rappdirs_0.3.3

[37] Rcpp_1.0.8 Biobase_2.54.0 vctrs_0.3.8

[40] nlme_3.1-153 iterators_1.0.14 timeDate_3043.102

[43] gower_1.0.0 stringr_1.4.0 globals_0.14.0

[46] ps_1.6.0 brio_1.1.3 testthat_3.1.2

[49] lifecycle_1.0.1 restfulr_0.0.13 XML_3.99-0.8

[52] future_1.23.0 zlibbioc_1.40.0 MASS_7.3-55

[55] scales_1.1.1 ipred_0.9-12 MatrixGenerics_1.6.0

[58] hms_1.1.1 SummarizedExperiment_1.24.0 parallel_4.1.2

[61] yaml_2.2.2 curl_4.3.2 memoise_2.0.1

[64] biomaRt_2.50.3 rpart_4.1-15 stringi_1.7.6

[67] RSQLite_2.2.9 BiocIO_1.4.0 desc_1.4.0

[70] foreach_1.5.2 filelock_1.0.2 BiocParallel_1.28.3

[73] pkgbuild_1.3.1 lava_1.6.10 matrixStats_0.61.0

[76] rlang_1.0.1 pkgconfig_2.0.3 bitops_1.0-7

[79] purrr_0.3.4 GenomicAlignments_1.30.0 recipes_0.1.17

[82] cowplot_1.1.1 bit_4.0.4 processx_3.5.2

[85] tidyselect_1.1.1 parallelly_1.30.0 gbm_2.1.8

[88] plyr_1.8.6 magrittr_2.0.2 R6_2.5.1

[91] generics_0.1.2 DelayedArray_0.20.0 DBI_1.1.2

[94] pillar_1.7.0 withr_2.4.3 survival_3.2-13

[97] KEGGREST_1.34.0 RCurl_1.98-1.5 nnet_7.3-16

[100] tibble_3.1.6 future.apply_1.8.1 crayon_1.4.2

[103] utf8_1.2.2 BiocFileCache_2.2.1 progress_1.2.2

[106] grid_4.1.2 blob_1.2.2 callr_3.7.0

[109] ModelMetrics_1.2.2.2 digest_0.6.29 munsell_0.5.0

[112] sessioninfo_1.2.2

branchpointer • 140 views
ADD COMMENT

Login before adding your answer.

Traffic: 622 users visited in the last hour
Help About
FAQ
Access RSS
API
Stats

Use of this site constitutes acceptance of our User Agreement and Privacy Policy.

Powered by Biostar, version 2.3.6