Question: GenomicFeatures::getPromoterSeq error when selecting all promoters sequences (in mice)
0
gravatar for branislav misovic
4.7 years ago by
Netherlands/Amsterdam
branislav misovic120 wrote:

hi ,

  When running the search for all promoters sequences  in mice , with  1000 bp  up /downstream, parameter the getPromoterSeq  works fine,  but increasing the up/downstream parameter to say 5000  or 2000 it errors  (log bellow):

Any advice appreciated .
Branko
-------------------------------------------

library(GenomicFeatures)
library(BSgenome.Mmusculus.UCSC.mm10)
library(TxDb.Mmusculus.UCSC.mm10.knownGene)  

chr.loc = transcriptsBy (TxDb.Mmusculus.UCSC.mm10.knownGene, by = "gene")
system.time ( prom.all <- getPromoterSeq(chr.loc, Mmusculus, upstream=2000, downstream=1000))

Error in loadFUN(x, seqname, ranges) :
  trying to load regions beyond the boundaries of non-circular sequence "chr4_JH584294_random"

In addition: Warning messages:
1: In valid.GenomicRanges.seqinfo(x, suggest.trim = TRUE) :
  GRanges object contains 1 out-of-bound range located on sequence
  chr4_JH584294_random. Note that only ranges located on a non-circular

........
--------------------

14: stop("trying to load regions beyond the boundaries ", "of non-circular sequence \"",
        seqname, "\"")
13: loadFUN(x, seqname, ranges)
12: loadFUN(x, seqname, ranges)
11: loadSubseqsFromStrandedSequence(x@single_sequences, seqname,
        ranges(gr), strand(gr), is_circular)
10: FUN(1:35[[27L]], ...)
9: lapply(seq_len(length(grl)), function(i) {
       gr <- grl[[i]]
       if (length(gr) == 0L)
           return(DNAStringSet())
       seqlevel <- grl_seqlevels[i]
       is_circular <- isCircular(x)[[seqlevel]]
       idx <- match(seqlevel, x@user_seqnames)
       if (is.na(idx))
           stop("invalid sequence name: ", seqlevel)
       seqname <- names(x@user_seqnames)[[idx]]
       if (is.null(snplocs(x, seqname))) {
           subject <- try(get(seqname, envir = x@.seqs_cache, inherits = FALSE),
               silent = TRUE)
           if (is(subject, "try-error")) {
               ans <- loadSubseqsFromStrandedSequence(x@single_sequences,
                   seqname, ranges(gr), strand(gr), is_circular)
               return(ans)
           }
           .linkToCachedObject(subject) <- .newLinkToCachedObject(seqname,
               x@.seqs_cache, x@.link_counts)
       }
       else {
           subject <- x[[seqlevel]]
       }
       masks(subject) <- NULL
       loadSubseqsFromStrandedSequence(subject, seqlevel, ranges(gr),
           strand(gr), is_circular)
   })
8: lapply(seq_len(length(grl)), function(i) {
       gr <- grl[[i]]
       if (length(gr) == 0L)
           return(DNAStringSet())
       seqlevel <- grl_seqlevels[i]
       is_circular <- isCircular(x)[[seqlevel]]
       idx <- match(seqlevel, x@user_seqnames)
       if (is.na(idx))
           stop("invalid sequence name: ", seqlevel)
       seqname <- names(x@user_seqnames)[[idx]]
       if (is.null(snplocs(x, seqname))) {
           subject <- try(get(seqname, envir = x@.seqs_cache, inherits = FALSE),
               silent = TRUE)
           if (is(subject, "try-error")) {
               ans <- loadSubseqsFromStrandedSequence(x@single_sequences,
                   seqname, ranges(gr), strand(gr), is_circular)
               return(ans)
           }
           .linkToCachedObject(subject) <- .newLinkToCachedObject(seqname,
               x@.seqs_cache, x@.link_counts)
       }
       else {
           subject <- x[[seqlevel]]
       }
       masks(subject) <- NULL
       loadSubseqsFromStrandedSequence(subject, seqlevel, ranges(gr),
           strand(gr), is_circular)
   })
7: .extractFromBSgenomeSingleSequences(x, sseq_args$names, sseq_args$start,
       sseq_args$end, sseq_args$width, sseq_args$strand)
6: .local(x, ...)
5: getSeq(subject, promoter.granges)
4: getSeq(subject, promoter.granges)
3: getPromoterSeq(chr.loc, Mmusculus, upstream = 2000, downstream = 1000)
2: getPromoterSeq(chr.loc, Mmusculus, upstream = 2000, downstream = 1000)
1: system.time(prom.all <- getPromoterSeq(chr.loc, Mmusculus, upstream = 2000,
       downstream = 1000))


---------------------------------------------

> sessionInfo()
R version 3.1.1 (2014-07-10)
Platform: x86_64-pc-linux-gnu (64-bit)

locale:
 [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C
 [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8
 [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8
 [7] LC_PAPER=en_US.UTF-8       LC_NAME=C
 [9] LC_ADDRESS=C               LC_TELEPHONE=C
[11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C

attached base packages:
[1] stats4    parallel  stats     graphics  grDevices utils     datasets
[8] methods   base

other attached packages:
 [1] TxDb.Mmusculus.UCSC.mm10.knownGene_3.0.0
 [2] BSgenome.Mmusculus.UCSC.mm10_1.4.0
 [3] BSgenome_1.34.0
 [4] rtracklayer_1.26.2
 [5] Biostrings_2.34.0
 [6] XVector_0.6.0
 [7] org.Mm.eg.db_3.0.0
 [8] RSQLite_1.0.0
 [9] DBI_0.3.1
[10] GenomicFeatures_1.18.2
[11] AnnotationDbi_1.28.1
[12] Biobase_2.26.0
[13] GenomicRanges_1.18.1
[14] GenomeInfoDb_1.2.3
[15] IRanges_2.0.0
[16] S4Vectors_0.4.0
[17] BiocGenerics_0.12.1
[18] BiocInstaller_1.16.1

loaded via a namespace (and not attached):
 [1] base64enc_0.1-2         BatchJobs_1.5           BBmisc_1.8
 [4] BiocParallel_1.0.0      biomaRt_2.22.0          bitops_1.0-6
 [7] brew_1.0-6              checkmate_1.5.0         codetools_0.2-9
[10] digest_0.6.4            fail_1.2                foreach_1.4.2
[13] GenomicAlignments_1.2.1 iterators_1.0.7         RCurl_1.95-4.3
[16] Rsamtools_1.18.2        sendmailR_1.2-1         stringr_0.6.2
[19] tools_3.1.1             XML_3.98-1.1            zlibbioc_1.12.0

 

ADD COMMENTlink modified 4.7 years ago by James W. MacDonald50k • written 4.7 years ago by branislav misovic120
Answer: GenomicFeatures::getPromoterSeq error when selecting all promoters sequences (i
1
gravatar for James W. MacDonald
4.7 years ago by
United States
James W. MacDonald50k wrote:

It depends on if you care about all the unplaced haplotypes. If not, just nuke them and go forward.

> trs <- transcriptsBy(TxDb.Mmusculus.UCSC.mm10.knownGene, by = "gene")
> trs <- keepStandardChromosomes(trs)

> prom <- getPromoterSeq(trs, Mmusculus, upstream = 2e3, downstream=1e3)

> prom
DNAStringSetList of length 23696
[["100009600"]] CTTCTTCTTTTTCGAGACAGGGTTTCTCTGTGTAGCCCTGGCTATCAACTCAGAAATCCGC...
[["100009609"]] CACCTCACACCTGTCAGAATGGCTAGGATCAAAAATTCAGGTGACAGCAGATGCTGGTGAG...
[["100009614"]] TGGGTGTCTCATTAGCTCTTTGTATAGCTTCTGAGAGTCATAAAGCCTATATGTCAAAACT...
[["100009664"]] CACCTACGGGACTAAATAAAGAGTCAGCACTTTTTGGCAAAAGCAGCCTAAGTCCCTGTTT...
[["100012"]] GTTGTCTTACTAAAGCAAACACTAGGAAGATTGTTTTCCTGAGGCAGGCCAAGGTGGAAAAAGT...
[["100017"]] TGCCTGCCCCTCTTTGACTTATTGATGAGGCCTGCTTCTGGGGAGCAAGATAAGGATGTGGACA...
[["100019"]] AGAAAGTAAAATTAAAGAATGAGTAAGCTGAAAAATCAGGTCAAGAAACTCCCAAAAGGAACCC...
[["100033459"]] AATTTCTGTATCCATTCTTCTGTCATGGGACATCTGGGTTGTTTCCAGCTTTGGGCTATCA...
[["100034251"]] GTCTGAGTGAGGGACACTATAAGAAGTGCACATCCAATCAGCAATGAGCTAATGAGCAGAG...
[["100034361"]] TGCAGGATCTAAATCAGCACTTACACTGGATCCTGTAATATTGAGCCCAGTCTACTCCTCC...
...
<23686 more elements>

 

ADD COMMENTlink written 4.7 years ago by James W. MacDonald50k

thank you ,  also for the lesson about unplaced ones ...

ADD REPLYlink written 4.6 years ago by branislav misovic120
Please log in to add an answer.

Help
Access

Use of this site constitutes acceptance of our User Agreement and Privacy Policy.
Powered by Biostar version 16.09
Traffic: 281 users visited in the last hour