Entering edit mode
hi ,
When running the search for all promoters sequences in mice , with 1000 bp up /downstream, parameter the getPromoterSeq works fine, but increasing the up/downstream parameter to say 5000 or 2000 it errors (log bellow):
Any advice appreciated .
Branko
-------------------------------------------
library(GenomicFeatures)
library(BSgenome.Mmusculus.UCSC.mm10)
library(TxDb.Mmusculus.UCSC.mm10.knownGene)
chr.loc = transcriptsBy (TxDb.Mmusculus.UCSC.mm10.knownGene, by = "gene")
system.time ( prom.all <- getPromoterSeq(chr.loc, Mmusculus, upstream=2000, downstream=1000))
Error in loadFUN(x, seqname, ranges) :
trying to load regions beyond the boundaries of non-circular sequence "chr4_JH584294_random"
In addition: Warning messages:
1: In valid.GenomicRanges.seqinfo(x, suggest.trim = TRUE) :
GRanges object contains 1 out-of-bound range located on sequence
chr4_JH584294_random. Note that only ranges located on a non-circular
........
--------------------
14: stop("trying to load regions beyond the boundaries ", "of non-circular sequence \"",
seqname, "\"")
13: loadFUN(x, seqname, ranges)
12: loadFUN(x, seqname, ranges)
11: loadSubseqsFromStrandedSequence(x@single_sequences, seqname,
ranges(gr), strand(gr), is_circular)
10: FUN(1:35[[27L]], ...)
9: lapply(seq_len(length(grl)), function(i) {
gr <- grl[[i]]
if (length(gr) == 0L)
return(DNAStringSet())
seqlevel <- grl_seqlevels[i]
is_circular <- isCircular(x)[[seqlevel]]
idx <- match(seqlevel, x@user_seqnames)
if (is.na(idx))
stop("invalid sequence name: ", seqlevel)
seqname <- names(x@user_seqnames)[[idx]]
if (is.null(snplocs(x, seqname))) {
subject <- try(get(seqname, envir = x@.seqs_cache, inherits = FALSE),
silent = TRUE)
if (is(subject, "try-error")) {
ans <- loadSubseqsFromStrandedSequence(x@single_sequences,
seqname, ranges(gr), strand(gr), is_circular)
return(ans)
}
.linkToCachedObject(subject) <- .newLinkToCachedObject(seqname,
x@.seqs_cache, x@.link_counts)
}
else {
subject <- x[[seqlevel]]
}
masks(subject) <- NULL
loadSubseqsFromStrandedSequence(subject, seqlevel, ranges(gr),
strand(gr), is_circular)
})
8: lapply(seq_len(length(grl)), function(i) {
gr <- grl[[i]]
if (length(gr) == 0L)
return(DNAStringSet())
seqlevel <- grl_seqlevels[i]
is_circular <- isCircular(x)[[seqlevel]]
idx <- match(seqlevel, x@user_seqnames)
if (is.na(idx))
stop("invalid sequence name: ", seqlevel)
seqname <- names(x@user_seqnames)[[idx]]
if (is.null(snplocs(x, seqname))) {
subject <- try(get(seqname, envir = x@.seqs_cache, inherits = FALSE),
silent = TRUE)
if (is(subject, "try-error")) {
ans <- loadSubseqsFromStrandedSequence(x@single_sequences,
seqname, ranges(gr), strand(gr), is_circular)
return(ans)
}
.linkToCachedObject(subject) <- .newLinkToCachedObject(seqname,
x@.seqs_cache, x@.link_counts)
}
else {
subject <- x[[seqlevel]]
}
masks(subject) <- NULL
loadSubseqsFromStrandedSequence(subject, seqlevel, ranges(gr),
strand(gr), is_circular)
})
7: .extractFromBSgenomeSingleSequences(x, sseq_args$names, sseq_args$start,
sseq_args$end, sseq_args$width, sseq_args$strand)
6: .local(x, ...)
5: getSeq(subject, promoter.granges)
4: getSeq(subject, promoter.granges)
3: getPromoterSeq(chr.loc, Mmusculus, upstream = 2000, downstream = 1000)
2: getPromoterSeq(chr.loc, Mmusculus, upstream = 2000, downstream = 1000)
1: system.time(prom.all <- getPromoterSeq(chr.loc, Mmusculus, upstream = 2000,
downstream = 1000))
---------------------------------------------
> sessionInfo()
R version 3.1.1 (2014-07-10)
Platform: x86_64-pc-linux-gnu (64-bit)
locale:
[1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
[3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
[5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
[7] LC_PAPER=en_US.UTF-8 LC_NAME=C
[9] LC_ADDRESS=C LC_TELEPHONE=C
[11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
attached base packages:
[1] stats4 parallel stats graphics grDevices utils datasets
[8] methods base
other attached packages:
[1] TxDb.Mmusculus.UCSC.mm10.knownGene_3.0.0
[2] BSgenome.Mmusculus.UCSC.mm10_1.4.0
[3] BSgenome_1.34.0
[4] rtracklayer_1.26.2
[5] Biostrings_2.34.0
[6] XVector_0.6.0
[7] org.Mm.eg.db_3.0.0
[8] RSQLite_1.0.0
[9] DBI_0.3.1
[10] GenomicFeatures_1.18.2
[11] AnnotationDbi_1.28.1
[12] Biobase_2.26.0
[13] GenomicRanges_1.18.1
[14] GenomeInfoDb_1.2.3
[15] IRanges_2.0.0
[16] S4Vectors_0.4.0
[17] BiocGenerics_0.12.1
[18] BiocInstaller_1.16.1
loaded via a namespace (and not attached):
[1] base64enc_0.1-2 BatchJobs_1.5 BBmisc_1.8
[4] BiocParallel_1.0.0 biomaRt_2.22.0 bitops_1.0-6
[7] brew_1.0-6 checkmate_1.5.0 codetools_0.2-9
[10] digest_0.6.4 fail_1.2 foreach_1.4.2
[13] GenomicAlignments_1.2.1 iterators_1.0.7 RCurl_1.95-4.3
[16] Rsamtools_1.18.2 sendmailR_1.2-1 stringr_0.6.2
[19] tools_3.1.1 XML_3.98-1.1 zlibbioc_1.12.0

thank you , also for the lesson about unplaced ones ...