Question

Using readBAM function in segmentSeq

0

Entering edit mode

james perkins ▴ 300

@james-perkins-2675

Last seen 11.5 years ago

Dear Thomas, I want to use the seqmentSeq package to look for intergenic expression (lincRNAs etc.) above background. I have case and control samples, with 3 biological replicates. I have bam files (produced using bowtie and samtools) that read in fine with RSamtools. > xx <- readBamGappedAlignments("../../raw/file.sorted.bam") However I can't get it to read the files in using readBAM in the segmentSeq package: > zz <- readBAM("../../raw/file.sorted.bam", replicates=1, chrs="chr1",chrlens=267910886) Reading files...Error in .Call2("solve_user_SEW0", start, end, width, PACKAGE = "IRanges") : solving row 9146840: range cannot be determined from the supplied arguments (too many NAs) Am I missing an argument/doing something else wrong? I hope this is enough information for you to go on, please let me know if not. Many thanks! Jim > traceback() 8: .Call(.NAME, ..., PACKAGE = PACKAGE) 7: .Call2("solve_user_SEW0", start, end, width, PACKAGE = "IRanges") 6: solveUserSEW0(start = start, end = end, width = width) 5: IRanges(start = as.integer(tags$pos), width = tags$qwidth) 4: FUN(1L[[1L]], ...) 3: lapply(sampleNumbers, function(ii) { tags <- scanBam(files[ii])[[1]] if (!is.null(countID)) { counts <- Rle(scanBam(files[ii], param = ScanBamParam(tag = countID))[[1]][[1]][[1]]) } else counts <- Rle(rep(1L, length(tags$seq))) ir <- IRanges(start = as.integer(tags$pos), width = tags$qwidth) aln <- GRanges(seqnames = tags$rname, ir, strand = tags$strand, tag = Rle(as.character(tags$seq)), count = counts) aln <- aln[as.integer(seqnames(aln)) %in% which(seqlevels(aln) %in% chrs), ] if (!is.null(polyLength)) { polyBaseRemove <- unique(unlist(lapply(polyBase, grep, as.character(values(aln)$tag)))) if (length(polyBaseRemove) > 0) aln <- aln[-polyBaseRemove, ] } if (is.null(countID)) { aln <- aln[order(as.factor(seqnames(aln)), as.integer(start(aln)), as.character(values(aln)$tag)), ] dupTags <- which(!(as.character(seqnames(aln)) == c(as.character(seqnames(aln))[-1], "!") & start(aln) == c(start(aln)[-1], Inf) & end(aln) == c(end(aln)[-1], Inf) & as.character(strand(aln)) == c(as.character(strand(aln))[-1], "!") & as.character(values(aln)$tag) == as.character(c(values(aln)$tag[-1], "!")))) aln <- aln[dupTags, ] values(aln)$count <- diff(c(0, dupTags)) } message(".", appendLF = FALSE) aln }) 2: lapply(sampleNumbers, function(ii) { tags <- scanBam(files[ii])[[1]] if (!is.null(countID)) { counts <- Rle(scanBam(files[ii], param = ScanBamParam(tag = countID))[[1]][[1]][[1]]) } else counts <- Rle(rep(1L, length(tags$seq))) ir <- IRanges(start = as.integer(tags$pos), width = tags$qwidth) aln <- GRanges(seqnames = tags$rname, ir, strand = tags$strand, tag = Rle(as.character(tags$seq)), count = counts) aln <- aln[as.integer(seqnames(aln)) %in% which(seqlevels(aln) %in% chrs), ] if (!is.null(polyLength)) { polyBaseRemove <- unique(unlist(lapply(polyBase, grep, as.character(values(aln)$tag)))) if (length(polyBaseRemove) > 0) aln <- aln[-polyBaseRemove, ] } if (is.null(countID)) { aln <- aln[order(as.factor(seqnames(aln)), as.integer(start(aln)), as.character(values(aln)$tag)), ] dupTags <- which(!(as.character(seqnames(aln)) == c(as.character(seqnames(aln))[-1], "!") & start(aln) == c(start(aln)[-1], Inf) & end(aln) == c(end(aln)[-1], Inf) & as.character(strand(aln)) == c(as.character(strand(aln))[-1], "!") & as.character(values(aln)$tag) == as.character(c(values(aln)$tag[-1], "!")))) aln <- aln[dupTags, ] values(aln)$count <- diff(c(0, dupTags)) } message(".", appendLF = FALSE) aln }) 1: readBAM("../../raw/file.sorted.bam", replicates = 1, chrs = "chr1", chrlens = 267910886) > sessionInfo() R version 2.15.0 (2012-03-30) Platform: x86_64-unknown-linux-gnu (64-bit) locale: [1] LC_CTYPE=en_US LC_NUMERIC=C [3] LC_TIME=en_GB.UTF-8 LC_COLLATE=en_GB.UTF-8 [5] LC_MONETARY=en_GB.UTF-8 LC_MESSAGES=en_GB.UTF-8 [7] LC_PAPER=C LC_NAME=C [9] LC_ADDRESS=C LC_TELEPHONE=C [11] LC_MEASUREMENT=en_GB.UTF-8 LC_IDENTIFICATION=C attached base packages: [1] stats graphics grDevices utils datasets methods base other attached packages: [1] segmentSeq_1.8.0 ShortRead_1.14.4 latticeExtra_0.6-19 [4] RColorBrewer_1.0-5 Rsamtools_1.8.5 lattice_0.20-6 [7] Biostrings_2.24.1 GenomicRanges_1.8.7 IRanges_1.14.4 [10] BiocGenerics_0.2.0 baySeq_1.10.0 loaded via a namespace (and not attached): [1] Biobase_2.16.0 bitops_1.0-4.1 grid_2.15.0 hwriter_1.3 stats4_2.15.0 [6] zlibbioc_1.2.0

GO Rsamtools GO Rsamtools • 1.9k views

ADD COMMENT • link updated 13.6 years ago by Thomas J Hardcastle ▴ 180 • written 13.6 years ago by james perkins ▴ 300

score 0 · Answer 1 · 2012-07-05

Hi James, My tentative guess is that the problem is that scanBam (IRanges), which the readBAM function is using, doesn't like your file for some reason. Could you try scanBam("../../raw/file.sorted.bam") and see if you get the same error? If you do, hopefully someone from the IRanges team will be able to explain why. Cheers, Tom On 03/07/2012 11:58, James Perkins wrote: > Dear Thomas, > > I want to use the seqmentSeq package to look for intergenic expression > (lincRNAs etc.) above background. > > I have case and control samples, with 3 biological replicates. > > I have bam files (produced using bowtie and samtools) that read in > fine with RSamtools. > >> xx <- readBamGappedAlignments("../../raw/file.sorted.bam") > However I can't get it to read the files in using readBAM in the > segmentSeq package: > >> zz <- readBAM("../../raw/file.sorted.bam", replicates=1, chrs="chr1",chrlens=267910886) > Reading files...Error in .Call2("solve_user_SEW0", start, end, width, > PACKAGE = "IRanges") : > solving row 9146840: range cannot be determined from the supplied > arguments (too many NAs) > > Am I missing an argument/doing something else wrong? I hope this is > enough information for you to go on, please let me know if not. > > Many thanks! > > Jim > >> traceback() > 8: .Call(.NAME, ..., PACKAGE = PACKAGE) > 7: .Call2("solve_user_SEW0", start, end, width, PACKAGE = "IRanges") > 6: solveUserSEW0(start = start, end = end, width = width) > 5: IRanges(start = as.integer(tags$pos), width = tags$qwidth) > 4: FUN(1L[[1L]], ...) > 3: lapply(sampleNumbers, function(ii) { > tags <- scanBam(files[ii])[[1]] > if (!is.null(countID)) { > counts <- Rle(scanBam(files[ii], param = ScanBamParam(tag = > countID))[[1]][[1]][[1]]) > } > else counts <- Rle(rep(1L, length(tags$seq))) > ir <- IRanges(start = as.integer(tags$pos), width = tags$qwidth) > aln <- GRanges(seqnames = tags$rname, ir, strand = tags$strand, > tag = Rle(as.character(tags$seq)), count = counts) > aln <- aln[as.integer(seqnames(aln)) %in% which(seqlevels(aln) %in% > chrs), ] > if (!is.null(polyLength)) { > polyBaseRemove <- unique(unlist(lapply(polyBase, grep, > as.character(values(aln)$tag)))) > if (length(polyBaseRemove) > 0) > aln <- aln[-polyBaseRemove, ] > } > if (is.null(countID)) { > aln <- aln[order(as.factor(seqnames(aln)), as.integer(start(aln)), > as.character(values(aln)$tag)), ] > dupTags <- which(!(as.character(seqnames(aln)) == > c(as.character(seqnames(aln))[-1], > "!") & start(aln) == c(start(aln)[-1], Inf) & end(aln) == > c(end(aln)[-1], Inf) & as.character(strand(aln)) == > c(as.character(strand(aln))[-1], "!") & > as.character(values(aln)$tag) == > as.character(c(values(aln)$tag[-1], "!")))) > aln <- aln[dupTags, ] > values(aln)$count <- diff(c(0, dupTags)) > } > message(".", appendLF = FALSE) > aln > }) > 2: lapply(sampleNumbers, function(ii) { > tags <- scanBam(files[ii])[[1]] > if (!is.null(countID)) { > counts <- Rle(scanBam(files[ii], param = ScanBamParam(tag = > countID))[[1]][[1]][[1]]) > } > else counts <- Rle(rep(1L, length(tags$seq))) > ir <- IRanges(start = as.integer(tags$pos), width = tags$qwidth) > aln <- GRanges(seqnames = tags$rname, ir, strand = tags$strand, > tag = Rle(as.character(tags$seq)), count = counts) > aln <- aln[as.integer(seqnames(aln)) %in% which(seqlevels(aln) %in% > chrs), ] > if (!is.null(polyLength)) { > polyBaseRemove <- unique(unlist(lapply(polyBase, grep, > as.character(values(aln)$tag)))) > if (length(polyBaseRemove) > 0) > aln <- aln[-polyBaseRemove, ] > } > if (is.null(countID)) { > aln <- aln[order(as.factor(seqnames(aln)), as.integer(start(aln)), > as.character(values(aln)$tag)), ] > dupTags <- which(!(as.character(seqnames(aln)) == > c(as.character(seqnames(aln))[-1], > "!") & start(aln) == c(start(aln)[-1], Inf) & end(aln) == > c(end(aln)[-1], Inf) & as.character(strand(aln)) == > c(as.character(strand(aln))[-1], "!") & > as.character(values(aln)$tag) == > as.character(c(values(aln)$tag[-1], "!")))) > aln <- aln[dupTags, ] > values(aln)$count <- diff(c(0, dupTags)) > } > message(".", appendLF = FALSE) > aln > }) > 1: readBAM("../../raw/file.sorted.bam", > replicates = 1, chrs = "chr1", chrlens = 267910886) > >> sessionInfo() > R version 2.15.0 (2012-03-30) > Platform: x86_64-unknown-linux-gnu (64-bit) > > locale: > [1] LC_CTYPE=en_US LC_NUMERIC=C > [3] LC_TIME=en_GB.UTF-8 LC_COLLATE=en_GB.UTF-8 > [5] LC_MONETARY=en_GB.UTF-8 LC_MESSAGES=en_GB.UTF-8 > [7] LC_PAPER=C LC_NAME=C > [9] LC_ADDRESS=C LC_TELEPHONE=C > [11] LC_MEASUREMENT=en_GB.UTF-8 LC_IDENTIFICATION=C > > attached base packages: > [1] stats graphics grDevices utils datasets methods base > > other attached packages: > [1] segmentSeq_1.8.0 ShortRead_1.14.4 latticeExtra_0.6-19 > [4] RColorBrewer_1.0-5 Rsamtools_1.8.5 lattice_0.20-6 > [7] Biostrings_2.24.1 GenomicRanges_1.8.7 IRanges_1.14.4 > [10] BiocGenerics_0.2.0 baySeq_1.10.0 > > loaded via a namespace (and not attached): > [1] Biobase_2.16.0 bitops_1.0-4.1 grid_2.15.0 hwriter_1.3 stats4_2.15.0 > [6] zlibbioc_1.2.0 -- Dr. Thomas J. Hardcastle Department of Plant Sciences University of Cambridge Downing Street Cambridge, CB2 3EA United Kingdom