When using mapToAlignments() with multiple GAlignments, it appears to terminate if none of the query GRanges map to a GAlignment.
library(GenomicAlignments)
alignments <- GAlignments(
c("chr1", "chr1"),
c(20L, 10L),
c("11M", "11M"),
strand(c("*", "*")),
names=c("read_A", "read_B")
)
x <- GRanges("chr1", IRanges(c(12, 12), width=c(6, 20)))
mapToAlignments(x, alignments) ## Empty output
However if we reverse the order
mapToAlignments(x, alignments[c(2, 1)])
# GAlignments object with 1 alignment and 0 metadata columns:
# seqnames strand cigar qwidth start end width njunc
# <Rle> <Rle> <character> <integer> <integer> <integer> <integer> <integer>
# read_B chr1 * 11M 11 10 20 11 0
# -------
# seqinfo: 1 sequence from an unspecified genome; no seqlengths
The behaviour appears to be that if a GAlignment cannot be mapped to by any of the GRanges, then function terminates and returns the results obtained so far.
Is this the intended behaviour of the function? It makes it very difficult to use as a batch request, because there's no efficient way to figure out ahead of time which reads will cause the function to prematurely terminate. The fact it terminates silently is also dangerous, because if the offending read occurred half way down a large query then it would be very difficult for a user to realise what happened.
EDIT: I found another strange termination issue shown here:
library(GenomicAlignments) | |
alignments <- GAlignments( | |
seqnames = Rle(factor(rep("chrX", 3))), | |
pos = c(37701723L, 37753335L, 37753335L), | |
cigar = c( | |
"24H6M1I13M2D5M1D12M1I16M3I6M2D20M1D18M1I34M2I116M2D9M1D9M1I30M1D9M1D44M6I3M2I4M2D19M9D7M1I5M1D18M1I17M2D1M1D16M1D32M1D3M1D18M1I3M1D49M2D34M2D8M1D7M2D1M1D5M1I2M5D4M1D2M1I3M2D10M1I21M1D20M2D1M1D2M1D18M2I3M2D74M1D9M1D5M2D34M1D12M2D20M1I2M1I35M1D10M2D16M1I55M3D82M1D6M1I65M2I8M1I30M1D7M2I15M1D15M1D20M1I2M1I14M1D29M3D12M2D3M1D5M1D20M1D21M2D56M1D7M4D2M1I60M2I9M1D49M2I5M2I28M13206H", | |
"6994H26M2I6M1I1M1I50M1D15M2I4M4D35M1D9M1D5M2D37M1I54M1D59M3I27M1I3M1D5M1I8M1I4M2I10M1I47M1D25M1I84M1D8M7D8M1D69M1I23M3D6M1D1M1D11M2D8M1I17M1D43M1I17M1D60M1I1M3D17M1D17M2D1M1D4M1I12M1D28M1D13M1D7M2I44M5D30M1D2M2D1M1D20M1D23M1I7M1D22M1D3M2D73M1I4M5D23M3D4M2D23M1D13M1D9M3D12M1I9M1D11M1D1M3I31M2I56M1D5M6461H", | |
"308H103M1D16M2D47M1D1M1D18M1D36M1I26M3I51M1D2M1I8M1D9M1D17M3I53M1D15M1D7M3I22M1D60M3I10M2D20M7D8M1D65M2I16M1D12M1D6M1D11M3D4M1D4M1I8M1I31M3I7M2I5M1I10M1D16M2D9M1D10M4I11M1D107M2D22M1I18M1D40M1I1M2D13M1I15M1D15M1D5M1I31M1D6M1D12M1D21M1I52M1D14M2D6M2I17M3D4M1D27M1I11M1D9M1D2M1D5M1D14M1D4M1D3M1D73M1D30M1D9M11706H" | |
), | |
strand = Rle(factor(rep("+", 3), levels = c("+","-", "*"))), | |
names = c("read1", "read2", "read3") | |
) | |
variants <- GRanges( | |
seqnames = Rle(factor(rep("chrX", 6))), | |
ranges = IRanges( | |
start = c( | |
37701926L, | |
37702423L, | |
37703182L, | |
37703236L, | |
37753664L, | |
37753942L | |
), | |
width = 1 | |
), | |
strand = Rle(factor(rep("*", 6), levels = c("+","-", "*"))), | |
) | |
names(variants) <- c("read1", "read2", "read3", "read4", "read5", "read6") | |
mapToAlignments(variants, alignments) # only returns mapping to first alignment | |
mapToAlignments(variants, alignments[2:3]) # returns mapping for 2nd and 3rd alignments |
Hi Shian, Thanks for the report and clear example. I'll look into it. Valerie