Entering edit mode
Hermann Norpois
▴
110
@hermann-norpois-5483
Last seen 10.2 years ago
I have data as GRange object
test.gr
GRanges with 20 ranges and 0 metadata columns:
seqnames ranges strand
<rle> <iranges> <rle>
[1] chr1 [ 10246, 10283] *
[2] chr1 [237718, 237887] *
[3] chr1 [521553, 521613] *
[4] chr1 [564413, 564686] *
or as classical dataframe
head (test)
seqnames start end width strand
1 chr1 10246 10283 38 *
2 chr1 237718 237887 170 *
3 chr1 521553 521613 61 *
4 chr1 564413 564686 274 *
and I wish to count how often do intervals of a certain range exist. I
would be happy to get something like this:
width occurence of the width sum of the occurence
0-50 2
2
50-100 4
6
100-150 5
11
I would prefer to do this using GRange objects directly but I would be
happy to know a way how to do it via test (classical dataframe).
Could you please give me a hint.
Thanks Hermann
P.S.: My data:
> dput (test)
structure(list(seqnames = structure(c(1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label =
c("chr1",
"chr10", "chr11", "chr12", "chr13", "chr14", "chr15", "chr16",
"chr17", "chr18", "chr19", "chr2", "chr20", "chr21", "chr22",
"chr3", "chr4", "chr5", "chr6", "chr7", "chr8", "chr9", "chrX",
"chrY"), class = "factor"), start = c(10246L, 237718L, 521553L,
564413L, 565252L, 566538L, 567450L, 568777L, 569457L, 669190L,
713884L, 740270L, 752164L, 753266L, 754421L, 762051L, 785811L,
786878L, 791087L, 793485L), end = c(10283L, 237887L, 521613L,
564686L, 566081L, 567275L, 568583L, 569302L, 570125L, 669259L,
714603L, 740423L, 752782L, 753697L, 754711L, 763151L, 786025L,
787053L, 791138L, 793582L), width = c(38L, 170L, 61L, 274L, 830L,
738L, 1134L, 526L, 669L, 70L, 720L, 154L, 619L, 432L, 291L, 1101L,
215L, 176L, 52L, 98L), strand = structure(c(3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L), .Label =
c("+",
"-", "*"), class = "factor")), .Names = c("seqnames", "start",
"end", "width", "strand"), row.names = c(NA, 20L), class =
"data.frame")
> dput test.gr)
new("GRanges"
, seqnames = new("Rle"
, values = structure(1L, .Label = c("chr1", "chr10", "chr11",
"chr12",
"chr13",
"chr14", "chr15", "chr16", "chr17", "chr18", "chr19", "chr2",
"chr20", "chr21", "chr22", "chr3", "chr4", "chr5", "chr6", "chr7",
"chr8", "chr9", "chrX", "chrY"), class = "factor")
, lengths = 20L
, elementMetadata = NULL
, metadata = list()
)
, ranges = new("IRanges"
, start = c(10246L, 237718L, 521553L, 564413L, 565252L, 566538L,
567450L,
568777L, 569457L, 669190L, 713884L, 740270L, 752164L, 753266L,
754421L, 762051L, 785811L, 786878L, 791087L, 793485L)
, width = c(38L, 170L, 61L, 274L, 830L, 738L, 1134L, 526L, 669L,
70L,
720L,
154L, 619L, 432L, 291L, 1101L, 215L, 176L, 52L, 98L)
, NAMES = NULL
, elementType = "integer"
, elementMetadata = NULL
, metadata = list()
)
, strand = new("Rle"
, values = structure(3L, .Label = c("+", "-", "*"), class =
"factor")
, lengths = 20L
, elementMetadata = NULL
, metadata = list()
)
, elementMetadata = new("DataFrame"
, rownames = NULL
, nrows = 20L
, listData = structure(list(), .Names = character(0))
, elementType = "ANY"
, elementMetadata = NULL
, metadata = list()
)
, seqinfo = new("Seqinfo"
, seqnames = c("chr1", "chr10", "chr11", "chr12", "chr13",
"chr14",
"chr15",
"chr16", "chr17", "chr18", "chr19", "chr2", "chr20", "chr21",
"chr22", "chr3", "chr4", "chr5", "chr6", "chr7", "chr8", "chr9",
"chrX", "chrY")
, seqlengths = c(NA_integer_, NA_integer_, NA_integer_,
NA_integer_,
NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_)
, is_circular = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA,
NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA)
, genome = c(NA_character_, NA_character_, NA_character_,
NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_,
NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_,
NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_,
NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_,
NA_character_
)
)
, metadata = list()
)
>
[[alternative HTML version deleted]]