Question

importing bigwig file: "sequence names not known"

0

Entering edit mode

liz.ingsimmons ▴ 140

@lizingsimmons-6935

Last seen 4.1 years ago

Germany

I want to use rtracklayer to import data from a bigwig file within certain ranges, but I get an error when the bigwig file does not contain data for chrY and chrY is present in the seqlevels of the GRanges object I'm using.

I realise that this is HeLa data, so chrY should have no data on it anyway. I also realise that it's a good idea to tell users when the seqlevels of their "which" ranges and the data they're trying to import don't match, in case it's entirely the wrong genome... I don't understand why it's an error to have "which" ranges where the seqlevels include a chromosome which isn't present in the bigwig file, even if none of the ranges are on that chromosome -- should this be a warning instead?

Example:

#bigwig file from ENCODE, 
#available here: http://hgdownload.cse.ucsc.edu/goldenPath/hg19/encodeDCC/wgEncodeSydhTfbs/

fn <- "wgEncodeSydhTfbsHelas3Rad21IggrabSig.bw"

#dput of first ten ranges of my real data

test_gr <- new("GRanges"
    , seqnames = new("Rle"
                     , values = structure(1L, .Label = c("chr1", "chr2", "chr3", "chr4", "chr5",
                                                         "chr6", "chr7", "chr8", "chr9", "chr10", "chr11", "chr12", "chr13",
                                                         "chr14", "chr15", "chr16", "chr17", "chr18", "chr19", "chr20",
                                                         "chr21", "chr22", "chrX", "chrY"), class = "factor")
                     , lengths = 10L
                     , elementMetadata = NULL
                     , metadata = list()
    )
    , ranges = new("IRanges"
                   , start = c(5295L, 950781L, 1297502L, 1685479L, 1690824L, 1709330L, 1835632L,
                               1845935L, 1895474L, 1976700L)
                   , width = c(10000L, 10000L, 10000L, 10000L, 10000L, 10000L, 10000L, 10000L,
                               10000L, 10000L)
                   , NAMES = NULL
                   , elementType = "integer"
                   , elementMetadata = NULL
                   , metadata = list()
    )
    , strand = new("Rle"
                   , values = structure(3L, .Label = c("+", "-", "*"), class = "factor")
                   , lengths = 10L
                   , elementMetadata = NULL
                   , metadata = list()
    )
    , elementMetadata = new("DataFrame"
                            , rownames = NULL
                            , nrows = 10L
                            , listData = structure(list(length = c(940L, 3071L, 2534L, 1254L, 1003L, 1106L,
                                                                   1908L, 1798L, 2210L, 2375L), summit = c(226L, 955L, 1706L, 540L,
                                                                                                           513L, 689L, 990L, 935L, 1368L, 1196L), tags = c(426L, 220L, 91L,
                                                                                                                                                           99L, 34L, 36L, 153L, 250L, 475L, 302L), X.10.log10.pvalue. = c(1115.24,
                                                                                                                                                                                                                          73.32, 256.5, 72.1, 58.18, 56.09, 60.68, 553.67, 3100, 360.59
                                                                                                                                                           ), fold_enrichment = c(7.7, 4.89, 6.59, 4.5, 5.83, 4.89, 4.72,
                                                                                                                                                                                  8.27, 61.16, 9.33), FDR... = c(0, 3.83, 0.96, 3.9, 6.71, 7.41,
                                                                                                                                                                                                                 6.21, 0, 0, 0)), .Names = c("length", "summit", "tags", "X.10.log10.pvalue.",
                                                                                                                                                                                                                                             "fold_enrichment", "FDR..."))
                            , elementType = "ANY"
                            , elementMetadata = NULL
                            , metadata = list()
    )
    , seqinfo = new("Seqinfo"
                    , seqnames = c("chr1", "chr2", "chr3", "chr4", "chr5", "chr6", "chr7", "chr8",
                                   "chr9", "chr10", "chr11", "chr12", "chr13", "chr14", "chr15",
                                   "chr16", "chr17", "chr18", "chr19", "chr20", "chr21", "chr22",
                                   "chrX", "chrY")
                    , seqlengths = c(NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,
                                     NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,
                                     NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,
                                     NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,
                                     NA_integer_, NA_integer_, NA_integer_, NA_integer_)
                    , is_circular = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
                                      NA, NA, NA, NA, NA, NA, NA, NA, NA)
                    , genome = c(NA_character_, NA_character_, NA_character_, NA_character_,
                                 NA_character_, NA_character_, NA_character_, NA_character_, NA_character_,
                                 NA_character_, NA_character_, NA_character_, NA_character_, NA_character_,
                                 NA_character_, NA_character_, NA_character_, NA_character_, NA_character_,
                                 NA_character_, NA_character_, NA_character_, NA_character_, NA_character_
                    )
    )
    , metadata = list()
)

x <- import("wgEncodeSydhTfbsHelas3Rad21IggrabSig.bw", which = test_gr)

## Error in .local(con, format, text, ...) :
 ## 'which' contains sequence names not known to BigWig file: chrY

sessionInfo()

R version 3.2.1 (2015-06-18)
Platform: x86_64-unknown-linux-gnu (64-bit)
Running under: Debian GNU/Linux stretch/sid

locale:
 [1] LC_CTYPE=en_GB.UTF-8       LC_NUMERIC=C              
 [3] LC_TIME=en_GB.UTF-8        LC_COLLATE=en_GB.UTF-8    
 [5] LC_MONETARY=en_GB.UTF-8    LC_MESSAGES=en_GB.UTF-8   
 [7] LC_PAPER=en_GB.UTF-8       LC_NAME=C                 
 [9] LC_ADDRESS=C               LC_TELEPHONE=C            
[11] LC_MEASUREMENT=en_GB.UTF-8 LC_IDENTIFICATION=C       

attached base packages:
[1] stats4    parallel  stats     graphics  grDevices utils     datasets
[8] methods   base     

other attached packages:
[1] rtracklayer_1.28.5   GenomicRanges_1.20.5 GenomeInfoDb_1.4.1  
[4] IRanges_2.2.5        S4Vectors_0.6.1      BiocGenerics_0.14.0

loaded via a namespace (and not attached):
 [1] XML_3.98-1.2            Rsamtools_1.20.4        Biostrings_2.36.1      
 [4] bitops_1.0-6            GenomicAlignments_1.4.1 futile.options_1.0.0   
 [7] zlibbioc_1.14.0         XVector_0.8.0           futile.logger_1.4.1    
[10] lambda.r_1.1.7          BiocParallel_1.2.6      tools_3.2.1            
[13] RCurl_1.95-4.6

rtracklayer • 1.6k views

ADD COMMENT • link updated 9.4 years ago by Michael Lawrence ★ 11k • written 9.4 years ago by liz.ingsimmons ▴ 140

score 1 · Accepted Answer · 2015-08-01

1

Entering edit mode

Michael Lawrence ★ 11k

@michael-lawrence-3846

Last seen 3.0 years ago

United States

Ok, I changed it to a warning in devel.

ADD COMMENT • link 9.4 years ago Michael Lawrence ★ 11k

0

Entering edit mode

Great, thank you!

ADD REPLY • link 9.4 years ago liz.ingsimmons ▴ 140