You should probably coerce to a GRanges first, then use liftOver()
. There is a bit of extra coercion you have to do first.
> library(pd.genomewidesnp.6)
> library(GenomicRanges)
> library(rtracklayer)
> con <- db(pd.genomewidesnp.6)
> snp <- dbGetQuery(con, "select man_fsetid, dbsnp_rs_id, chrom, physical_pos, strand from featureSet;")
## make chromosomes the same as a chain file
> snp$chrom <- paste0("chr", snp$chrom)
## subset out SNPs with missing location
> snp <- snp[!(is.na(snp$chrom) | is.na(snp$physical_pos)),]
## convert strand info to +,-,*
> snp$strand[is.na(snp$strand)] <- 2
> snp$strand <- snp$strand + 1
> snp$strand <- c("+", "-", "*")[snp$strand]
> head(snp)
man_fsetid dbsnp_rs_id chrom physical_pos strand
1 SNP_A-2131660 rs2887286 chr1 1156131 +
2 SNP_A-1967418 rs1496555 chr1 2234251 +
3 SNP_A-1969580 rs41477744 chr1 2329564 +
4 SNP_A-4263484 rs3890745 chr1 2553624 +
5 SNP_A-1978185 rs10492936 chr1 2936870 -
6 SNP_A-4264431 rs10489588 chr1 2951834 -
## convert to GRanges
> snpgr <- GRanges(snp[,3], IRanges(snp[,4],snp[,4]), snp$strand, ID = snp[,1], dbsnp = snp[,2])
## now use liftOver from rtracklayer, using the hg19ToHg18.over.chain from UCSC
> download.file("http://hgdownload.cse.ucsc.edu/gbdb/hg19/liftOver/hg19ToHg18.over.chain.gz", "hg19ToHg18.over.chain.gz")
trying URL 'http://hgdownload.cse.ucsc.edu/gbdb/hg19/liftOver/hg19ToHg18.over.chain.gz'
Content type 'application/x-gzip' length 225973 bytes (220 KB)
==================================================
downloaded 220 KB
## have to decompress
> system("gzip -d hg19ToHg18.over.chain.gz")
> chain <- import.chain("hg19ToHg18.over.chain")
> snpgr.18 <- unlist(liftOver(snpgr, chain))
Discarding unchained sequences: chrMT
> snpgr
GRanges object with 905422 ranges and 2 metadata columns:
seqnames ranges strand | ID dbsnp
<Rle> <IRanges> <Rle> | <character> <character>
[1] chr1 [1156131, 1156131] + | SNP_A-2131660 rs2887286
[2] chr1 [2234251, 2234251] + | SNP_A-1967418 rs1496555
[3] chr1 [2329564, 2329564] + | SNP_A-1969580 rs41477744
[4] chr1 [2553624, 2553624] + | SNP_A-4263484 rs3890745
[5] chr1 [2936870, 2936870] - | SNP_A-1978185 rs10492936
... ... ... ... ... ... ...
[905418] chrX [2462586, 2462586] - | SNP_A-8573804 rs5982546
[905419] chrX [2633624, 2633624] - | SNP_A-8573845 rs6567640
[905420] chrX [2589261, 2589261] - | SNP_A-8573915 rs7892606
[905421] chrX [2692710, 2692710] + | SNP_A-8573964 rs2259750
[905422] chrX [1812233, 1812233] + | SNP_A-8574011 rs7058531
-------
seqinfo: 25 sequences from an unspecified genome; no seqlengths
> snpgr.18
GRanges object with 905198 ranges and 2 metadata columns:
seqnames ranges strand | ID dbsnp
<Rle> <IRanges> <Rle> | <character> <character>
[1] chr1 [1145994, 1145994] + | SNP_A-2131660 rs2887286
[2] chr1 [2224111, 2224111] + | SNP_A-1967418 rs1496555
[3] chr1 [2319424, 2319424] + | SNP_A-1969580 rs41477744
[4] chr1 [2543484, 2543484] + | SNP_A-4263484 rs3890745
[5] chr1 [2926730, 2926730] - | SNP_A-1978185 rs10492936
... ... ... ... ... ... ...
[905194] chrX [2472586, 2472586] - | SNP_A-8573804 rs5982546
[905195] chrX [2643624, 2643624] - | SNP_A-8573845 rs6567640
[905196] chrX [2599261, 2599261] - | SNP_A-8573915 rs7892606
[905197] chrX [2702710, 2702710] + | SNP_A-8573964 rs2259750
[905198] chrX [1772233, 1772233] + | SNP_A-8574011 rs7058531
-------
seqinfo: 24 sequences from an unspecified genome; no seqlengths
>
Et voila!
Or maybe more useful
Excellent ! Thank you. Both answers would work for me.
I wish this example was there as a vignette for this package. Thanks again !
Excellent ! Thank you. Both answers would work for me.
I wish this example was there as a vignette for this package. Thanks again !