Rename read-chromosome-names to refseq-chromosome-names.
1
0
Entering edit mode
marcovth • 0
@marcovth-6828
Last seen 9.6 years ago
Canada

Hello ...

I have the following chromosome names in my hg19 reference sequence.

>    names(seqlengths(tx_by_gene))
 [1] "chr1"                  "chr2"                  "chr3"                  "chr4"                  "chr5"                  "chr6"                  "chr7"                  "chr8"                 
 [9] "chr9"                  "chr10"                 "chr11"                 "chr12"                 "chr13"                 "chr14"                 "chr15"                 "chr16"                
[17] "chr17"                 "chr18"                 "chr19"                 "chr20"                 "chr21"                 "chr22"                 "chrX"                  "chrY"                 
[25] "chrM"                  "chr1_gl000191_random"  "chr1_gl000192_random"  "chr4_ctg9_hap1"        "chr4_gl000193_random"  "chr4_gl000194_random"  "chr6_apd_hap1"         "chr6_cox_hap2"        
[33] "chr6_dbb_hap3"         "chr6_mann_hap4"        "chr6_mcf_hap5"         "chr6_qbl_hap6"         "chr6_ssto_hap7"        "chr7_gl000195_random"  "chr8_gl000196_random"  "chr8_gl000197_random" 
[41] "chr9_gl000198_random"  "chr9_gl000199_random"  "chr9_gl000200_random"  "chr9_gl000201_random"  "chr11_gl000202_random" "chr17_ctg5_hap1"       "chr17_gl000203_random" "chr17_gl000204_random"
[49] "chr17_gl000205_random" "chr17_gl000206_random" "chr18_gl000207_random" "chr19_gl000208_random" "chr19_gl000209_random" "chr21_gl000210_random" "chrUn_gl000211"        "chrUn_gl000212"       
[57] "chrUn_gl000213"        "chrUn_gl000214"        "chrUn_gl000215"        "chrUn_gl000216"        "chrUn_gl000217"        "chrUn_gl000218"        "chrUn_gl000219"        "chrUn_gl000220"       
[65] "chrUn_gl000221"        "chrUn_gl000222"        "chrUn_gl000223"        "chrUn_gl000224"        "chrUn_gl000225"        "chrUn_gl000226"        "chrUn_gl000227"        "chrUn_gl000228"       
[73] "chrUn_gl000229"        "chrUn_gl000230"        "chrUn_gl000231"        "chrUn_gl000232"        "chrUn_gl000233"        "chrUn_gl000234"        "chrUn_gl000235"        "chrUn_gl000236"       
[81] "chrUn_gl000237"        "chrUn_gl000238"        "chrUn_gl000239"        "chrUn_gl000240"        "chrUn_gl000241"        "chrUn_gl000242"        "chrUn_gl000243"        "chrUn_gl000244"       
[89] "chrUn_gl000245"        "chrUn_gl000246"        "chrUn_gl000247"        "chrUn_gl000248"        "chrUn_gl000249"       

 

And the following chromosome names in the reads ...

>    as.character(unique(rname(reads)))
 [1] "1"          "2"          "3"          "4"          "5"          "6"          "7"          "8"          "9"          "10"         "11"         "12"         "13"         "14"         "15"         "16"        
[17] "17"         "18"         "19"         "20"         "21"         "22"         "X"          "Y"          "MT"         "GL000229.1" "GL000231.1" "GL000210.1" "GL000239.1" "GL000235.1" "GL000201.1" "GL000247.1"
[33] "GL000197.1" "GL000249.1" "GL000196.1" "GL000248.1" "GL000244.1" "GL000238.1" "GL000232.1" "GL000240.1" "GL000236.1" "GL000241.1" "GL000243.1" "GL000242.1" "GL000237.1" "GL000233.1" "GL000204.1" "GL000198.1"
[49] "GL000191.1" "GL000227.1" "GL000228.1" "GL000214.1" "GL000221.1" "GL000218.1" "GL000220.1" "GL000213.1" "GL000211.1" "GL000199.1" "GL000217.1" "GL000216.1" "GL000215.1" "GL000205.1" "GL000219.1" "GL000224.1"
[65] "GL000223.1" "GL000195.1" "GL000212.1" "GL000222.1" "GL000200.1" "GL000193.1" "GL000194.1" "GL000225.1" "GL000192.1"

 

I am only interested in the reads on the full chromosomes. How can I rename, e.g., "1" to "chr1" for the chromosome 1 reads?

 

Thanks a lot for your help.

 

reads • 2.2k views
ADD COMMENT
0
Entering edit mode
@james-w-macdonald-5106
Last seen 3 hours ago
United States

It's not clear from your question what you are after. The choices are 1) how to change the chromosome names to be compatible, and 2) how to keep just the 'main' chromosomes. For 1:

> tx <- transcriptsBy(TxDb.Hsapiens.UCSC.hg19.knownGene, "gene")
> seqlevels(tx)
 [1] "chr1"                  "chr2"                  "chr3"                 
 [4] "chr4"                  "chr5"                  "chr6"                 
 [7] "chr7"                  "chr8"                  "chr9"                 
[10] "chr10"                 "chr11"                 "chr12"                
[13] "chr13"                 "chr14"                 "chr15"                
[16] "chr16"                 "chr17"                 "chr18"                
[19] "chr19"                 "chr20"                 "chr21"                
[22] "chr22"                 "chrX"                  "chrY"                 
[25] "chrM"                  "chr1_gl000191_random"  "chr1_gl000192_random"
[28] "chr4_ctg9_hap1"        "chr4_gl000193_random"  "chr4_gl000194_random"
[31] "chr6_apd_hap1"         "chr6_cox_hap2"         "chr6_dbb_hap3"        
[34] "chr6_mann_hap4"        "chr6_mcf_hap5"         "chr6_qbl_hap6"        
[37] "chr6_ssto_hap7"        "chr7_gl000195_random"  "chr8_gl000196_random"
[40] "chr8_gl000197_random"  "chr9_gl000198_random"  "chr9_gl000199_random"
[43] "chr9_gl000200_random"  "chr9_gl000201_random"  "chr11_gl000202_random"
[46] "chr17_ctg5_hap1"       "chr17_gl000203_random" "chr17_gl000204_random"
[49] "chr17_gl000205_random" "chr17_gl000206_random" "chr18_gl000207_random"
[52] "chr19_gl000208_random" "chr19_gl000209_random" "chr21_gl000210_random"
[55] "chrUn_gl000211"        "chrUn_gl000212"        "chrUn_gl000213"       
[58] "chrUn_gl000214"        "chrUn_gl000215"        "chrUn_gl000216"       
[61] "chrUn_gl000217"        "chrUn_gl000218"        "chrUn_gl000219"       
[64] "chrUn_gl000220"        "chrUn_gl000221"        "chrUn_gl000222"       
[67] "chrUn_gl000223"        "chrUn_gl000224"        "chrUn_gl000225"       
[70] "chrUn_gl000226"        "chrUn_gl000227"        "chrUn_gl000228"       
[73] "chrUn_gl000229"        "chrUn_gl000230"        "chrUn_gl000231"       
[76] "chrUn_gl000232"        "chrUn_gl000233"        "chrUn_gl000234"       
[79] "chrUn_gl000235"        "chrUn_gl000236"        "chrUn_gl000237"       
[82] "chrUn_gl000238"        "chrUn_gl000239"        "chrUn_gl000240"       
[85] "chrUn_gl000241"        "chrUn_gl000242"        "chrUn_gl000243"       
[88] "chrUn_gl000244"        "chrUn_gl000245"        "chrUn_gl000246"       
[91] "chrUn_gl000247"        "chrUn_gl000248"        "chrUn_gl000249"       
> seqlevelsStyle(tx) <- "NCBI"
> seqlevels(tx)
 [1] "1"                     "2"                     "3"                    
 [4] "4"                     "5"                     "6"                    
 [7] "7"                     "8"                     "9"                    
[10] "10"                    "11"                    "12"                   
[13] "13"                    "14"                    "15"                   
[16] "16"                    "17"                    "18"                   
[19] "19"                    "20"                    "21"                   
[22] "22"                    "X"                     "Y"                    
[25] "MT"                    "chr1_gl000191_random"  "chr1_gl000192_random"
[28] "chr4_ctg9_hap1"        "chr4_gl000193_random"  "chr4_gl000194_random"
[31] "chr6_apd_hap1"         "chr6_cox_hap2"         "chr6_dbb_hap3"        
[34] "chr6_mann_hap4"        "chr6_mcf_hap5"         "chr6_qbl_hap6"        
[37] "chr6_ssto_hap7"        "chr7_gl000195_random"  "chr8_gl000196_random"
[40] "chr8_gl000197_random"  "chr9_gl000198_random"  "chr9_gl000199_random"
[43] "chr9_gl000200_random"  "chr9_gl000201_random"  "chr11_gl000202_random"
[46] "chr17_ctg5_hap1"       "chr17_gl000203_random" "chr17_gl000204_random"
[49] "chr17_gl000205_random" "chr17_gl000206_random" "chr18_gl000207_random"
[52] "chr19_gl000208_random" "chr19_gl000209_random" "chr21_gl000210_random"
[55] "chrUn_gl000211"        "chrUn_gl000212"        "chrUn_gl000213"       
[58] "chrUn_gl000214"        "chrUn_gl000215"        "chrUn_gl000216"       
[61] "chrUn_gl000217"        "chrUn_gl000218"        "chrUn_gl000219"       
[64] "chrUn_gl000220"        "chrUn_gl000221"        "chrUn_gl000222"       
[67] "chrUn_gl000223"        "chrUn_gl000224"        "chrUn_gl000225"       
[70] "chrUn_gl000226"        "chrUn_gl000227"        "chrUn_gl000228"       
[73] "chrUn_gl000229"        "chrUn_gl000230"        "chrUn_gl000231"       
[76] "chrUn_gl000232"        "chrUn_gl000233"        "chrUn_gl000234"       
[79] "chrUn_gl000235"        "chrUn_gl000236"        "chrUn_gl000237"       
[82] "chrUn_gl000238"        "chrUn_gl000239"        "chrUn_gl000240"       
[85] "chrUn_gl000241"        "chrUn_gl000242"        "chrUn_gl000243"       
[88] "chrUn_gl000244"        "chrUn_gl000245"        "chrUn_gl000246"       
[91] "chrUn_gl000247"        "chrUn_gl000248"        "chrUn_gl000249"       

If you would rather, you can instead change your reads to use UCSC-style seqlevels with:

seqlevelsStyle(reads) <- "UCSC"

And for 2:

> txx <- keepStandardChromosomes(tx, "Homo_sapiens", "NCBI")
> seqlevels(txx)
 [1] "1"  "2"  "3"  "4"  "5"  "6"  "7"  "8"  "9"  "10" "11" "12" "13" "14" "15"
[16] "16" "17" "18" "19" "20" "21" "22" "X"  "Y"  "MT"

You can subset your reads similarly.

Best,

Jim

 

ADD COMMENT

Login before adding your answer.

Traffic: 1031 users visited in the last hour
Help About
FAQ
Access RSS
API
Stats

Use of this site constitutes acceptance of our User Agreement and Privacy Policy.

Powered by the version 2.3.6