Entering edit mode
Hello, I am trying to search for the methyltransferase sequence in a set of complete prokaryotic genomes (ex: L. fermentum; L. plantarum ). However, I can't find any results. The question would be, how to change the code to find some nucleotide sequence similar to the methyltransferase?
library(Biostrings)
library(ape)
genbank_ids <- c(
"NZ_CP124737.1", "NZ_CP010413.1", "NZ_CP022988.1", "NC_021181.2",
"NZ_CP014170.1", "NZ_CP021061.1", "NZ_CP026038.1", "NZ_CP040780.1",
"NZ_CP028221.1", "NZ_CP038996.1", "NZ_AP012544.1", "NZ_CP059276.1"
)
seq_bche<-read.GenBank(genbank_ids)
seq_bche
metiltransferase <- DNAString("GATCCGTTCGCACAGCATCGCCCGCTTGATGATAAACGCTACGCGCTGGATCATTTCCAGACTAAGTTGCTGAAGCTACCGCAAACCATGCAAACCGCAAGAGGCAAGCAGCTGGCGCAGCACAACGCGCATTTTTTAGTCGAGTTTATGGCGAAGCTCAGTGCCGAACTGGCGGGGGAGAATGAAGGTGTTGATCACAAGGTGATAGATGCGTTTTCATCCGCTGGCTGAGCGCGTGGCCCTAAATGGCTGTAATTATGTTAACCTGTCGGCCATCTCAGATGGCCGGTGAAATCTATGCAGGAAAATATATCAGTAACCGATTCATACAGCACCGGGAATGCCGCACAGGCAATGCTGGAGAAACTGCTGCAAATTTATGATGTTAAAACGTTGGTGGCGCAGCTTAATGGTGTAGGTGAGAATCACTGGAGCGCGGCAATTTTAAAACGTGCGCTGGCGAATGACTCGGCATGGCACCGTTTAAGTGAGAAAGAGTTCGCCCATCTGCAAACGTTATTACCCAAACCACCGGCACATCATCCGCATTATGCGTTTCGCTTTATCGATCTATTCGCCGGAATTGGCGGCATCCGTCGCGGTTTTGAATCGATTGGCGGACAGTGCGTGTTTACCAGCGAATGGAACAAACATGCGGTACGCACTTATAAAGCCAACCATTATTGCGATCCGGCGACGCATCATTTTAATGAAGATATCCGCGACATCACCCTCAGCCATAAAGAAGGCGTGAGTGATGAGGCGGCGGCGGAACATATTCGTCAACACATTCCTGAACACGATGTTTTACTGGCCGGTTTCCCTTGTCAGCCATTTTCGCTGGCTGGCGTATCGAAAAAGAACTCGCTCGGGCGGGCGCACGGTTTTGCCTGCGATACCCAGGGCACGCTGTTTTTTGATGTGGTACGCATTATCGACGCGCGTCGTCCGGCGATGTTTGTGCTCGAAAACGTCAAAAACCTGAAAAGTCACGACCAGGGTAAAACGTTCCGCATCATCATGCAGACGCTGGACGAACTGGGCTATGACGTGGCTGATGCAGAAGATAATGGGCCAGACGATCCGAAAATCATCGACGGCAAACATTTTCTGCCGCAGCACCGTGAACGCATCGTGCTGGTGGGTTTTCGTCGCGATCTGAATCTGAAAGCCGATTTTACCCTGCGTGATATCAGCGAATGTTTCCCTGCGCAGCGAGTGACGCTGGCGCAGCTGTTGGACCCGATGGTCGAGGCGAAATATATCCTGACGCCGGTGCTGTGGAAGTACCTCTATCGATATGCGAAAAAACATCAGGCGCGCGGTAACGGCTTCGGTTATGGAATGGTTTATCCGAACAATCCGCAAAGCGTCACGCGTACGCTGTCTGCGCGTTATTACAAAGATGGCGCGGAAATTTTAATCGATCGCGGCTGGGATATGGCCACGGGTGAGAAAGACTTTGACGATCCGCTGAATCAGCAACATCGTCCACGTCGGTTAACGCCTCGGGAATGCGCGCGCTTAATGGGTTTTGAAGCGCCGGGAGAAGCGAAATTCCGTATTCCGGTTTCGGACACTCAGGCCTATCGCCAGTTCGGTAACTCGGTGGTCGTGCCGGTCTTTGCCGCGGTGGCAAAACTGCTTGAGCCAAAAATCAAACAGGCGGTGGCGTTGCGTCAGCAAGAGGCACAACATGGCCGACGTTCACGATAAGGCCACTCGCAGCAAAAATATGCGCGCGATTGCCACGCGTGATACGGCGATAGAGAAGCGCCTCGCCAGTCTGTTAACCGGGCAGGGCCTGGCATTTCGCGTTCAGGACGCCAGTCTGCCCGGACGTCCGGATTTTGTCGTTGATGAATATCGCTGCGTGATATTTACCCATGGCTGCTTCTGGCATCATCATCACTGCTATCTGTTTAAAGTGCCTGCGACTCGAACCGAGTTCTGGCTGGAGAAGATAGGTAAAAATGTTGAGCGCGATCGCCGCGATATCAGTCGCTTGCAGGAACTCGGCTGGCGCGTATTGATTGTCTGGGAGTGCGCGTTACGTGGGCGCGAGAAGCTGACGGATGAAGCGCTTACCGAGCGTCTGGAAGAGTGGATCTGCGGCGAAGGTGCCAGCGCGCAGATCGACACGCAGGGGATTCATTTACTCGCTTGATGCGTCCTGAATAACTGGCGCAACTACGGGTTTTGCCGGGAAGAGATATTTTCCCAACGTGACCAGTACCACCGCGAAGACAATTACGCCGAGCGCCAGCCATTCAATCTTCGACAGTGTTTCTCCACCCAGTCCCGTACCCAGCAAGACCGCGACCACCGGGTTAACGTAAGCGTAGCTGGTGGCGAGAGCCGGACTGACATTACGGATTAAATACATATAAGCGTTGATGGCGATAATCGAACCAAACAGCGCCAGATAGCCGACCGCAAGGAAGCCTGAAAGGGAAGGGAGCGCCGTCAGTTTTTCACCCGCAATCATCGACGCGATCATTAACACCACGCCTGCCGCCAGCATCTCAATCGCACCCGCCATCATCCCTACAGGTAAGGTAATGCGCGAGCCATAAACTGAGCCAAACGCCCAGCTAATCGAGCCGATTAAAATCAGAATCGCGCCCCACGGATTGCCGCTTAAATTTCCACCGCTATTGAGCATGATGATTCCGGCAAGCCCAATGGCAATACCCACCCATTCCAGTTTGCGCGTTTTAATGCCAAACAGGCGGCTGAAGCACAGGGTAAAGAGGGGCACGGTTGCAACCACTACGGCGGCGATGCCGGAAGGAACATTTTGATGTTCGGCAACCGTCACCATGCCATTACCGACAGCCAGCAATAACAGGCCAATCAGCGCGGCATTGAGCAGCGGACGTAGCGGGGGGAGTTTGTGTCCGCGCAGTAGCAAAAATGCCAGCAATAAAATACCGGCTGCCAGGAATCGAACGCCCGCCATCATTAACGGAGGCCAGCTTTCCACGCCAATCCGAATGACAAAATAGGTTGAGCCCCAAATGATATACAACGCAAACAGCGCGCCAAAAAGCGGTAACAACTGGCGGAAACGCATAATCCCTCACGGTGGAAATAAAAAGGTGGTTCATAGTAAACGTGAAAATCATTCTGCTGGCGAGAGATATAATTGCACTTGATTGTTAAAAAAATGTTGACCTGTGAAGCAAGTATCAGAGCTGCCGTTTTTGCTTCATACTTACACCCTTCAACAATAAAAATGAGAGGGAATGCTTTTGGCCGGGAGTAGTTTACTGACGTTGCTCGATGATATCGCCACACTACTGGACGATATCTCCGTGATGGGCAAACTGGCGGCGAAGAAAACCGCCGGTGTATTAGGGGATGACTTATCGCTCAATGCGCAACAAGTTTCAGGCGTGCGGGCCAACCGGGAACTTCCCGTGGTCTGGGGCGTGGCGAAAGGATCGCTGATTAATAAAGTGATTCTGGTGCCGCTGGCGCTGATCATCAGTGCGTTTATCCCGTGGGCGATTACGCCTCTGTTGATGATTGGTGGCGCGTTTCTCTGCTTTGAAGGAGTAGAGAAAGTGCTGCATATGCTGGAGGCGCGTAAACATAAAGAAGATCCGGCGCAGAGCCAGCAGCGTCTGGAGAAGCTGGCGGCGCAGGATCC")
library(magrittr)
seq_bche_dnastring <-seq_bche %>%
as.character %>%
lapply(., paste0, collapse="") %>%
unlist %>%
DNAStringSet
matches <- vmatchPattern(metiltransferase, seq_bche_dnastring)
elementNROWS(matches)
matches
Here are the results
> matches <- vmatchPattern(metiltransferase, seq_bche_dnastring)
> elementNROWS(matches)
[1] 0 0 0 0 0 0 0 0 0 0 0 0
> matches
MIndex object of length 12
$NZ_CP124737.1
IRanges object with 0 ranges and 0 metadata columns:
start end width
<integer> <integer> <integer>
$NZ_CP010413.1
IRanges object with 0 ranges and 0 metadata columns:
start end width
<integer> <integer> <integer>
$NZ_CP022988.1
IRanges object with 0 ranges and 0 metadata columns:
start end width
<integer> <integer> <integer>
...
<9 more elements>