Store output of matchPWM()
2
0
Entering edit mode
mat149 ▴ 80
@mat149-11450
Last seen 2 days ago
United States

I'm having trouble finding a solution regarding the storage of output from matchPWM(). I'd like to store the output in xlsx/csv/txt file format. Trying to convert to dataframe after unlist() returns the error 'unimplemented type 'list' in 'encodeelement'

pwm<-PWM(adc,type="prob",prior.params = c(A=0.25,C=0.25,G=0.25,T=0.25))
pwm
          -4          -3           -2         -1           BP         +1
A 0.05377283 0.005631321 -0.017774842 0.01002784  0.326073962 0.04033945
C 0.04510563 0.115371305  0.022894940 0.11125792 -0.021888223 0.09707738
G 0.02962736 0.016846775 -0.006567253 0.03466881 -0.058405298 0.03089362
T 0.11013323 0.100789643  0.240086200 0.08268447 -0.007141397 0.07032860



i<-makeGRangesFromDataFrame(intsd)
mcols(i)[1]<-intsd$GID
mcols(i)[2]<-DNAStringSet(intsd$FASTA)
ir<-resize(i,width=29,fix="end")
irs<-getSeq(genome,ir)

irs
DNAStringSet object of length 20734:
        width seq                                           names               
    [1]    29 TGGACGTTCCTGACTGTCACGATAGATAG                 VEintron1
    [2]    29 ACTTGGCCATGTGCTAACAACTTCCATAG                 VEintron2
    [3]    29 TTTCTTTACTTCCTAATTTGTCTTAATAG                 VEintron3
    [4]    29 AGCGTTACTAATGAATTGTTTTTCCACAG                 VEintron4
    [5]    29 TGATTGTGATGGAACTGATGGCAGAACAG                 VEintron5
    ...   ... ...
[20730]    29 TGAGCAGTATTGAAGGCTGATCATTGCAG                 VEintron20730
[20731]    29 TGGAAGCGTCTAGAGCTAATCTCCCTTAG                 VEintron20731
[20732]    29 TGGAAGAGTTTAAAGCTAATTTGCCTTAG                 VEintron20732
[20733]    29 ATCCCTCCGTACATTTGACTGTTAGAAAG                 VEintron20733
[20734]    29 CGGCCGAAAACTCACAAACACCTGTTCAG                 VEintron20734


hitlist<-lapply(irs, FUN = matchPWM, min.score="90%", with.score=TRUE, pwm = pwm)
hitlist
$VEintron20730
Views on a 29-letter DNAString subject
subject: TGAGCAGTATTGAAGGCTGATCATTGCAG
views: NONE

$VEintron20731
Views on a 29-letter DNAString subject
subject: TGGAAGCGTCTAGAGCTAATCTCCCTTAG
views: NONE

$VEintron20732
Views on a 29-letter DNAString subject
subject: TGGAAGAGTTTAAAGCTAATTTGCCTTAG
views: NONE

$VEintron20733
Views on a 29-letter DNAString subject
subject: ATCCCTCCGTACATTTGACTGTTAGAAAG
views:
      start end width
  [1]    14  19     6 [TTTGAC]

$VEintron20734
Views on a 29-letter DNAString subject
subject: CGGCCGAAAACTCACAAACACCTGTTCAG
views:
      start end width
  [1]    10  15     6 [ACTCAC]


adf<-as.data.frame(do.call(cbind, hitlist)) 
Warning message:
In cbind(...) :
  number of rows of result is not a multiple of vector length (arg 14)

  VEintron20712 VEintron20713 VEintron20714 VEintron20722 VEintron20729
1        GCTCAC        TTTCAC        TTTCAC        TCTGAC        TTTCAA
2        GCTCAC        TTTCAC        TTTCAC        TCTGAC        TTTCAA
3        GCTCAC        TTTCAC        TTTCAC        TCTGAC        TTTCAA
4        GCTCAC        TTTCAC        TTTCAC        TCTGAC        TTTCAA
5        GCTCAC        TTTCAC        TTTCAC        TCTGAC        TTTCAA


write.table(adf,file="adf.txt")
Error in write.table(adf, file = "adf.txt") : 
  unimplemented type 'list' in 'EncodeElement'
biostrings Biostrings • 605 views
ADD COMMENT
1
Entering edit mode
mat149 ▴ 80
@mat149-11450
Last seen 2 days ago
United States

hitlist<-lapply(irs, FUN = matchPWM, min.score="90%", with.score=TRUE, pwm = pwm)
ot <- endoapply(hitlist, function(x) as(x, "IRanges"))
to<-SplitDataFrameList(ot)

SplitDataFrameList of length 20734

$VEintron1 DataFrame with 1 row and 1 column X <IRanges> 1 10-14

$VEintron2 DataFrame with 0 rows and 1 column

$VEintron3 DataFrame with 2 rows and 1 column X <IRanges> 1 5-9 2 22-26

...

<20731 more elements>
ADD COMMENT
1
Entering edit mode
@james-w-macdonald-5106
Last seen 2 days ago
United States

Something like this?

ind <- as.logical(sapply(hitlist, length))
out <- do.call(c, lapply(hitlist, function(x) as(x, "IRanges")))
mcols(out)$seq <- do.call(c, lapply(hitlist, as.character))
mcols(out)$name <- names(hitlist)[ind]
write.table(as(out, "data.frame"), <name goes here>)
0
Entering edit mode

out <- do.call(c, lapply(hitlist, function(x) as(x, "IRanges")))

Error in do.call(c, lapply(hitlist, function(x) as(x, "IRanges"))) : 'what' must be a function or character string

do.call does not like 'c'/concatenate for the 'what' argument. any other suggestions?

ADD REPLY
1
Entering edit mode

Here's a self contained reproducible example, using part of the example in ?matchPWM

> library(Biostrings)
> data(HNF4alpha)
> library(BSgenome.Dmelanogaster.UCSC.dm3)
> chr3R <- Dmelanogaster$chr3R
> pfm <- consensusMatrix(HNF4alpha)
> pwm <- PWM(pfm)  

> fakeirs <- DNAStringSet(chr3R, seq(1, 5e5, 1000), width = 1000)
> hitlst <- lapply(fakeirs, matchPWM, min.score = "90%", pwm = pwm)
> out <- do.call(c, lapply(hitlst, function(x) as(x, "IRanges")))
> out
IRanges object with 13 ranges and 0 metadata columns:
           start       end     width
       <integer> <integer> <integer>
   [1]       767       779        13
   [2]       603       615        13
   [3]       429       441        13
   [4]       515       527        13
   [5]       141       153        13
   ...       ...       ...       ...
   [9]       226       238        13
  [10]       549       561        13
  [11]       481       493        13
  [12]       502       514        13
  [13]       750       762        13
ADD REPLY

Login before adding your answer.

Traffic: 438 users visited in the last hour
Help About
FAQ
Access RSS
API
Stats

Use of this site constitutes acceptance of our User Agreement and Privacy Policy.

Powered by the version 2.3.6