Entering edit mode
Hello,
I am getting NAs from TSSEscore()
in some of the 20 windows. They occur mostly in the windows before the TSS. Do you have any idea what may be causing this or how I could go about figuring out what is going on? I have 35 samples and they all show this effect. The input alignments were produced with bwa mem -t {threads} {bwa_idx} {input} | samblaster --addMateTags | samtools sort -m 6G -@ {threads} -O "BAM" -o {output.outbam} -
.
Thanks!
R version 4.1.0 (2021-05-18) -- "Camp Pontanezen"
Copyright (C) 2021 The R Foundation for Statistical Computing
Platform: x86_64-pc-linux-gnu (64-bit)
R is free software and comes with ABSOLUTELY NO WARRANTY.
You are welcome to redistribute it under certain conditions.
Type 'license()' or 'licence()' for distribution details.
Natural language support but running in an English locale
R is a collaborative project with many contributors.
Type 'contributors()' for more information and
'citation()' on how to cite R or R packages in publications.
Type 'demo()' for some demos, 'help()' for on-line help, or
'help.start()' for an HTML browser interface to help.
Type 'q()' to quit R.
> library(ATACseqQC)
Loading required package: BiocGenerics
Loading required package: parallel
Attaching package: ‘BiocGenerics’
The following objects are masked from ‘package:parallel’:
clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
clusterExport, clusterMap, parApply, parCapply, parLapply,
parLapplyLB, parRapply, parSapply, parSapplyLB
The following objects are masked from ‘package:stats’:
IQR, mad, sd, var, xtabs
The following objects are masked from ‘package:base’:
anyDuplicated, append, as.data.frame, basename, cbind, colnames,
dirname, do.call, duplicated, eval, evalq, Filter, Find, get, grep,
grepl, intersect, is.unsorted, lapply, Map, mapply, match, mget,
order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,
rbind, Reduce, rownames, sapply, setdiff, sort, table, tapply,
union, unique, unsplit, which.max, which.min
Loading required package: S4Vectors
Loading required package: stats4
Attaching package: ‘S4Vectors’
The following objects are masked from ‘package:base’:
expand.grid, I, unname
> library(stringr)
> library(dplyr)
Attaching package: 'dplyr'
The following objects are masked from 'package:S4Vectors':
first, intersect, rename, setdiff, setequal, union
The following objects are masked from 'package:BiocGenerics':
combine, intersect, setdiff, union
The following objects are masked from 'package:stats':
filter, lag
The following objects are masked from 'package:base':
intersect, setdiff, setequal, union
> library(readr)
> library(GenomicFeatures)
Loading required package: IRanges
Attaching package: 'IRanges'
The following objects are masked from 'package:dplyr':
collapse, desc, slice
Loading required package: GenomeInfoDb
Loading required package: GenomicRanges
Loading required package: AnnotationDbi
Loading required package: Biobase
Welcome to Bioconductor
Vignettes contain introductory material; view with
'browseVignettes()'. To cite Bioconductor, see
'citation("Biobase")', and for packages 'citation("pkgname")'.
Attaching package: 'AnnotationDbi'
The following object is masked from 'package:dplyr':
select
> args <- c("./analysis/bwamem/626A.bam","bin/testatacseqqc/","TxDb.Mmusculus.UCSC.mm10.knownGene")
> bamfile <- args[1]
> known_genes_lib <- args[3]
> ## bamfile tags to be read in
> possibleTag <- list("integer"=c("AM", "AS", "CM", "CP", "FI", "H0", "H1", "H2",
+ "HI", "IH", "MQ", "NH", "NM", "OP", "PQ", "SM",
+ "TC", "UQ"),
+ "character"=c("BC", "BQ", "BZ", "CB", "CC", "CO", "CQ", "CR",
+ "CS", "CT", "CY", "E2", "FS", "LB", "MC", "MD",
+ "MI", "OA", "OC", "OQ", "OX", "PG", "PT", "PU",
+ "Q2", "QT", "QX", "R2", "RG", "RX", "SA", "TS",
+ "U2"))
> library(Rsamtools)
> bamTop100 <- scanBam(BamFile(bamfile, yieldSize = 100),
+ param = ScanBamParam(tag=unlist(possibleTag)))[[1]]$tag
> tags <- names(bamTop100)[lengths(bamTop100)>0]
> tags
integer2 integer11 integer13 character15 character16
"AS" "MQ" "NM" "MC" "MD"
> gal <- readBamFile(bamfile, asMates=FALSE, tag=tags, bigFile=TRUE)
> gal1 <- gal
> library(known_genes_lib, character.only=TRUE)
> txs <- transcripts(eval(as.name(known_genes_lib)))
> txs <- keepStandardChromosomes(txs, pruning.mode = 'coarse')
>
> tsse <- TSSEscore(gal1, txs)
> tsse
$values
1 2 3 4 5 6 7 8
NaN NaN NaN NaN NaN NaN NaN NaN
9 10 11 12 13 14 15 16
NaN NaN 22.102069 16.191962 11.392983 8.235327 5.965839 4.574932
17 18 19 20
3.511574 2.876810 2.425101 NaN
$TSSEscore
[1] 22.10207
> sessionInfo()
R version 4.1.0 (2021-05-18)
Platform: x86_64-pc-linux-gnu (64-bit)
Running under: CentOS Linux 7 (Core)
Matrix products: default
BLAS: /R/build-4.1.0/lib64/R/lib/libRblas.so
LAPACK: /R/build-4.1.0/lib64/R/lib/libRlapack.so
locale:
[1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
[3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
[5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
[7] LC_PAPER=en_US.UTF-8 LC_NAME=C
[9] LC_ADDRESS=C LC_TELEPHONE=C
[11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
attached base packages:
[1] stats4 parallel stats graphics grDevices utils datasets
[8] methods base
other attached packages:
[1] TxDb.Mmusculus.UCSC.mm10.knownGene_3.10.0
[2] Rsamtools_2.8.0
[3] Biostrings_2.60.2
[4] XVector_0.32.0
[5] GenomicFeatures_1.44.1
[6] AnnotationDbi_1.54.1
[7] Biobase_2.52.0
[8] GenomicRanges_1.44.0
[9] GenomeInfoDb_1.28.1
[10] IRanges_2.26.0
[11] readr_2.0.1
[12] dplyr_1.0.7
[13] stringr_1.4.0
[14] ATACseqQC_1.16.0
[15] S4Vectors_0.30.0
[16] BiocGenerics_0.38.0
loaded via a namespace (and not attached):
[1] colorspace_2.0-2 rjson_0.2.20
[3] ellipsis_0.3.2 futile.logger_1.4.3
[5] ChIPpeakAnno_3.26.3 bit64_4.0.5
[7] interactiveDisplayBase_1.30.0 fansi_0.5.0
[9] xml2_1.3.2 motifStack_1.36.0
[11] splines_4.1.0 cachem_1.0.6
[13] ade4_1.7-17 polynom_1.4-0
[15] dbplyr_2.1.1 png_0.1-7
[17] graph_1.70.0 shiny_1.6.0
[19] HDF5Array_1.20.0 BiocManager_1.30.16
[21] compiler_4.1.0 httr_1.4.2
[23] assertthat_0.2.1 Matrix_1.3-3
[25] fastmap_1.1.0 lazyeval_0.2.2
[27] limma_3.48.3 later_1.3.0
[29] formatR_1.11 htmltools_0.5.1.1
[31] prettyunits_1.1.1 tools_4.1.0
[33] gtable_0.3.0 glue_1.4.2
[35] GenomeInfoDbData_1.2.6 rappdirs_0.3.3
[37] Rcpp_1.0.7 vctrs_0.3.8
[39] rhdf5filters_1.4.0 multtest_2.48.0
[41] rtracklayer_1.52.1 mime_0.11
[43] lifecycle_1.0.0 restfulr_0.0.13
[45] ensembldb_2.16.4 XML_3.99-0.7
[47] InteractionSet_1.20.0 AnnotationHub_3.0.1
[49] edgeR_3.34.0 zlibbioc_1.38.0
[51] MASS_7.3-54 scales_1.1.1
[53] BSgenome_1.60.0 promises_1.2.0.1
[55] hms_1.1.0 MatrixGenerics_1.4.2
[57] ProtGenerics_1.24.0 SummarizedExperiment_1.22.0
[59] RBGL_1.68.0 rhdf5_2.36.0
[61] AnnotationFilter_1.16.0 lambda.r_1.2.4
[63] yaml_2.2.1 curl_4.3.2
[65] memoise_2.0.0 ggplot2_3.3.5
[67] biomaRt_2.48.3 stringi_1.7.3
[69] RSQLite_2.2.8 BiocVersion_3.13.1
[71] BiocIO_1.2.0 randomForest_4.6-14
[73] filelock_1.0.2 BiocParallel_1.26.2
[75] rlang_0.4.11 pkgconfig_2.0.3
[77] matrixStats_0.60.1 bitops_1.0-7
[79] lattice_0.20-44 purrr_0.3.4
[81] Rhdf5lib_1.14.2 htmlwidgets_1.5.3
[83] GenomicAlignments_1.28.0 bit_4.0.4
[85] tidyselect_1.1.1 magrittr_2.0.1
[87] R6_2.5.1 generics_0.1.0
[89] DelayedArray_0.18.0 DBI_1.1.1
[91] preseqR_4.0.0 pillar_1.6.2
[93] survival_3.2-11 KEGGREST_1.32.0
[95] RCurl_1.98-1.4 tibble_3.1.3
[97] crayon_1.4.1 futile.options_1.0.1
[99] KernSmooth_2.23-20 utf8_1.2.2
[101] BiocFileCache_2.0.0 tzdb_0.1.2
[103] progress_1.2.2 locfit_1.5-9.4
[105] grid_4.1.0 blob_1.2.2
[107] GenomicScores_2.4.0 digest_0.6.27
[109] xtable_1.8-4 VennDiagram_1.6.20
[111] httpuv_1.6.2 regioneR_1.24.0
[113] munsell_0.5.0