Hi,
I'd like to use SomaticSignatures to analysis the mutation patten of our samples. I split the vcf file into multiple files and run the following code.
############################################################################################################
library("SomaticSignatures")
library("BSgenome.Hsapiens.UCSC.hg19")
library("ggdendro")
library('ggplot2')
vr_g6 = readVcfAsVRanges('~/prostate/prostate.gdac.g6.vcf', "hg19")
vr_g6 = mutationContext(vr_g6, BSgenome.Hsapiens.UCSC.hg19)
vr_g6$study = c('TCGA_PRAD_G6')
vr_g7 = readVcfAsVRanges('~/prostate/prostate.gdac.g7.vcf', "hg19")
vr_g7 = mutationContext(vr_g7, BSgenome.Hsapiens.UCSC.hg19)
vr_g7$study = c('TCGA_PRAD_G7')
vr_g8 = readVcfAsVRanges('~/prostate/prostate.gdac.g8.vcf', "hg19")
vr_g8 = mutationContext(vr_g8, BSgenome.Hsapiens.UCSC.hg19)
vr_g8$study = c('TCGA_PRAD_G8')
vr_g9 = readVcfAsVRanges('~/prostate/prostate.gdac.g9.vcf', "hg19")
vr_g9 = mutationContext(vr_g9, BSgenome.Hsapiens.UCSC.hg19)
vr_g9$study = c('TCGA_PRAD_G9')
vr_g10 = readVcfAsVRanges('~/prostate/prostate.gdac.g10.vcf', "hg19")
vr_g10 = mutationContext(vr_g10, BSgenome.Hsapiens.UCSC.hg19)
vr_g10$study = c('TCGA_PRAD_G10')
########## Single Cell Genome Sequencing ##########
sc_g3 = readVcfAsVRanges('~/prostate/prostate.single_cell.g3.vcf', "hg19")
sc_g3 = mutationContext(sc_g3, BSgenome.Hsapiens.UCSC.hg19)
sc_g3$study = c('Prostate_G3')
sc_g4 = readVcfAsVRanges('~/prostate/prostate.single_cell.g4.vcf', "hg19")
sc_g4 = mutationContext(sc_g4, BSgenome.Hsapiens.UCSC.hg19)
sc_g4$study = c('Prostate_G4')
vr = c(vr_g6, vr_g7, vr_g8, vr_g9, vr_g10, sc_g3, sc_g4)
vr_mm = motifMatrix(vr, group = "study", normalize = TRUE)
pdf('mutation_spectrum.pdf')
plotMutationSpectrum(vr, "study")
dev.off()
n_sigs = 5
# n_sigs = 2:5
sigs_nmf = identifySignatures(vr_mm, n_sigs, nmfDecomposition)
sigs_pca = identifySignatures(vr_mm, n_sigs, pcaDecomposition)
pdf('fitted_spectrum_nmf.pdf')
plotFittedSpectrum(sigs_nmf)
dev.off()
pdf('sample_map_nmf.pdf')
plotSampleMap(sigs_nmf)
dev.off()
pdf('samples_nmf.pdf')
plotSamples(sigs_nmf)
dev.off()
pdf('fitted_spectrum_pca.pdf')
plotFittedSpectrum(sigs_pca)
dev.off()
pdf('sample_map_pca.pdf')
plotSampleMap(sigs_pca)
dev.off()
pdf('samples_pca.pdf')
plotSamples(sigs_pca)
dev.off()
######################################################################
#####
##### assess the number of signatures
#####
######################################################################
# gof_nmf = assessNumberSignatures(vr_mm, n_sigs, nReplicates = 5)
# pdf('number_signature_nmf.pdf')
# plotNumberSignatures(gof_nmf)
# dev.off()
# gof_pca = assessNumberSignatures(vr_mm, n_sigs, pcaDecomposition)
# pdf('number_signature_pca.pdf')
# plotNumberSignatures(gof_pca)
# dev.off()
## Grouping by Motifs or Samples
clu_motif = clusterSpectrum(vr_mm, "motif")
p = ggdendrogram(clu_motif, rotate = FALSE, size=5)
ggsave('dendrogram.pdf', p)
######### Output ###############################################################################
Loading required package: VariantAnnotation
Loading required package: methods
Loading required package: BiocGenerics
Loading required package: parallel
Attaching package: ‘BiocGenerics’
The following objects are masked from ‘package:parallel’:
clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
clusterExport, clusterMap, parApply, parCapply, parLapply,
parLapplyLB, parRapply, parSapply, parSapplyLB
The following objects are masked from ‘package:stats’:
IQR, mad, sd, var, xtabs
The following objects are masked from ‘package:base’:
anyDuplicated, append, as.data.frame, cbind, colMeans, colnames,
colSums, do.call, duplicated, eval, evalq, Filter, Find, get, grep,
grepl, intersect, is.unsorted, lapply, lengths, Map, mapply, match,
mget, order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,
rbind, Reduce, rowMeans, rownames, rowSums, sapply, setdiff, sort,
table, tapply, union, unique, unsplit, which, which.max, which.min
Loading required package: GenomeInfoDb
Loading required package: S4Vectors
Loading required package: stats4
Attaching package: ‘S4Vectors’
The following object is masked from ‘package:base’:
expand.grid
Loading required package: IRanges
Loading required package: GenomicRanges
Loading required package: SummarizedExperiment
Loading required package: Biobase
Welcome to Bioconductor
Vignettes contain introductory material; view with
'browseVignettes()'. To cite Bioconductor, see
'citation("Biobase")', and for packages 'citation("pkgname")'.
Loading required package: DelayedArray
Loading required package: matrixStats
Attaching package: ‘matrixStats’
The following objects are masked from ‘package:Biobase’:
anyMissing, rowMedians
Attaching package: ‘DelayedArray’
The following objects are masked from ‘package:matrixStats’:
colMaxs, colMins, colRanges, rowMaxs, rowMins, rowRanges
The following object is masked from ‘package:base’:
apply
Loading required package: Rsamtools
Loading required package: Biostrings
Loading required package: XVector
Attaching package: ‘Biostrings’
The following object is masked from ‘package:DelayedArray’:
type
The following object is masked from ‘package:base’:
strsplit
Attaching package: ‘VariantAnnotation’
The following object is masked from ‘package:base’:
tabulate
Loading required package: NMF
Loading required package: pkgmaker
Loading required package: registry
Attaching package: ‘pkgmaker’
The following object is masked from ‘package:S4Vectors’:
new2
The following object is masked from ‘package:base’:
isNamespaceLoaded
Loading required package: rngtools
Loading required package: cluster
NMF - BioConductor layer [OK] | Shared memory capabilities [OK] | Cores 23/24
Attaching package: ‘NMF’
The following object is masked from ‘package:DelayedArray’:
seed
The following object is masked from ‘package:S4Vectors’:
nrun
No methods found in "RSQLite" for requests: dbGetQuery
Loading required package: BSgenome
Loading required package: rtracklayer
Warning message:
In VRanges(seqnames, ranges, ref, alt, totalDepth, refDepth, altDepth, :
'refDepth' + 'altDepth' exceeds 'totalDepth'; using GATK?
Warning message:
In VRanges(seqnames, ranges, ref, alt, totalDepth, refDepth, altDepth, :
'refDepth' + 'altDepth' exceeds 'totalDepth'; using GATK?
Warning message:
In VRanges(seqnames, ranges, ref, alt, totalDepth, refDepth, altDepth, :
'refDepth' + 'altDepth' exceeds 'totalDepth'; using GATK?
Warning message:
In VRanges(seqnames, ranges, ref, alt, totalDepth, refDepth, altDepth, :
'refDepth' + 'altDepth' exceeds 'totalDepth'; using GATK?
Warning message:
In VRanges(seqnames, ranges, ref, alt, totalDepth, refDepth, altDepth, :
'refDepth' + 'altDepth' exceeds 'totalDepth'; using GATK?
Warning message:
In VRanges(seqnames, ranges, ref, alt, totalDepth, refDepth, altDepth, :
'refDepth' + 'altDepth' exceeds 'totalDepth'; using GATK?
Warning message:
In VRanges(seqnames, ranges, ref, alt, totalDepth, refDepth, altDepth, :
'refDepth' + 'altDepth' exceeds 'totalDepth'; using GATK?
Error in .Method(..., deparse.level = deparse.level) :
number of columns for arg 6 do not match those of first arg
Calls: c ... <Anonymous> -> standardGeneric -> eval -> eval -> eval -> .Method
Execution halted