I am following Jeff Leek's tutorial on batch effects (http://jtleek.com/genstats/inst/doc/02_13_batch-effects.html) and I wanted to plot PC1 vs PC2 of the test data.
After applying svaseq, I see better separation between Cancer and Normal than what I see with the uncorrected data (Biopsy clusters with Normal). ComBat doesn't seem to improve clustering. Is this correct?
Thank you!
My code:
library(ggplot2)
library(devtools)
library(Biobase)
library(limma)
library(sva)
library(bladderbatch)
pca_any <- function(counts, colorby, label, name, size, scale){
pcax = prcomp(t( counts ), scale=scale)
pcvar = pcax$sdev^2/sum(pcax$sdev^2)*100
p = qplot(pcax$x[,1],pcax$x[,2], main=paste(name, ', scale=', scale, sep=''), colour=colorby,
xlab=paste("PCA 1: ", round(pcvar[1], digits=1), "% variance", sep=""),
xlim = c(min(pcax$x[,1])*2, max(pcax$x[,1])*1.2),
ylab=paste("PCA 2: ", round(pcvar[2], digits=1), "% variance", sep=""), geom="text", label=label) +
labs(colour='groups')
png(file=paste("pca-", name, ".png", sep=''), res=200, width=size, height=size)
print(p)
dev.off()
}
data(bladderdata)
pheno = pData(bladderEset)
edata = exprs(bladderEset)
pca_any(counts=edata, colorby=pheno$cancer, rownames(pheno), name='uncorrected', size=1200, scale=FALSE)
mod = model.matrix(~cancer,data=pheno)
mod0 = model.matrix(~1, data=pheno)
sva1 = sva(edata,mod,mod0,n.sv=2)
cov = cbind(sva1$sv[,1], sva1$sv[,2])
counts.fixed <- removeBatchEffect(edata, covariates = cov)
pca_any(counts=counts.fixed, colorby=pheno$cancer, rownames(pheno), name='svaseq-removeBatchEffect', size=1200, scale=FALSE)
mod = model.matrix(~1, data=pheno)
combat = ComBat(dat=edata, batch=pheno$batch, mod=mod, par.prior=TRUE, prior.plots = FALSE)
pca_any(counts=combat, colorby=pheno$cancer, rownames(pheno), name='combat', size=1200, scale=FALSE)



