[MEDIPS] mergeFrames: `Error in rbind(output, cbind(chromosomes[i], new_start, new

[MEDIPS] mergeFrames: `Error in rbind(output, cbind(chromosomes[i], new_start, new_stop)) `

Entering edit mode

Pierre Lindenbaum ▴ 10

@pierre-lindenbaum-6329

Last seen 12 months ago

France

Hello, I'm trying to run the MEDIPS package with the following script (loop over each chromosomes, compare each conditions )

	source("http://bioconductor.org/biocLite.R")
	#biocLite("BSgenome")
	## GTOOLS AND GDATA INSTALLED FROM SOURCES `R CMD INSTALL archive.tar.gz`
	#biocLite("edgeR")
	#biocLite("DNAcopy")
	#biocLite("Rsamtools")
	#biocLite("rtracklayer")
	#biocLite("MEDIPS")
	#biocLite("BSgenome.Rnorvegicus.UCSC.rn5")

	library(MEDIPS)
	library(BSgenome.Rnorvegicus.UCSC.rn5)


	#genome <-
	genome <- "BSgenome.Rnorvegicus.UCSC.rn5"


	#MEDIPS will replace all reads which map to exactly the same start and end
	#positions on the same strand by only one representative:
	uniq=TRUE


	#All reads will be extended to a length of 300nt according to the given strand
	#information
	extend=300

	shift=0

	#The genome will be divided into adjacent windows of length 100nt and
	ws=50

	print(seqnames(BSgenome.Rnorvegicus.UCSC.rn5::Rnorvegicus))



	myCreateSet <- function(name,genome,chromosome)
	{
	return ( MEDIPS.createSet(
	file = paste("/commun/data/projects/20130607.AC1WR3ACXX.MEDIPSEQ/align/Samples/",name,"/BAM/",name,"_final.bam",sep=""),
	BSgenome = genome,
	extend = extend,
	shift = shift,
	uniq = uniq,
	window_size = ws,
	chr.select = chromosome
	))
	}


	myCreateSet2 <- function(names,genome,chromosome)
	{
	dataset <- NULL
	for(N in names)
	{
	if(is.null(dataset))
	{
	dataset <- myCreateSet(N,genome,chr.select)
	}
	else
	{
	dataset <- c( dataset , myCreateSet(N,genome,chr.select))
	}
	}
	return (dataset)
	}

	myMedipAnalysis<- function(dataSet1, dataSet2, name,genome,chromosome)
	{
	filename2 <- paste("medip.",name,".merge.",chromosome,".tsv",sep="")

	if( chromosome=="chrX")
	{
	return (0);
	}
	if( chromosome == "chr14" && ( name=="20L_vs_8H" \|\| name=="20L_vs_8L" \|\| name=="20H_vs_8L" \|\| name=="20H_vs_8H" \|\| name=="8H_vs_8L") )
	{
	return (0);
	}

	write(paste(">>>>> NOW2: ",filename2), stderr())

	if(file.exists(filename2)) return (0);

	CS <- MEDIPS.couplingVector(pattern = "CG", refObj = dataSet1[[1]])


	write(paste(">>>>> NOW12 ",filename2), stderr())

	is.medip <- TRUE
	if( chromosome == "chr14" && (name=="20H_vs_20L" \|\| name=="20H_vs_8H" \|\| name=="20H_vs_8L" \|\| name=="20L_vs_8H"))
	{
	is.medip <- FALSE
	}

	mr.edgeR = MEDIPS.meth(
	MSet1 = dataSet1,
	MSet2 = dataSet2,
	CSet = CS,
	p.adj = "bonferroni",
	diff.method = "edgeR",
	prob.method = "poisson",
	MeDIP = is.medip ,
	CNV = F, type = "rpkm",
	minRowSum = 1
	)

	write(paste(">>>>> NOW3: ",filename2), stderr())


	mr.edgeR.s = MEDIPS.selectSig(results = mr.edgeR, p.value = 0.1,adj = T, ratio = NULL, bg.counts = NULL, CNV = F)
	summary( mr.edgeR.s )

	write(paste(">>>>> NOW4: ",filename2), stderr())

	mr.edgeR.s.gain = mr.edgeR.s[which(mr.edgeR.s[, grep("logFC", colnames(mr.edgeR.s))] > 0), ]


	write(paste(">>>>> NOW5: ",filename2), stderr())

	summary( mr.edgeR.s.gain )
	write.table( mr.edgeR.s.gain , file=paste("medip.",name,".gain.",chromosome,".tsv",sep="") ,sep="\t",col.names=TRUE)

	summary(mr.edgeR.s.gain)

	if( nrow( mr.edgeR.s.gain) ==0 ) return (0)

	write(paste(">>>>> NOW6: ",filename2), stderr())

	mr.edgeR.s.gain.m = MEDIPS.mergeFrames(frames = mr.edgeR.s.gain, distance = 1)


	write.table( mr.edgeR.s.gain.m , file=filename2 ,sep="\t",col.names=TRUE)

	return (0)
	}

	chromosomes <- seqnames(BSgenome.Rnorvegicus.UCSC.rn5::Rnorvegicus);
	for(chr.select in chromosomes)
	{

	Set20H <- myCreateSet2(c("20H1-1","20H1-3","20H1-4","20H2-1","20H5-1","20H5-2"),genome,chr.select)
	Set20L <- myCreateSet2(c("20L1-1","20L1-2","20L3-1","20L3-2","20L6-2","20L7-1"),genome,chr.select)


	Set8H <- myCreateSet2(c("8H1-1","8H1-4","8H2-2","8H3-1","8H3-2","8H5-2"),genome,chr.select)
	Set8L <- myCreateSet2(c("8L1-2","8L3-4","8L6-1","8L6-2","8L8-1","8L8-4"),genome,chr.select)


	myMedipAnalysis(Set20H,Set20L,"20H_vs_20L",genome,chr.select)
	myMedipAnalysis(Set20H,Set8H,"20H_vs_8H",genome,chr.select)
	myMedipAnalysis(Set20H,Set8L,"20H_vs_8L",genome,chr.select)


	myMedipAnalysis(Set20L,Set8H,"20L_vs_8H",genome,chr.select)
	myMedipAnalysis(Set20L,Set8L,"20L_vs_8L",genome,chr.select)

	myMedipAnalysis(Set8H,Set8L,"8H_vs_8L",genome,chr.select)

	}

view raw medips.R hosted with ❤ by GitHub

everything works fine for the chr1 but at some point, the script raises an error for the chr2. (see below). Does anyone know what I've done wrong here ? Thank you Pierre (...) Preprocessing MEDIPS SET 6 in MSet2... Calculating calibration curve... Performing linear regression... Intercept: 1.23986548019807 Slope: 1.72956076992792 Calculating relative methylation score... Estimate methylation... Differential coverage analysis... Extracting count windows with at least 1 reads... Extracting non-zero coupling factor windows... Execute edgeR for count data of 1524540 windows... (Neglecting parameter 'type') Creating a DGEList object... Apply trimmed mean of M-values (TMM) for library sizes normalization... Estimating common dispersion... Estimating tagwise dispersion... Calculating differential coverage... Adjusting p.values for multiple testing... Please note, log2 ratios are reported as log2(MSet1/MSet2). Creating results table... Adding differential coverage results... Total number of windows: 5701362 Number of windows tested for differential methylation: 1524540 Remaining number of windows with adjusted p.value<=0.1: 0 Error in rbind(output, cbind(chromosomes[i], new_start, new_stop)) : number of columns of matrices must match (see arg 2) Calls: myMedipAnalysis -> MEDIPS.mergeFrames -> rbind Execution halted

Coverage Regression edgeR MEDIPS Coverage Regression edgeR MEDIPS • 1.8k views

ADD COMMENT • link updated 11.2 years ago by Matthias Lienhard ▴ 280 • written 11.2 years ago by Pierre Lindenbaum ▴ 10

Entering edit mode

Matthias Lienhard ▴ 280

@matthias-lienhard-6292

Last seen 15 months ago

Max Planck Institute for molecular Gene…

Hi Pierre, as you are looping over the chromosomes and your code works for chr1 and fails for chr2, my first guess would be that there are no regions for chr2. Can you check for that? Best, Matthias Am 15.01.2014 05:13, schrieb Pierre Lindenbaum: > Hello, I'm trying to run the MEDIPS package with the following script > (loop over each chromosomes, compare each conditions ) > > >

	source("http://bioconductor.org/biocLite.R")
	#biocLite("BSgenome")
	## GTOOLS AND GDATA INSTALLED FROM SOURCES `R CMD INSTALL archive.tar.gz`
	#biocLite("edgeR")
	#biocLite("DNAcopy")
	#biocLite("Rsamtools")
	#biocLite("rtracklayer")
	#biocLite("MEDIPS")
	#biocLite("BSgenome.Rnorvegicus.UCSC.rn5")

	library(MEDIPS)
	library(BSgenome.Rnorvegicus.UCSC.rn5)


	#genome <-
	genome <- "BSgenome.Rnorvegicus.UCSC.rn5"


	#MEDIPS will replace all reads which map to exactly the same start and end
	#positions on the same strand by only one representative:
	uniq=TRUE


	#All reads will be extended to a length of 300nt according to the given strand
	#information
	extend=300

	shift=0

	#The genome will be divided into adjacent windows of length 100nt and
	ws=50

	print(seqnames(BSgenome.Rnorvegicus.UCSC.rn5::Rnorvegicus))



	myCreateSet <- function(name,genome,chromosome)
	{
	return ( MEDIPS.createSet(
	file = paste("/commun/data/projects/20130607.AC1WR3ACXX.MEDIPSEQ/align/Samples/",name,"/BAM/",name,"_final.bam",sep=""),
	BSgenome = genome,
	extend = extend,
	shift = shift,
	uniq = uniq,
	window_size = ws,
	chr.select = chromosome
	))
	}


	myCreateSet2 <- function(names,genome,chromosome)
	{
	dataset <- NULL
	for(N in names)
	{
	if(is.null(dataset))
	{
	dataset <- myCreateSet(N,genome,chr.select)
	}
	else
	{
	dataset <- c( dataset , myCreateSet(N,genome,chr.select))
	}
	}
	return (dataset)
	}

	myMedipAnalysis<- function(dataSet1, dataSet2, name,genome,chromosome)
	{
	filename2 <- paste("medip.",name,".merge.",chromosome,".tsv",sep="")

	if( chromosome=="chrX")
	{
	return (0);
	}
	if( chromosome == "chr14" && ( name=="20L_vs_8H" \|\| name=="20L_vs_8L" \|\| name=="20H_vs_8L" \|\| name=="20H_vs_8H" \|\| name=="8H_vs_8L") )
	{
	return (0);
	}

	write(paste(">>>>> NOW2: ",filename2), stderr())

	if(file.exists(filename2)) return (0);

	CS <- MEDIPS.couplingVector(pattern = "CG", refObj = dataSet1[[1]])


	write(paste(">>>>> NOW12 ",filename2), stderr())

	is.medip <- TRUE
	if( chromosome == "chr14" && (name=="20H_vs_20L" \|\| name=="20H_vs_8H" \|\| name=="20H_vs_8L" \|\| name=="20L_vs_8H"))
	{
	is.medip <- FALSE
	}

	mr.edgeR = MEDIPS.meth(
	MSet1 = dataSet1,
	MSet2 = dataSet2,
	CSet = CS,
	p.adj = "bonferroni",
	diff.method = "edgeR",
	prob.method = "poisson",
	MeDIP = is.medip ,
	CNV = F, type = "rpkm",
	minRowSum = 1
	)

	write(paste(">>>>> NOW3: ",filename2), stderr())


	mr.edgeR.s = MEDIPS.selectSig(results = mr.edgeR, p.value = 0.1,adj = T, ratio = NULL, bg.counts = NULL, CNV = F)
	summary( mr.edgeR.s )

	write(paste(">>>>> NOW4: ",filename2), stderr())

	mr.edgeR.s.gain = mr.edgeR.s[which(mr.edgeR.s[, grep("logFC", colnames(mr.edgeR.s))] > 0), ]


	write(paste(">>>>> NOW5: ",filename2), stderr())

	summary( mr.edgeR.s.gain )
	write.table( mr.edgeR.s.gain , file=paste("medip.",name,".gain.",chromosome,".tsv",sep="") ,sep="\t",col.names=TRUE)

	summary(mr.edgeR.s.gain)

	if( nrow( mr.edgeR.s.gain) ==0 ) return (0)

	write(paste(">>>>> NOW6: ",filename2), stderr())

	mr.edgeR.s.gain.m = MEDIPS.mergeFrames(frames = mr.edgeR.s.gain, distance = 1)


	write.table( mr.edgeR.s.gain.m , file=filename2 ,sep="\t",col.names=TRUE)

	return (0)
	}

	chromosomes <- seqnames(BSgenome.Rnorvegicus.UCSC.rn5::Rnorvegicus);
	for(chr.select in chromosomes)
	{

	Set20H <- myCreateSet2(c("20H1-1","20H1-3","20H1-4","20H2-1","20H5-1","20H5-2"),genome,chr.select)
	Set20L <- myCreateSet2(c("20L1-1","20L1-2","20L3-1","20L3-2","20L6-2","20L7-1"),genome,chr.select)


	Set8H <- myCreateSet2(c("8H1-1","8H1-4","8H2-2","8H3-1","8H3-2","8H5-2"),genome,chr.select)
	Set8L <- myCreateSet2(c("8L1-2","8L3-4","8L6-1","8L6-2","8L8-1","8L8-4"),genome,chr.select)


	myMedipAnalysis(Set20H,Set20L,"20H_vs_20L",genome,chr.select)
	myMedipAnalysis(Set20H,Set8H,"20H_vs_8H",genome,chr.select)
	myMedipAnalysis(Set20H,Set8L,"20H_vs_8L",genome,chr.select)


	myMedipAnalysis(Set20L,Set8H,"20L_vs_8H",genome,chr.select)
	myMedipAnalysis(Set20L,Set8L,"20L_vs_8L",genome,chr.select)

	myMedipAnalysis(Set8H,Set8L,"8H_vs_8L",genome,chr.select)

	}

view raw medips.R hosted with ❤ by GitHub

> > everything works fine for the chr1 but at some point, the script > raises an error for the chr2. (see below). > > Does anyone know what I've done wrong here ? > > Thank you > > Pierre > > > (...) > Preprocessing MEDIPS SET 6 in MSet2... > Calculating calibration curve... > Performing linear regression... > Intercept: 1.23986548019807 > Slope: 1.72956076992792 > Calculating relative methylation score... > Estimate methylation... > Differential coverage analysis... > Extracting count windows with at least 1 reads... > Extracting non-zero coupling factor windows... > Execute edgeR for count data of 1524540 windows... > (Neglecting parameter 'type') > Creating a DGEList object... > Apply trimmed mean of M-values (TMM) for library sizes normalization... > Estimating common dispersion... > Estimating tagwise dispersion... > Calculating differential coverage... > Adjusting p.values for multiple testing... > Please note, log2 ratios are reported as log2(MSet1/MSet2). > Creating results table... > Adding differential coverage results... > Total number of windows: 5701362 > Number of windows tested for differential methylation: 1524540 > Remaining number of windows with adjusted p.value<=0.1: 0 > Error in rbind(output, cbind(chromosomes[i], new_start, new_stop)) : > number of columns of matrices must match (see arg 2) > Calls: myMedipAnalysis -> MEDIPS.mergeFrames -> rbind > Execution halted > > _______________________________________________ > Bioconductor mailing list > Bioconductor at r-project.org > https://stat.ethz.ch/mailman/listinfo/bioconductor > Search the archives: > http://news.gmane.org/gmane.science.biology.informatics.conductor

ADD COMMENT • link 11.2 years ago Matthias Lienhard ▴ 280