bluefuse export and limma read.maimages

0

Entering edit mode

Gregory Lefebvre ▴ 10

@gregory-lefebvre-1323

Last seen 9.6 years ago

Hi all, here are some hacks added to read.maimages() function in LIMMA package, in order to read BlueFuse export files. It works fine for me. Let me know. Greg ############################# # read bluefuse export files headers # ############################# my.readBlueFuseHeader <- function(file){ con <- file(file, "r") on.exit( close( con ) ) error <- 0 i<-0 # First line contains BlueFuse version i <- i+1 if( length( grep( "BlueFuse", readLines(con, n = 1) ) ) == 0 ){ cat( "Humm...I didnt find any BlueFuse expression in the fisrt line !\n" ) error <- error + 1 } # Second line is a blank line i <- i+1 if( ( nbsp <- readLines(con, n = 1) ) != "" ){ cat( "file format error founded in line 2 !\n" ) error <- error + 1 } if( error == 2 ) stop( "There is almost likely a problem with your file ; I am stopping now !" ) # From the Third line begin headers until a new blank line from which begin data out <- list() while( ( line <- readLines(con, n = 1) ) != "" ){ i<-i+1 txt <- strsplit( sub( "\"$", "", sub( "^\"", "", line ) ), split = ": ")[[1]] out[[i]] <- txt[2] names(out)[i] <- txt[1] } out$NHeaderRecords <- i + 1 out } ################ # bluefuse enabled # ################ my.read.maimages <- function (files, source = "spot", path = NULL, ext = NULL, names = NULL, columns = NULL, other.columns = NULL, annotation = NULL, wt.fun = NULL, verbose = TRUE, sep = "\t", quote = "\"", ...) { if (missing(files)) { if (missing(ext)) stop("Must specify input files") else { extregex <- paste("\\.", ext, "$", sep = "") files <- dir(path = ifelse(is.null(path), ".", path), pattern = extregex) files <- sub(extregex, "", files) } } ## modif ## source <- match.arg(source, c( "bluefuse", "agilent", "arrayvision", "genepix", "imagene", "quantarray", "smd.old", "smd", "spot", "spot.close.open")) if (source == "imagene") return(read.imagene(files = files, path = path, ext = ext, names = names, columns = columns, wt.fun = wt.fun, verbose = verbose, sep = sep, quote = quote, ...)) slides <- as.vector(as.character(files)) if (!is.null(ext)) slides <- paste(slides, ext, sep = ".") nslides <- length(slides) if (is.null(names)) names <- removeExt(files) if (is.null(columns)) columns <- switch(source, agilent = list(Gf = "gMeanSignal", Gb = "gBGMedianSignal", Rf = "rMeanSignal", Rb = "rBGMedianSignal"), smd.old = list(Gf = "CH1I_MEAN", Gb = "CH1B_MEDIAN", Rf = "CH2I_MEAN", Rb = "CH2B_MEDIAN"), smd = list(Gf = "Ch1 Intensity (Mean)", Gb = "Ch1 Background (Median)", Rf = "Ch2 Intensity (Mean)", Rb = "Ch2 Background (Median)"), spot = list(Rf = "Rmean", Gf = "Gmean", Rb = "morphR", Gb = "morphG"), spot.close.open = list(Rf = "Rmean", Gf = "Gmean", Rb = "morphR.close.open", Gb = "morphG.close.open"), genepix = list(Rf = "F635 Mean", Gf = "F532 Mean", Rb = "B635 Median", Gb = "B532 Median"), quantarray = list(Rf = "ch2 Intensity", Gf = "ch1 Intensity", Rb = "ch2 Background", Gb = "ch1 Background"), # modif # bluefuse = list( Rf="AMPCH2", Gf="AMPCH1", Rb="AMPCH2", Gb="AMPCH1" ) ) fullname <- slides[1] if (!is.null(path)) fullname <- file.path(path, fullname) switch(source, quantarray = { firstfield <- scan(fullname, what = "", sep = "\t", flush = TRUE, quiet = TRUE, blank.lines.skip = FALSE, multi.line = FALSE) skip <- grep("Begin Data", firstfield) if (length(skip) == 0) stop("Cannot find \"Begin Data\" in image output file") nspots <- grep("End Data", firstfield) - skip - 2 obj <- read.table(fullname, skip = skip, header = TRUE, sep = sep, quote = quote, as.is = TRUE, check.names = FALSE, comment.char = "", nrows = nspots, ...) }, arrayvision = { skip <- 1 cn <- scan(fullname, what = "", sep = sep, quote = quote, skip = 1, nlines = 1, quiet = TRUE) fg <- grep(" Dens - ", cn) if (length(fg) != 2) stop(paste("Cannot find foreground columns in", fullname)) bg <- grep("Bkgd", cn) if (length(fg) != 2) stop(paste("Cannot find background columns in", fullname)) columns <- list(Rf = fg[1], Rb = bg[1], Gf = fg[2], Gb = bg[2]) obj <- read.table(fullname, skip = skip, header = TRUE, sep = sep, quote = quote, as.is = TRUE, check.names = FALSE, comment.char = "", ...) nspots <- nrow(obj) }, genepix = { skip <- readGPRHeader(fullname)$NHeaderRecords obj <- read.table(fullname, skip = skip, header = TRUE, sep = sep, quote = quote, as.is = TRUE, check.names = FALSE, comment.char = "", fill = TRUE, ...) nspots <- nrow(obj) }, smd.old = { skip <- readSMDHeader(fullname)$NHeaderRecords obj <- read.table(fullname, skip = skip, header = TRUE, sep = sep, quote = quote, as.is = TRUE, check.names = FALSE, comment.char = "", fill = TRUE, ...) nspots <- nrow(obj) }, smd = { skip <- readSMDHeader(fullname)$NHeaderRecords obj <- read.table(fullname, skip = skip, header = TRUE, sep = sep, quote = quote, as.is = TRUE, check.names = FALSE, comment.char = "", fill = TRUE, ...) nspots <- nrow(obj) }, bluefuse = { # MODIF # skip <- my.readBlueFuseHeader( fullname )$NHeaderRecords obj <<- read.table(fullname, skip = skip, header = TRUE, sep = sep, quote = quote, as.is = TRUE, check.names = FALSE, comment.char = "", fill = TRUE, ...) nspots <- nrow(obj) },{ skip <- grep(protectMetachar(columns$Rf), readLines(fullname, n = 80)) - 1 if (length(skip) == 0) stop("Cannot find column heading in image output file") else skip <- skip[1] obj <- read.table(fullname, skip = skip, header = TRUE, sep = sep, quote = quote, as.is = TRUE, check.names = FALSE, comment.char = "", fill = TRUE, ...) nspots <- nrow(obj) }) Y <- matrix(0, nspots, nslides) colnames(Y) <- names RG <- list(R = Y, G = Y, Rb = Y, Gb = Y) if (!is.null(wt.fun)) RG$weights <- Y RG$targets <- data.frame(FileName = I(files), row.names = names) if (is.null(annotation)) annotation <- switch(source, agilent = c("Row", "Col", "Start", "Sequence", "SwissProt", "GenBank", "Primate", "GenPept", "ProbeUID", "ControlType", "ProbeName", "GeneName", "SystematicName", "Description"), genepix = c("Block", "Row", "Column", "ID", "Name"), smd = c("Spot", "Clone ID", "Gene Symbol", "Gene Name", "Cluster ID", "Accession", "Preferred name", "Locuslink ID", "Name", "Sequence Type", "X Grid Coordinate (within sector)", "Y Grid Coordinate (within sector)", "Sector", "Failed", "Plate Number", "Plate Row", "Plate Column", "Clone Source", "Is Verified", "Is Contaminated", "Luid"), smd.old = c("SPOT", "NAME", "Clone ID", "Gene Symbol", "Gene Name", "Cluster ID", "Accession", "Preferred name", "SUID"), # modif # bluefuse = c( "BLOCK", "ID", "SUBGRIDROW", "SUBGRIDCOL", "NAME", "FLAG" ), NULL) if (!is.null(annotation)) { j <- match(annotation, colnames(obj), 0) objGlob <<- obj # modif # if( source == "bluefuse" ) names(obj)[c(3,4,6,7,8,10)] <- c( "Row", "Column", "Block", "Name", "ID", "Flags") if (any(j > 0)) RG$genes <- data.frame(obj[, j, drop = FALSE]) } if (source == "agilent") { if (!is.null(RG$genes$Row) && !is.null(RG$genes$Col)) { nr <- length(unique(RG$genes$Row)) nc <- length(unique(RG$genes$Col)) if (nspots == nr * nc) RG$printer <- list(ngrid.r = 1, ngrid.c = 1, nspot.r = nr, nspot.c = nc) } } if (!is.null(other.columns)) { other.columns <- as.character(other.columns) j <- match(other.columns, colnames(obj), 0) if (any(j > 0)) { other.columns <- colnames(obj)[j] RG$other <- list() for (j in other.columns) RG$other[[j]] <- Y } else { other.columns <- NULL } } for (i in 1:nslides) { if (i > 1) { fullname <- slides[i] if (!is.null(path)) fullname <- file.path(path, fullname) if (source == "genepix") skip <- readGPRHeader(fullname)$NHeaderRecords # modif # if ( source == "bluefuse" ){ skip <- my.readBlueFuseHeader(fullname)$NHeaderRecords obj <- read.table(fullname, skip = skip, header = TRUE, sep = sep, as.is = TRUE, quote = quote, check.names = FALSE, comment.char = "", fill = TRUE, nrows = nspots, ...) names(obj)[c(3,4,6,7,8,10)] <- c( "Row", "Column", "Block", "Name", "ID", "Flags") } } RG$R[, i] <- obj[, columns$Rf] RG$G[, i] <- obj[, columns$Gf] RG$Rb[, i] <- obj[, columns$Rb] RG$Gb[, i] <- obj[, columns$Gb] if (!is.null(wt.fun)) RG$weights[, i] <- wt.fun(obj) if (!is.null(other.columns)) for (j in other.columns){ RG$other[[j]][, i] <- obj[, j] } if (verbose) cat(paste("Read", fullname, "\n")) } new("RGList", RG) }

Annotation Annotation • 1.2k views

ADD COMMENT • link updated 18.8 years ago by Steve Taylor ▴ 40 • written 18.8 years ago by Gregory Lefebvre ▴ 10

0

Entering edit mode

Steve Taylor ▴ 40

@steve-taylor-1168

Last seen 9.6 years ago

Hi Greg, That's great! Are there any plans to add this facility to limmaGUI? Kind Regards, Steve > > here are some hacks added to read.maimages() function in LIMMA package, > in order to read BlueFuse export files. > It works fine for me. > > Let me know. > Greg > > > ############################# > # read bluefuse export files headers # > ############################# > my.readBlueFuseHeader <- function(file){ > con <- file(file, "r") > on.exit( close( con ) ) > error <- 0 > i<-0 > > # First line contains BlueFuse version > i <- i+1 > if( length( grep( "BlueFuse", readLines(con, n = 1) ) ) == 0 ){ > cat( "Humm...I didnt find any BlueFuse expression in the fisrt > line !\n" ) > error <- error + 1 > } > > # Second line is a blank line > i <- i+1 > if( ( nbsp <- readLines(con, n = 1) ) != "" ){ > cat( "file format error founded in line 2 !\n" ) > error <- error + 1 > } > > if( error == 2 ) > stop( "There is almost likely a problem with your file ; I am > stopping now !" ) > > > # From the Third line begin headers until a new blank line from > which begin data > out <- list() > while( ( line <- readLines(con, n = 1) ) != "" ){ > i<-i+1 > txt <- strsplit( sub( "\"$", "", sub( "^\"", "", line ) ), split > = ": ")[[1]] > out[[i]] <- txt[2] > names(out)[i] <- txt[1] > } > out$NHeaderRecords <- i + 1 > out > } > > > ################ > # bluefuse enabled # > ################ > my.read.maimages <- function (files, source = "spot", path = NULL, ext = > NULL, names = NULL, > columns = NULL, other.columns = NULL, annotation = NULL, > wt.fun = NULL, verbose = TRUE, sep = "\t", quote = "\"", > ...) > { > if (missing(files)) { > if (missing(ext)) > stop("Must specify input files") > else { > extregex <- paste("\\.", ext, "$", sep = "") > files <- dir(path = ifelse(is.null(path), ".", path), > pattern = extregex) > files <- sub(extregex, "", files) > } > } > ## modif ## > source <- match.arg(source, c( "bluefuse", "agilent", "arrayvision", > "genepix", > "imagene", "quantarray", "smd.old", "smd", "spot", > "spot.close.open")) > if (source == "imagene") > return(read.imagene(files = files, path = path, ext = ext, > names = names, columns = columns, wt.fun = wt.fun, > verbose = verbose, sep = sep, quote = quote, ...)) > slides <- as.vector(as.character(files)) > if (!is.null(ext)) > slides <- paste(slides, ext, sep = ".") > nslides <- length(slides) > if (is.null(names)) > names <- removeExt(files) > if (is.null(columns)) > columns <- switch(source, > agilent = list(Gf = "gMeanSignal", Gb = "gBGMedianSignal", Rf > = "rMeanSignal", Rb = "rBGMedianSignal"), > > smd.old = list(Gf = "CH1I_MEAN", Gb = "CH1B_MEDIAN", Rf = > "CH2I_MEAN", Rb = "CH2B_MEDIAN"), > > smd = list(Gf = "Ch1 Intensity (Mean)", Gb = "Ch1 Background > (Median)", Rf = "Ch2 Intensity (Mean)", > Rb = "Ch2 Background (Median)"), > spot = list(Rf = "Rmean", Gf = "Gmean", Rb = "morphR", Gb = > "morphG"), > > spot.close.open = list(Rf = "Rmean", Gf = "Gmean", Rb = > "morphR.close.open", Gb = "morphG.close.open"), > > genepix = list(Rf = "F635 Mean", Gf = "F532 Mean", Rb = "B635 > Median", Gb = "B532 Median"), > > quantarray = list(Rf = "ch2 Intensity", Gf = "ch1 Intensity", Rb > = "ch2 Background", Gb = "ch1 Background"), > > # modif # > bluefuse = list( Rf="AMPCH2", Gf="AMPCH1", Rb="AMPCH2", > Gb="AMPCH1" ) > ) > > fullname <- slides[1] > if (!is.null(path)) > fullname <- file.path(path, fullname) > switch(source, quantarray = { > firstfield <- scan(fullname, what = "", sep = "\t", flush = TRUE, > quiet = TRUE, blank.lines.skip = FALSE, multi.line = FALSE) > skip <- grep("Begin Data", firstfield) > if (length(skip) == 0) > stop("Cannot find \"Begin Data\" in image output file") > nspots <- grep("End Data", firstfield) - skip - 2 > obj <- read.table(fullname, skip = skip, header = TRUE, > sep = sep, quote = quote, as.is = TRUE, check.names = FALSE, > comment.char = "", nrows = nspots, ...) > }, arrayvision = { > skip <- 1 > cn <- scan(fullname, what = "", sep = sep, quote = quote, > skip = 1, nlines = 1, quiet = TRUE) > fg <- grep(" Dens - ", cn) > if (length(fg) != 2) > stop(paste("Cannot find foreground columns in", fullname)) > bg <- grep("Bkgd", cn) > if (length(fg) != 2) > stop(paste("Cannot find background columns in", fullname)) > columns <- list(Rf = fg[1], Rb = bg[1], Gf = fg[2], Gb = bg[2]) > obj <- read.table(fullname, skip = skip, header = TRUE, > sep = sep, quote = quote, as.is = TRUE, check.names = FALSE, > comment.char = "", ...) > nspots <- nrow(obj) > }, genepix = { > skip <- readGPRHeader(fullname)$NHeaderRecords > obj <- read.table(fullname, skip = skip, header = TRUE, > sep = sep, quote = quote, as.is = TRUE, check.names = FALSE, > comment.char = "", fill = TRUE, ...) > nspots <- nrow(obj) > }, smd.old = { > skip <- readSMDHeader(fullname)$NHeaderRecords > obj <- read.table(fullname, skip = skip, header = TRUE, > sep = sep, quote = quote, as.is = TRUE, check.names = FALSE, > comment.char = "", fill = TRUE, ...) > nspots <- nrow(obj) > }, smd = { > skip <- readSMDHeader(fullname)$NHeaderRecords > obj <- read.table(fullname, skip = skip, header = TRUE, > sep = sep, quote = quote, as.is = TRUE, check.names = FALSE, > comment.char = "", fill = TRUE, ...) > nspots <- nrow(obj) > }, bluefuse = { > # MODIF # > skip <- my.readBlueFuseHeader( fullname )$NHeaderRecords > obj <<- read.table(fullname, skip = skip, header = TRUE, > sep = sep, quote = quote, as.is = TRUE, check.names = FALSE, > comment.char = "", fill = TRUE, ...) > nspots <- nrow(obj) > },{ > skip <- grep(protectMetachar(columns$Rf), readLines(fullname, > n = 80)) - 1 > if (length(skip) == 0) > stop("Cannot find column heading in image output file") > else skip <- skip[1] > obj <- read.table(fullname, skip = skip, header = TRUE, > sep = sep, quote = quote, as.is = TRUE, check.names = FALSE, > comment.char = "", fill = TRUE, ...) > nspots <- nrow(obj) > }) > Y <- matrix(0, nspots, nslides) > colnames(Y) <- names > RG <- list(R = Y, G = Y, Rb = Y, Gb = Y) > if (!is.null(wt.fun)) > RG$weights <- Y > RG$targets <- data.frame(FileName = I(files), row.names = names) > > if (is.null(annotation)) > annotation <- switch(source, > agilent = c("Row", "Col", > "Start", "Sequence", "SwissProt", "GenBank", "Primate", > "GenPept", "ProbeUID", "ControlType", "ProbeName", > "GeneName", "SystematicName", "Description"), > > genepix = c("Block", > "Row", "Column", "ID", "Name"), > > smd = c("Spot", "Clone ID", > "Gene Symbol", "Gene Name", "Cluster ID", "Accession", > "Preferred name", "Locuslink ID", "Name", "Sequence Type", > "X Grid Coordinate (within sector)", "Y Grid Coordinate > (within sector)", > "Sector", "Failed", "Plate Number", "Plate Row", > "Plate Column", "Clone Source", "Is Verified", "Is > Contaminated", > "Luid"), > > smd.old = c("SPOT", "NAME", "Clone ID", > "Gene Symbol", "Gene Name", "Cluster ID", "Accession", > "Preferred name", "SUID"), > # modif # > bluefuse = c( "BLOCK", "ID", "SUBGRIDROW", "SUBGRIDCOL", "NAME", > "FLAG" ), > > NULL) > > if (!is.null(annotation)) { > j <- match(annotation, colnames(obj), 0) > > objGlob <<- obj > > # modif # > if( source == "bluefuse" ) > names(obj)[c(3,4,6,7,8,10)] <- c( "Row", "Column", "Block", > "Name", "ID", "Flags") > > > if (any(j > 0)) > RG$genes <- data.frame(obj[, j, drop = FALSE]) > } > > if (source == "agilent") { > if (!is.null(RG$genes$Row) && !is.null(RG$genes$Col)) { > nr <- length(unique(RG$genes$Row)) > nc <- length(unique(RG$genes$Col)) > if (nspots == nr * nc) > RG$printer <- list(ngrid.r = 1, ngrid.c = 1, > nspot.r = nr, nspot.c = nc) > } > } > > > > if (!is.null(other.columns)) { > other.columns <- as.character(other.columns) > j <- match(other.columns, colnames(obj), 0) > if (any(j > 0)) { > other.columns <- colnames(obj)[j] > RG$other <- list() > for (j in other.columns) RG$other[[j]] <- Y > } > else { > other.columns <- NULL > } > } > > for (i in 1:nslides) { > > if (i > 1) { > fullname <- slides[i] > if (!is.null(path)) > fullname <- file.path(path, fullname) > if (source == "genepix") > skip <- readGPRHeader(fullname)$NHeaderRecords > > # modif # > if ( source == "bluefuse" ){ > skip <- my.readBlueFuseHeader(fullname)$NHeaderRecords > obj <- read.table(fullname, skip = skip, header = TRUE, > sep = sep, as.is = TRUE, quote = quote, > check.names = FALSE, > comment.char = "", fill = TRUE, nrows = nspots, > ...) > names(obj)[c(3,4,6,7,8,10)] <- c( "Row", "Column", > "Block", "Name", "ID", "Flags") > } > > } > > RG$R[, i] <- obj[, columns$Rf] > RG$G[, i] <- obj[, columns$Gf] > RG$Rb[, i] <- obj[, columns$Rb] > RG$Gb[, i] <- obj[, columns$Gb] > > if (!is.null(wt.fun)) > RG$weights[, i] <- wt.fun(obj) > > if (!is.null(other.columns)) > for (j in other.columns){ > RG$other[[j]][, i] <- obj[, j] > } > if (verbose) > cat(paste("Read", fullname, "\n")) > } > new("RGList", RG) > } > > _______________________________________________ > Bioconductor mailing list > Bioconductor at stat.math.ethz.ch > https://stat.ethz.ch/mailman/listinfo/bioconductor >

ADD COMMENT • link 18.8 years ago Steve Taylor ▴ 40

Login before adding your answer.