Reading SMD datasets
1
0
Entering edit mode
@stanley-m-dunn-725
Last seen 10.2 years ago
I've been trying to learn R and Bioconductor and to read SMD datasets, but I'm having trouble getting it to work. I can't even get it to work with a small dataset (5912.xls) I'm hoping that someone can help me get past the following error: > read.SMD(fnames = NULL, path = "e:/", name.Gf = "Ch 1 Intensity (Mean)", + name.Gb = "Ch 1 Background (Mean)", name.Rf = "Ch 2 Intensity (Mean)", + name.Rb = "Ch 2 Background (Median)", name.W = NULL, layout = NULL, + gnames = NULL, targets = NULL,notes = NULL, skip = 21, sep = " ", quote = "",nmax = 24192) [1] "Reading e://5912.xls" Error in readLines(con, n, ok) : invalid value for `n' > I just don't know where to go next to get the dataset read in. As you can see, I tried to fix the number of records and should probably fix the line format ('what' parameter to scan), but I suspect it should be simpler than I'm making it out to be. Thanks, in advance, Stan -- Stanley M Dunn, Ph.D. Paul S and Mary W Monroe Faculty Scholar Associate Dean for Graduate Education and Research School of Engineering Rutgers University 98 Brett Road Piscataway, NJ 08854 (732) 445 - 4462 (732) 445 - 3224 (Center for Packaging) (732) 445 - 7067 (Fax) smd@occlusal.rutgers.edu
GO GO • 1.1k views
ADD COMMENT
0
Entering edit mode
@jean-yee-hwa-yang-104
Last seen 10.2 years ago
Hi Stanley, Colin Smith wrote this updated function for reading SMD files a little while ago. I am working towards putting it into marray at the moment, but you might like to try it first. Simply source("read.SMD2.R") from your R directory and try your command again: test <- read.SMD2("5912.xls") If there are still problems, please e-mail the file off-line and I will help you with it. Cheers Jean > > I've been trying to learn R and Bioconductor and to read SMD datasets, > but I'm having trouble getting it to work. I can't even get it to work > with a small dataset (5912.xls) > > I'm hoping that someone can help me get past the following error: > > > read.SMD(fnames = NULL, path = "e:/", name.Gf = "Ch 1 Intensity (Mean)", > + name.Gb = "Ch 1 Background (Mean)", name.Rf = "Ch 2 Intensity (Mean)", > + name.Rb = "Ch 2 Background (Median)", name.W = NULL, layout = NULL, > + gnames = NULL, targets = NULL,notes = NULL, skip = 21, sep = " ", > quote = "",nmax = 24192) > [1] "Reading e://5912.xls" > Error in readLines(con, n, ok) : invalid value for `n' > > > > I just don't know where to go next to get the dataset read in. As you > can see, I tried to fix the number of records and should probably fix > the line format ('what' parameter to scan), but I suspect it should be > simpler than I'm making it out to be. > > Thanks, in advance, > Stan > > -- > Stanley M Dunn, Ph.D. 
> Paul S and Mary W Monroe Faculty Scholar > Associate Dean for Graduate Education and Research > School of Engineering > Rutgers University > 98 Brett Road > Piscataway, NJ 08854 > > (732) 445 - 4462 > (732) 445 - 3224 (Center for Packaging) > (732) 445 - 7067 (Fax) > smd@occlusal.rutgers.edu > > _______________________________________________ > Bioconductor mailing list > Bioconductor@stat.math.ethz.ch > https://www.stat.math.ethz.ch/mailman/listinfo/bioconductor > -------------- next part --------------
#' Read SMD data files and hand them to read.marrayRaw()
#'
#' Finds the column-header line of the first file, then builds whichever of
#' `layout`, `gnames`, `targets` and `notes` the caller did not supply from
#' the "key=value" header lines and the data table, and finally delegates the
#' actual intensity reading to read.marrayRaw().
#'
#' @param fnames Character vector of file names. If NULL, every file in
#'   `path` whose name ends in ".xls" is used.
#' @param path Directory holding the files. If NULL, `fnames` are used as
#'   given.
#' @param name.Gf,name.Gb,name.Rf,name.Rb,name.W Column names forwarded to
#'   read.marrayRaw(). `name.Gf` is additionally the literal text searched
#'   for (in the first 100 lines of the first file) to find the
#'   column-header line.
#' @param layout,gnames,targets,notes Forwarded to read.marrayRaw(); any that
#'   is NULL is generated here.
#' @param skip Ignored on input: it is always recomputed as the number of
#'   lines preceding the column-header line of the first file. Kept so that
#'   existing calls keep working.
#' @param sep,quote,... Forwarded to read.marrayRaw().
#' @return The value of read.marrayRaw().
read.SMD2 <- function(fnames = NULL, path = ".", name.Gf = "CH1I_MEAN",
                      name.Gb = "CH1B_MEDIAN", name.Rf = "CH2I_MEAN",
                      name.Rb = "CH2B_MEDIAN", name.W = NULL, layout = NULL,
                      gnames = NULL, targets = NULL, notes = NULL, skip = 0,
                      sep = "\t", quote = "", ...) {
  # Pieces of the first line in `lines` that contains `key`, split on "=".
  # Yields NA when no line contains the key.
  # NOTE(review): the FIRST matching line wins, so a short key such as
  # "Description" would also hit a "Channel 1 Description" line if that line
  # comes earlier in the file -- confirm against the real header order.
  header_fields <- function(lines, key) {
    strsplit(lines[grep(key, lines, fixed = TRUE)[1]], "=")[[1]]
  }
  # Text after the first "=" on that line (NA when absent).
  header_value <- function(lines, key) header_fields(lines, key)[2]

  if (is.null(fnames)) {
    # `pattern` is a regular expression, not a glob.
    fnames <- dir(path = path, pattern = "\\.xls$")
  }
  if (length(fnames) == 0L) {
    stop("no SMD files to read", call. = FALSE)
  }
  fullfnames <- if (is.null(path)) fnames else file.path(path, fnames)

  # Locate the column-header line. The match is literal so that column names
  # containing regex metacharacters, e.g. "Ch 1 Intensity (Mean)", are found.
  y <- readLines(fullfnames[1], n = 100)
  headerRow <- grep(name.Gf, y, fixed = TRUE)[1]
  if (is.na(headerRow)) {
    stop("column name '", name.Gf, "' not found in the first 100 lines of ",
         fullfnames[1], call. = FALSE)
  }
  skip <- headerRow - 1

  # The data table of the first file; read at most once and shared between
  # the layout and gnames sections.
  read_first_table <- function() {
    read.table(fullfnames[1], header = TRUE, sep = "\t", quote = "",
               skip = skip, comment.char = "")
  }
  smdTable <- NULL

  if (is.null(layout)) {
    cat("Generating layout from ", fnames[1], "\n", sep = "")
    smdTable <- read_first_table()
    numSectors <- max(smdTable$SECTOR)
    xsize <- max(smdTable$X.COORD) - min(smdTable$X.COORD)
    ysize <- max(smdTable$Y.COORD) - min(smdTable$Y.COORD)
    # Estimate the sector grid from the aspect ratio of the spot coordinates.
    maNgr <- round(sqrt(numSectors * ysize / xsize))
    maNgc <- round(sqrt(numSectors * xsize / ysize))
    if (is.na(maNgr)) {
      # The estimate is unusable; ask the user for the number of sector rows.
      cat("Image: http://genome-www5.stanford.edu/cgi-bin/SMD/clickable.pl?exptid=",
          header_value(y, "Exptid"), "\n", sep = "")
      repeat {
        cat("Enter number of vertical sectors (", numSectors,
            " total sectors): ", sep = "")
        answer <- readLines(n = 1)
        if (length(answer) == 0L) {
          stop("cannot determine the sector layout: no input available",
               call. = FALSE)
        }
        # Non-numeric input becomes NA; silence only that coercion warning.
        maNgr <- suppressWarnings(as.integer(answer))
        if (!is.na(maNgr) && maNgr > 0 && maNgr < numSectors &&
            numSectors %% maNgr == 0) {
          break
        }
      }
      maNgc <- numSectors / maNgr
    }
    maNsr <- as.integer(header_value(y, "Rows per Sector"))
    maNsc <- as.integer(header_value(y, "Columns per Sector"))
    maNspots <- maNgr * maNgc * maNsr * maNsc
    # Flag the spots that are actually present in the table.
    maSub <- rep(FALSE, maNspots)
    maSub[smdTable$SPOT] <- TRUE
    maNotes <- paste0("Print Name: ", header_value(y, "Printname"),
                      "\nTip Configuration: ",
                      header_value(y, "Tip Configuration"))
    layout <- new("marrayLayout", maNgr = maNgr, maNgc = maNgc,
                  maNsr = maNsr, maNsc = maNsc, maNspots = maNspots,
                  maSub = maSub, maNotes = maNotes)
  }

  if (is.null(gnames)) {
    cat("Generating probe sequence info from ", fnames[1], "\n", sep = "")
    if (is.null(smdTable)) {
      smdTable <- read_first_table()
    }
    suidCol <- match("SUID", colnames(smdTable))
    if (is.na(suidCol)) {
      stop("no 'SUID' column in ", fullfnames[1], call. = FALSE)
    }
    # Columns 2 .. (SUID - 1); empty (not c(2, 1)) when SUID is column 1 or 2.
    cols <- seq_len(suidCol - 1L)[-1L]
    gnames <- new("marrayInfo",
                  maLabels = as.character(smdTable$SUID),
                  maInfo = smdTable[, cols, drop = FALSE])
  }

  if (is.null(targets)) {
    cat("Generating target sample info from all files\n")
    # Header lines of every file; the first file's `skip` is used for all.
    headers <- lapply(fullfnames, readLines, n = skip)
    maLabels <- vapply(headers, header_value, character(1), key = "Exptid")
    maInfo <- do.call(rbind, lapply(headers, function(z) {
      data.frame(Experiment = header_value(z, "Experiment Name"),
                 Cy3 = header_value(z, "Channel 1 Description"),
                 Cy5 = header_value(z, "Channel 2 Description"),
                 SlideName = header_value(z, "SlideName"))
    }))
    rownames(maInfo) <- seq_len(nrow(maInfo))
    targets <- new("marrayInfo", maLabels = maLabels, maInfo = maInfo)
  }

  if (is.null(notes)) {
    cat("Generating notes from ", fnames[1], "\n", sep = "")
    # This value may itself contain "=", so keep every piece after the key.
    parameters <- header_fields(y, "Scanning parameters")
    parameters <- if (length(parameters) > 1) {
      paste(parameters[-1], collapse = ", ")
    } else {
      NA
    }
    notes <- paste0(
      "Organism: ", header_value(y, "Organism"),
      "\nCategory: ", header_value(y, "Category"),
      "\nSubcategory: ", header_value(y, "Subcategory"),
      "\nDescription: ", header_value(y, "Description"),
      "\nExperimenter: ", header_value(y, "Experimenter"),
      "\nE-Mail: ", header_value(y, "Contact email"),
      "\nScanning Software: ", header_value(y, "Scanning Software"),
      " ", header_value(y, "Software version"),
      "\nScanning Parameters: ", parameters
    )
  }

  read.marrayRaw(fnames = fnames, path = path, name.Gf = name.Gf,
                 name.Gb = name.Gb, name.Rf = name.Rf, name.Rb = name.Rb,
                 name.W = name.W, layout = layout, gnames = gnames,
                 targets = targets, notes = notes, skip = skip, sep = sep,
                 quote = quote, ...)
}
ADD COMMENT

Login before adding your answer.

Traffic: 704 users visited in the last hour
Help About
FAQ
Access RSS
API
Stats

Use of this site constitutes acceptance of our User Agreement and Privacy Policy.

Powered by the version 2.3.6