Reading SMD datasets

0

Entering edit mode

Stanley M Dunn ▴ 10

@stanley-m-dunn-725

Last seen 11.5 years ago

I've been trying to learn R and Bioconductor and to read SMD datasets, but I'm having trouble getting it to work. I can't even get it to work with a small dataset (5912.xls) I'm hoping that someone can help me get past the following error: > read.SMD(fnames = NULL, path = "e:/", name.Gf = "Ch 1 Intensity (Mean)", + name.Gb = "Ch 1 Background (Mean)", name.Rf = "Ch 2 Intensity (Mean)", + name.Rb = "Ch 2 Background (Median)", name.W = NULL, layout = NULL, + gnames = NULL, targets = NULL,notes = NULL, skip = 21, sep = " ", quote = "",nmax = 24192) [1] "Reading e://5912.xls" Error in readLines(con, n, ok) : invalid value for `n' > I just don't know where to go next to get the dataset read in. As you can see, I tried to fix the number of records and should probably fix the line format ('what' parameter to scan), but I suspect it should be simpler than I'm making it out to be. Thanks, in advance, Stan -- Stanley M Dunn, Ph.D. Paul S and Mary W Monroe Faculty Scholar Associate Dean for Graduate Education and Research School of Engineering Rutgers University 98 Brett Road Piscataway, NJ 08854 (732) 445 - 4462 (732) 445 - 3224 (Center for Packaging) (732) 445 - 7067 (Fax) smd@occlusal.rutgers.edu

GO GO • 1.3k views

ADD COMMENT • link updated 21.9 years ago by Jean Yee Hwa Yang ▴ 920 • written 21.9 years ago by Stanley M Dunn ▴ 10

0

Entering edit mode

Jean Yee Hwa Yang ▴ 920

@jean-yee-hwa-yang-104

Last seen 11.5 years ago

Hi Stanley, Colin Smith had written this update function for reading SMD files a little while ago. I am working towards putting them into marray at the moment, but you might like to try it first. Simply source("read.SMD2.R") in your R directory and try your command again. test <- read.SMD2("5912.xls") If there are still problems, please e-mail the file off-line and I will help you with it. Cheers Jean > > I've been trying to learn R and Bioconductor and to read SMD datasets, > but I'm having trouble getting it to work. I can't even get it to work > with a small dataset (5912.xls) > > I'm hoping that someone can help me get past the following error: > > > read.SMD(fnames = NULL, path = "e:/", name.Gf = "Ch 1 Intensity (Mean)", > + name.Gb = "Ch 1 Background (Mean)", name.Rf = "Ch 2 Intensity (Mean)", > + name.Rb = "Ch 2 Background (Median)", name.W = NULL, layout = NULL, > + gnames = NULL, targets = NULL,notes = NULL, skip = 21, sep = " ", > quote = "",nmax = 24192) > [1] "Reading e://5912.xls" > Error in readLines(con, n, ok) : invalid value for `n' > > > > I just don't know where to go next to get the dataset read in. As you > can see, I tried to fix the number of records and should probably fix > the line format ('what' parameter to scan), but I suspect it should be > simpler than I'm making it out to be. > > Thanks, in advance, > Stan > > -- > Stanley M Dunn, Ph.D. 
> Paul S and Mary W Monroe Faculty Scholar > Associate Dean for Graduate Education and Research > School of Engineering > Rutgers University > 98 Brett Road > Piscataway, NJ 08854 > > (732) 445 - 4462 > (732) 445 - 3224 (Center for Packaging) > (732) 445 - 7067 (Fax) > smd@occlusal.rutgers.edu > > _______________________________________________ > Bioconductor mailing list > Bioconductor@stat.math.ethz.ch > https://www.stat.math.ethz.ch/mailman/listinfo/bioconductor >
-------------- next part --------------
#' Read SMD result files into an marrayRaw object
#'
#' Locates the data header row in the first file, derives any of `layout`,
#' `gnames`, `targets` and `notes` that were not supplied from the
#' `key=value` header lines and the spot table, and then delegates the
#' actual intensity reading to `read.marrayRaw()`.
#'
#' @param fnames Character vector of file names. When `NULL`, every file in
#'   `path` whose name ends in `.xls` is used.
#' @param path Directory containing the files; `NULL` means `fnames` are used
#'   as given.
#' @param name.Gf,name.Gb,name.Rf,name.Rb,name.W Column names forwarded to
#'   `read.marrayRaw()`. `name.Gf` is additionally used (as a literal string)
#'   to find the column-header row of the first file.
#' @param layout,gnames,targets,notes Pre-built objects; each one that is
#'   `NULL` is generated from the file contents.
#' @param skip Kept for interface compatibility. The supplied value is
#'   replaced by the number of lines preceding the first line (among the
#'   first 100) that contains `name.Gf`.
#' @param sep,quote Field separator and quoting characters, used both for the
#'   spot table read here and for `read.marrayRaw()`.
#' @param ... Further arguments forwarded to `read.marrayRaw()`.
#' @return The object returned by `read.marrayRaw()`.
read.SMD2 <- function(fnames = NULL, path = ".",
                      name.Gf = "CH1I_MEAN", name.Gb = "CH1B_MEDIAN",
                      name.Rf = "CH2I_MEAN", name.Rb = "CH2B_MEDIAN",
                      name.W = NULL, layout = NULL, gnames = NULL,
                      targets = NULL, notes = NULL, skip = 0,
                      sep = "\t", quote = "", ...) {
  # Split the first header line containing `key` on "=".
  # A missing key yields NA, exactly as the open-coded idiom did.
  # NOTE(review): `key` is matched as a substring, so e.g. "Description" can
  # hit a "Channel 1 Description" line if that line comes first -- confirm
  # against the header order of real SMD files.
  header_parts <- function(lines, key) {
    strsplit(lines[grep(key, lines)[1]], "=")[[1]]
  }
  # Value part (text between the first and second "=") of a header line.
  header_value <- function(lines, key) {
    header_parts(lines, key)[2]
  }

  if (is.null(fnames)) {
    # `pattern` is a regular expression, not a glob: anchor on the extension.
    search_dir <- if (is.null(path)) "." else path
    fnames <- dir(path = search_dir, pattern = "\\.xls$")
  }
  if (length(fnames) == 0) {
    stop("no SMD files to read", call. = FALSE)
  }
  fullfnames <- if (is.null(path)) fnames else file.path(path, fnames)

  # The column-header row is the first line mentioning the Gf column name.
  # fixed = TRUE so names such as "Ch 1 Intensity (Mean)" match literally.
  y <- readLines(fullfnames[1], n = 100)
  header_row <- grep(name.Gf, y, fixed = TRUE)[1]
  if (is.na(header_row)) {
    stop("column '", name.Gf, "' not found in the first 100 lines of ",
         fullfnames[1], call. = FALSE)
  }
  skip <- header_row - 1

  # Spot table of the first file; read at most once and shared by the
  # layout and gnames sections.
  read_spot_table <- function() {
    read.table(fullfnames[1], header = TRUE, sep = sep, quote = quote,
               skip = skip, comment.char = "")
  }
  smdTable <- NULL

  if (is.null(layout)) {
    cat("Generating layout from ", fnames[1], "\n", sep = "")
    smdTable <- read_spot_table()
    numSectors <- max(smdTable$SECTOR)
    xsize <- max(smdTable$X.COORD) - min(smdTable$X.COORD)
    ysize <- max(smdTable$Y.COORD) - min(smdTable$Y.COORD)
    # Estimate the sector grid from the aspect ratio of the spot coordinates.
    maNgr <- round(sqrt(numSectors * ysize / xsize))
    maNgc <- round(sqrt(numSectors * xsize / ysize))
    if (is.na(maNgr)) {
      # No usable coordinates: ask the user for the grid shape instead.
      exptid <- header_value(y, "Exptid")
      cat("Image: http://genome-www5.stanford.edu/cgi-bin/SMD/clickable.pl?exptid=",
          exptid, "\n", sep = "")
      repeat {
        cat("Enter number of vertical sectors (", numSectors,
            " total sectors): ", sep = "")
        answer <- readLines(n = 1)
        if (length(answer) == 0) {
          stop("cannot determine the sector layout: no input available",
               call. = FALSE)
        }
        # Suppress only the coercion warning rather than changing the
        # global `warn` option.
        maNgr <- suppressWarnings(as.integer(answer))
        # `<=` so that a single column of sectors (and a one-sector array)
        # is an acceptable answer.
        if (!is.na(maNgr) && maNgr > 0 && maNgr <= numSectors &&
            numSectors %% maNgr == 0) {
          break
        }
      }
      maNgc <- numSectors / maNgr
    }
    maNsr <- as.integer(header_value(y, "Rows per Sector"))
    maNsc <- as.integer(header_value(y, "Columns per Sector"))
    maNspots <- maNgr * maNgc * maNsr * maNsc
    # Mark the spots that are actually present in the file.
    maSub <- rep(FALSE, maNspots)
    maSub[smdTable$SPOT] <- TRUE
    maNotes <- paste0("Print Name: ", header_value(y, "Printname"),
                      "\nTip Configuration: ",
                      header_value(y, "Tip Configuration"))
    layout <- new("marrayLayout", maNgr = maNgr, maNgc = maNgc,
                  maNsr = maNsr, maNsc = maNsc, maNspots = maNspots,
                  maSub = maSub, maNotes = maNotes)
  }

  if (is.null(gnames)) {
    cat("Generating probe sequence info from ", fnames[1], "\n", sep = "")
    if (is.null(smdTable)) {
      smdTable <- read_spot_table()
    }
    suid_col <- match("SUID", colnames(smdTable))
    if (is.na(suid_col)) {
      stop("column 'SUID' not found in ", fullfnames[1], call. = FALSE)
    }
    # Annotation columns are those between the first column and SUID;
    # this is empty (not a descending sequence) when SUID is column 1 or 2.
    info_cols <- seq_len(suid_col - 1)[-1]
    gnames <- new("marrayInfo",
                  maLabels = as.character(smdTable$SUID),
                  maInfo = smdTable[, info_cols, drop = FALSE])
  }

  if (is.null(targets)) {
    cat("Generating target sample info from all files\n")
    per_file <- lapply(fullfnames, function(f) {
      z <- readLines(f, n = skip)
      list(
        label = header_value(z, "Exptid"),
        info = data.frame(
          Experiment = header_value(z, "Experiment Name"),
          Cy3 = header_value(z, "Channel 1 Description"),
          Cy5 = header_value(z, "Channel 2 Description"),
          SlideName = header_value(z, "SlideName")
        )
      )
    })
    maLabels <- vapply(per_file, function(p) p$label, character(1))
    maInfo <- do.call(rbind, lapply(per_file, function(p) p$info))
    rownames(maInfo) <- seq_len(nrow(maInfo))
    targets <- new("marrayInfo", maLabels = maLabels, maInfo = maInfo)
  }

  if (is.null(notes)) {
    cat("Generating notes from ", fnames[1], "\n", sep = "")
    # Scanning parameters may hold several "="-separated pieces: keep all.
    parameters <- header_parts(y, "Scanning parameters")
    parameters <- if (length(parameters) > 1) {
      paste(parameters[-1], collapse = ", ")
    } else {
      NA
    }
    notes <- paste0(
      "Organism: ", header_value(y, "Organism"),
      "\nCategory: ", header_value(y, "Category"),
      "\nSubcategory: ", header_value(y, "Subcategory"),
      "\nDescription: ", header_value(y, "Description"),
      "\nExperimenter: ", header_value(y, "Experimenter"),
      "\nE-Mail: ", header_value(y, "Contact email"),
      "\nScanning Software: ", header_value(y, "Scanning Software"),
      " ", header_value(y, "Software version"),
      "\nScanning Parameters: ", parameters
    )
  }

  read.marrayRaw(fnames = fnames, path = path,
                 name.Gf = name.Gf, name.Gb = name.Gb,
                 name.Rf = name.Rf, name.Rb = name.Rb, name.W = name.W,
                 layout = layout, gnames = gnames, targets = targets,
                 notes = notes, skip = skip, sep = sep, quote = quote, ...)
}

ADD COMMENT • link 21.9 years ago Jean Yee Hwa Yang ▴ 920

Login before adding your answer.