Hi all!
I'm new with R and amd currently working on data from ZebGene-1_0-st arrays. However I am having problem doing the annotations as firstly there is no package in bioconductor and secondly, the sample workflow that I found for the array does not yield a true sanity check/identical. I realised that the workflow below does not extract and reorder to match my probes. Any advice to overcome this problem helps! Thank you in advance :)
Workflow:
# Import the annotations
dat <- read.csv(file.path(metaDir, "ZebGene-1_0-st-v1.na33.3.zv9.transcript.csv"), comment.char = "#", stringsAsFactors=FALSE, na.string = "---")
dat <- col2rownames(dat, "probeset_id")
#extract and reorder to match the array features
dat <- dat[row.names(fData(affyNorm.batch)),]
dat <- dat[,c("probeset_id", "seqname", "strand", "start", "stop", "gene_assignment", "mrna_assignment")]
dat <- as.matrix(dat)
# parse mrna_assignments
headercol <- "mrna_assignment"
mrnas <- t(sapply(strsplit(dat[, headercol], " /// "), function(x) {
dat.probe.df <- do.call(rbind, strsplit(x, " // "))
bestrna <- dat.probe.df[1,1]
rnas <- paste(dat.probe.df[,1], collapse=",")
c(bestrna, rnas)
}))
mrnas <- as.data.frame(mrnas)
names(mrnas) <- c("best.mrna", "mrnas")
# parse gene assignments
headercol <- "gene_assignment"
genes <- t(sapply(strsplit(dat[, headercol], " /// "), function(x) {
if(is.na(x[1])){
out <- rep("NA", 6)
} else {
dat.probe.mat <- as.matrix(do.call(rbind, strsplit(x, " // ")))
bestgene <- as.character(dat.probe.mat[1,1])
dat.probe.vec <- apply(dat.probe.mat, 2, function(y) {
paste(unique(y), collapse=",")
})
out <- as.character(c(bestgene,dat.probe.vec))
}
return(out)
}))
genes <- as.data.frame(genes[,c(1,2,3,4,6)])
names(genes) <- c("bestgene", "accessions", "symbols", "descriptions", "entrezIDs")
genes <- rownames2col(genes, "probeids")
#combo mrna and gene assigments
gene.annots <- cbind(genes, mrnas)
