Entering edit mode
Justin Borevitz
▴
90
@justin-borevitz-327
Last seen 10.2 years ago
I've noticed that the order of probes in the 2 packages does not
agree, at
least for barley1 and ath1121501. Also, the way probes are ordered in
Linux
and Rgui (PC) does not agree. It could be something to do with the
alphabetizing
of probeset names in the 2 versions. It's possible this is true for
the
probe package coming from Affymetrix as well, which doesn't match
either
Linux or PC ordering. Lesson: never assume ordering...
Maybe everyone knows this already and that is the purpose of
matchprobes??
Any help with simple calls to avoid this problem is appreciated.
# Compare the order of PM probe names that affy produces on Linux with the
# order it produces on a PC, using the same CEL file on both machines.
# read.affybatch(), list.celfiles() and pm() are used below, so the affy
# package must be attached in each session (library(affy)).

# --- In Linux ---
barley.object <- read.affybatch(filenames = list.celfiles()[2])
# Warning message:
# Incompatible phenoData object. Created a new one.
# in: read.affybatch(filenames = list.celfiles()[2])
pnL <- rownames(pm(barley.object))
# Persist the Linux ordering so it can be compared on the other machine.
save(pnL, file = "pnL.RData", compress = TRUE)

## then download pnL.RData from Linux to the PC

# --- On PC ---
barley.object <- read.affybatch(filenames = list.celfiles()[2])
# Warning message:
# Incompatible phenoData object. Created a new one.
# in: read.affybatch(filenames = list.celfiles()[2])
pnPC <- rownames(pm(barley.object))
load("D:/barley/pnL.RData") # restores pnL saved on Linux
# `==` recycles silently, so insist that both vectors have the same length
# before comparing them position by position.
stopifnot(length(pnPC) == length(pnL))
table(pnPC == pnL)
#  FALSE   TRUE
# 172752  78685
# Observation and rough fix for probe package ordering to PC ordering.
# Checks whether the rows of barley1probe are in the same order as the PM
# probes of an AffyBatch read from the barley CEL files.
setwd("d:/barley")
library(affy)
barley.object <- read.affybatch(filenames = list.celfiles())
probesets <- rownames(pm(barley.object))
length(probesets)
library(barley1probe)
# The column is `Probe.Set.Name` (singular); `Probe.Set.Names` does not exist,
# so `$` would return NULL and length() would report 0.
length(barley1probe$Probe.Set.Name)
# The pm() row names appear to carry a numeric per-probe suffix after the
# probe-set name (e.g. "..._at12"). Strip the whole trailing run of digits in
# one pass instead of removing one digit per repeated gsub() call.
psn <- sub("_at[0-9]+$", "_at", probesets)
table(psn == barley1probe$Probe.Set.Name)
# FALSE TRUE
#249955 1482
# Same ordering check for the ath1121501 chip: compare the probe-set name of
# each PM probe in the AffyBatch with the corresponding row of
# ath1121501probe. Needs the affy package to be attached already.
setwd("d:/ath1")
ath1.obj <- read.affybatch(filenames = list.celfiles()[1])
aprobesets <- rownames(pm(ath1.obj))
# Strip the numeric per-probe suffix that follows "_at" in the pm() row names.
# The `+` quantifier handles suffixes of any length in one call, so the result
# does not depend on how many times the substitution is repeated.
apsn <- sub("_at[0-9]+$", "_at", aprobesets)
library(ath1121501probe)
table(apsn == ath1121501probe$Probe.Set.Name)
# FALSE TRUE
# 439 250639
Using the x and y coordinates, I reordered the probe package as follows...
# Reorder barley1probe so that its rows follow the PM probe order of the
# AffyBatch, matching probes by their (x, y) chip coordinates, then save the
# reordered table to barley1probe.RData in the working directory.
setwd("d:/barley")
library(affy)
barley.object <- read.affybatch(filenames = list.celfiles())
pm.i <- indexProbes(barley.object, which = "pm") # all genes
pm1 <- unlist(pm.i)
# Two columns: x and y coordinates of every PM probe, in AffyBatch order.
# `ncol` is spelled out rather than relying on partial matching of `nc`.
pm.i.xy <- matrix(indices2xy(pm1, abatch = barley.object), ncol = 2)
length(pm1)
dim(pm.i.xy)
pm.i.xy <- pm.i.xy - 1 # for affy units starting at 0.
probesets <- rownames(pm(barley.object))
length(probesets)
# now match with xy in barley1probe..
cdfxy <- paste(pm.i.xy[, 1], pm.i.xy[, 2])
library(barley1probe)
names(barley1probe)
probexy <- paste(barley1probe$x, barley1probe$y)
# ordcdf[i] is the row of barley1probe whose coordinates equal those of the
# i-th PM probe of the AffyBatch (NA when there is no such row).
ordcdf <- match(cdfxy, probexy)
# Strip the numeric per-probe suffix after "_at" in one pass, whatever its
# length, instead of one digit per repeated gsub() call.
psn <- sub("_at[0-9]+$", "_at", probesets)
# Before reordering: most probe-set names disagree.
table(psn == barley1probe$Probe.Set.Name)
# FALSE TRUE
#250100 1337
# After reordering by coordinates: all agree.
table(psn == barley1probe$Probe.Set.Name[ordcdf])
# TRUE
#251437
# An unmatched coordinate would insert an all-NA row into the saved table, so
# stop before overwriting rather than saving a silently corrupted package.
stopifnot(!anyNA(ordcdf))
barley1probe <- barley1probe[ordcdf, ]
save(barley1probe, file = "barley1probe.RData", compress = TRUE)
# Reorder ath1121501probe so that its rows follow the PM probe order of the
# AffyBatch, matching probes by their (x, y) chip coordinates, then save the
# reordered table to ath1121501probe.RData in the working directory.
# Needs the affy package to be attached already.
setwd("d:/ath1")
ath1.obj <- read.affybatch(filenames = list.celfiles()[1])
aprobesets <- rownames(pm(ath1.obj))
# Strip the numeric per-probe suffix after "_at" in one pass. The `+`
# quantifier replaces the three repeated single-digit gsub() calls and also
# copes with longer suffixes.
apsn <- sub("_at[0-9]+$", "_at", aprobesets)
pm.i <- indexProbes(ath1.obj, which = "pm") # all genes
pm1 <- unlist(pm.i)
# Two columns: x and y coordinates of every PM probe, in AffyBatch order.
# `ncol` is spelled out rather than relying on partial matching of `nc`.
pm.i.xy <- matrix(indices2xy(pm1, abatch = ath1.obj), ncol = 2)
length(pm1)
dim(pm.i.xy)
pm.i.xy <- pm.i.xy - 1 # for affy units starting at 0.
# now match with xy in ath1121501probe..
cdfxy <- paste(pm.i.xy[, 1], pm.i.xy[, 2])
library(ath1121501probe)
probexy <- paste(ath1121501probe$x, ath1121501probe$y)
# ordcdf[i] is the row of ath1121501probe whose coordinates equal those of the
# i-th PM probe of the AffyBatch (NA when there is no such row).
ordcdf <- match(cdfxy, probexy)
# Before reordering: a few hundred probe-set names disagree.
table(apsn == ath1121501probe$Probe.Set.Name)
# FALSE TRUE
# 439 250639
# After reordering by coordinates: all agree.
table(apsn == ath1121501probe$Probe.Set.Name[ordcdf])
# TRUE
#251078
# An unmatched coordinate would insert an all-NA row into the saved table, so
# stop before overwriting rather than saving a silently corrupted package.
stopifnot(!anyNA(ordcdf))
ath1121501probe <- ath1121501probe[ordcdf, ]
save(ath1121501probe, file = "ath1121501probe.RData", compress = TRUE)
---
Justin Borevitz
Plant Biology
Salk Institute
10010 N. Torrey Pines Rd.
La Jolla CA, 92037
USA
ph. 858 453-4100X1796
fax 858 452-4315
mailto:borevitz@salk.edu
http://naturalvariation.org