Entering edit mode
Richard Friedman
★
2.0k
@richard-friedman-513
Last seen 10.4 years ago
Dear Bioconductor list members,
I am analyzing a dataset with both biological and technical
replicates.
There is 1 factor with 8 different levels, denoted A-H
Biological replicates of a given level are denoted by 1,2, ...
Technical replicates corresponding to a given biological replcate are
denoted 2a,2b..
I wish to compute the statistics of each of levels B-H)relative to
level A.
I would greatly appreciate it if someone were to look at the
following code and
input files and tell me if they are correct.
Target file:
Name FileName Target
A1_xen1_1_1 A1_xen1_1_1.CEL A_xen
A2_xen2a_5_2 A2_xen2a_5_2.CEL A_xen
A3_xen2b_6_3 A3_xen2b_6_3.CEL A_xen
B1_cr1_2_4 B1_cr1_2_4.CEL B_crp
B2_cr2a_9_5 B2_cr2a_9_5.CEL B_crp
B3_cr2b_10_6 B3_cr2b_10_6.CEL B_crp
C1_nod1_3_7 C1_nod1_3_7.CEL C_nod
C2_nod2a_11_8 C2_nod2a_11_8.CEL C_nod
C3_nod2b_12_9 C3_nod2b_12_9.CEL C_nod
D1_fgf1a_17_10 D1_fgf1a_17_10.CEL D_FGF
D2_fgf1b_18_11 D2_fgf1b_18_11.CEL D_FGF
E1_act1a_19_12 E1_act1a_19_12.CEL E_ACTB
E2_act1b_20_13 E2_act1b_20_13.CEL E_ACTB
F1_sb1a_7_14 F1_sb1a_7_14.CEL F_SB
F2_sb1b_8_15 F2_sb1b_8_15.CEL F_SB
G1_crsb1a_13_16 G1_crsb1a_13_16.CEL G_CR_SB
G2_crsb1b_14_17 G2_crsb1b_14_17.CEL G_CR_SB
H1_ndsb1a_15_18 H1_ndsb1a_15_18.CEL H_ND_SB
H2_ndsb1b_16_19 H2_ndsb1b_16_19.CEL H_ND_SB
> sessionInfo()
R version 2.5.1 (2007-06-27)
i386-apple-darwin8.9.1
locale:
en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
attached base packages:
[1] "splines" "tools" "stats" "graphics" "grDevices"
"utils" "datasets" "methods" "base"
other attached packages:
statmod limma simpleaffy
genefilter survival mouse430a2probe mouse430a2
"1.3.0" "2.10.5" "2.10.31"
"1.14.1" "2.32" "1.16.2" "1.16.0"
mouse430a2cdf annotate annaffy
affyPLM gcrma matchprobes affydata
"1.16.0" "1.14.1" "1.8.1"
"1.12.0" "2.8.1" "1.8.1" "1.11.3"
affy affyio KEGG
GO Biobase
"1.14.2" "1.4.1" "1.16.1"
"1.16.0" "1.14.1"
>
> xen1data<-ReadAffy()
> xen1dataeset<-gcrma(xen1data)
Adjusting for optical effect...................Done.
Computing affinities.Done.
Adjusting for non-specific binding...................Done.
Normalizing
Calculating Expression
> xen1data
AffyBatch object
size of arrays=732x732 features (10 kb)
cdf=Mouse430A_2 (22690 affyids)
number of samples=19
number of genes=22690
annotation=mouse430a2
notes=
> xen1dataeset
ExpressionSet (storageMode: lockedEnvironment)
assayData: 22690 features, 19 samples
element names: exprs
phenoData
rowNames: A1_xen1_1_1.CEL, A2_xen2a_5_2.CEL, ...,
H2_ndsb1b_16_19.CEL (19 total)
varLabels and varMetadata:
sample: arbitrary numbering
featureData
featureNames: 1415670_at, 1415671_at, ..., AFFX-r2-P1-cre-5_at
(22690 total)
varLabels and varMetadata: none
experimentData: use 'experimentData(object)'
Annotation [1] "mouse430a2"
> xen1dataeset(PhenoData)
Error: could not find function "xen1dataeset"
> PhenoData
Error: object "PhenoData" not found
> phenoData(xen1dataeset)
rowNames: A1_xen1_1_1.CEL, A2_xen2a_5_2.CEL, ...,
H2_ndsb1b_16_19.CEL (19 total)
varLabels and varMetadata:
sample: arbitrary numbering
> phenoData(xen1data)
rowNames: A1_xen1_1_1.CEL, A2_xen2a_5_2.CEL, ...,
H2_ndsb1b_16_19.CEL (19 total)
varLabels and varMetadata:
sample: arbitrary numbering
> pData(xen1data)
sample
A1_xen1_1_1.CEL 1
A2_xen2a_5_2.CEL 2
A3_xen2b_6_3.CEL 3
B1_cr1_2_4.CEL 4
B2_cr2a_9_5.CEL 5
B3_cr2b_10_6.CEL 6
C1_nod1_3_7.CEL 7
C2_nod2a_11_8.CEL 8
C3_nod2b_12_9.CEL 9
D1_fgf1a_17_10.CEL 10
D2_fgf1b_18_11.CEL 11
E1_act1a_19_12.CEL 12
E2_act1b_20_13.CEL 13
F1_sb1a_7_14.CEL 14
F2_sb1b_8_15.CEL 15
G1_crsb1a_13_16.cel 16
G2_crsb1b_14_17.CEL 17
H1_ndsb1a_15_18.CEL 18
H2_ndsb1b_16_19.CEL 19
> pData(xen1dataeset)
sample
A1_xen1_1_1.CEL 1
A2_xen2a_5_2.CEL 2
A3_xen2b_6_3.CEL 3
B1_cr1_2_4.CEL 4
B2_cr2a_9_5.CEL 5
B3_cr2b_10_6.CEL 6
C1_nod1_3_7.CEL 7
C2_nod2a_11_8.CEL 8
C3_nod2b_12_9.CEL 9
D1_fgf1a_17_10.CEL 10
D2_fgf1b_18_11.CEL 11
E1_act1a_19_12.CEL 12
E2_act1b_20_13.CEL 13
F1_sb1a_7_14.CEL 14
F2_sb1b_8_15.CEL 15
G1_crsb1a_13_16.cel 16
G2_crsb1b_14_17.CEL 17
H1_ndsb1a_15_18.CEL 18
H2_ndsb1b_16_19.CEL 19
>
I think the right ones were read in before as expression same as
before.
> sampleNames(xen1data)
[1] "A1_xen1_1_1.CEL" "A2_xen2a_5_2.CEL"
"A3_xen2b_6_3.CEL" "B1_cr1_2_4.CEL"
[5] "B2_cr2a_9_5.CEL" "B3_cr2b_10_6.CEL"
"C1_nod1_3_7.CEL" "C2_nod2a_11_8.CEL"
[9] "C3_nod2b_12_9.CEL" "D1_fgf1a_17_10.CEL"
"D2_fgf1b_18_11.CEL" "E1_act1a_19_12.CEL"
[13] "E2_act1b_20_13.CEL" "F1_sb1a_7_14.CEL"
"F2_sb1b_8_15.CEL" "G1_crsb1a_13_16.cel"
[17] "G2_crsb1b_14_17.CEL" "H1_ndsb1a_15_18.CEL" "H2_ndsb1b_16_19.CEL"
> targets<-readTargets("xen1.txt")
> f<-factor(targets$Target,levels=c("A_xen",
"B_crp","C_nod","D_FGF","E_ACTB",
+ "F_SB","G_CR_SB","H_ND_SB"))
> design<-model.matrix(~0+f)
> colnames(design)<- c("A_xen","B_crp","C_nod","D_FGF","E_ACTB",
"F_SB","G_CR_SB","H_ND_SB")
> design
A_xen B_crp C_nod D_FGF E_ACTB F_SB G_CR_SB H_ND_SB
1 1 0 0 0 0 0 0 0
2 1 0 0 0 0 0 0 0
3 1 0 0 0 0 0 0 0
4 0 1 0 0 0 0 0 0
5 0 1 0 0 0 0 0 0
6 0 1 0 0 0 0 0 0
7 0 0 1 0 0 0 0 0
8 0 0 1 0 0 0 0 0
9 0 0 1 0 0 0 0 0
10 0 0 0 1 0 0 0 0
11 0 0 0 1 0 0 0 0
12 0 0 0 0 1 0 0 0
13 0 0 0 0 1 0 0 0
14 0 0 0 0 0 1 0 0
15 0 0 0 0 0 1 0 0
16 0 0 0 0 0 0 1 0
17 0 0 0 0 0 0 1 0
18 0 0 0 0 0 0 0 1
19 0 0 0 0 0 0 0 1
attr(,"assign")
[1] 1 1 1 1 1 1 1 1
attr(,"contrasts")
attr(,"contrasts")$f
[1] "contr.treatment"
> biolrep<-c(1,2,2,3,4,4,5,6,6,7,7,8,8,9,9,10,10,11,11)
>
> corfit<-duplicateCorrelation(xen1dataeset,ndups=1,block=biolrep)
Loading required package: statmod
Warning message:
Too much damping - convergence tolerance not achievable in: glmgam.fit
(dx, dy, start = start, tol = tol, maxit = maxit, trace = trace)
fit<-lmFit(xen1dataeset,design,block=biolrep,cor=corfit$consensus)
> contrast.matrix<- makeContrasts(B_crp- A_xen,C_nod- A_xen, D_FGF -
A_xen,E_ACTB - A_xen, F_SB- A_xen,
+ G_CR_SB- A_xen, H_ND_SB -A_xen,levels=design)
contrast.matrix
Contrasts
Levels B_crp - A_xen C_nod - A_xen D_FGF - A_xen E_ACTB - A_xen
F_SB - A_xen G_CR_SB - A_xen H_ND_SB - A_xen
A_xen -1 -1 -1
-1 -1 -1 -1
B_crp 1 0 0
0 0 0 0
C_nod 0 1 0
0 0 0 0
D_FGF 0 0 1
0 0 0 0
E_ACTB 0 0 0
1 0 0 0
F_SB 0 0 0
0 1 0 0
G_CR_SB 0 0 0
0 0 1 0
H_ND_SB 0 0 0
0 0 0 1
> fit2<-contrasts.fit(fit,contrast.matrix)
> fit3<-eBayes(fit2)
> fit3<-eBayes(fit2)
> crpMxen<-topTable
(fit3,coef=1,number=22690,adjust.method="BH",sort.by="B")
write.csv(crpMxen,"crpMxen.csv")
same few genes as before.
> cnodMxen<-topTable
(fit3,coef=2,number=22690,adjust.method="BH",sort.by="B")
> write.csv(cnodMxen,"crpMnod.csv")
chmaged to nodMxen.csv
> fgfMxen<-topTable
(fit3,coef=3,number=22690,adjust.method="BH",sort.by="B")
> write.csv(fgfMxen,"fgfMxen.csv")
> actMxen<-topTable
(fit3,coef=4,number=22690,adjust.method="BH",sort.by="B")
> write.csv(actMxen,"actMxen.csv")
> sbMxen<-topTable
(fit3,coef=5,number=22690,adjust.method="BH",sort.by="B")
> write.csv(sbMxen,"sbMxen.csv")
> g_cr_sbMxen<-topTable
(fit3,coef=6,number=22690,adjust.method="BH",sort.by="B")
> write.csv(g_cr_sbMxen,"gc_cr_sbMxen.csv")
> h_nd_sbMxen<-topTable
(fit3,coef=7,number=22690,adjust.method="BH",sort.by="B")
> write.csv(h_nd_sbMxen,"h_nd_sbMxen.csv")
Thanks and best wishes,
Rich
------------------------------------------------------------
Richard A. Friedman, PhD
Biomedical Informatics Shared Resource
Lecturer
Department of Biomedical Informatics
Educational Coordinator
Center for Computational Biology and Bioinformatics
National Center for Multiscale Analysis of Genomic Networks
Box 95, Room 130BB or P&S 1-420C
Columbia University Medical Center
630 W. 168th St.
New York, NY 10032
(212)305-6901 (5-6901) (voice)
friedman at cancercenter.columbia.edu
http://cancercenter.columbia.edu/~friedman/
In memoriam, John Stewart Williamson