How can I find the name of the file that a particular R script is missing?
I have copied a functional R script into a new directory and it does not want to run anymore. It is giving me the following error message:
Error in eval(expr, envir, enclos) : object 'Symbols' not found
But it is running fine in its old directory. Hence there must be a file in the old directory that is not in the new directory. But the old directory is too big and has at least 500 files. But if I knew the name of the file R is missing then I could search for it on my hard drive and copy-paste it into the new R directory. How can I ask R to give me a list of all the file names that it needs to run? Is there a function for that?
I have copy-pasted the script in question below: What is wrong with it? It worked fine only a few days ago. Did I accidentally comment out a line that I should not have? Or is the error insignificant? I am trying to figure out what this script is trying to do that it apparently cannot. How could I do that? Do I need to update R? I am using R Studio
#RMA normalization
#Install yeast annotation data
source("http://bioconductor.org/biocLite.R")
#biocLite("Biobase")
#biocLite("affy")
#biocLite("limma")
#biocLite("yeast2.db")
#biocLite("yeast2cdf")
#biocLite("genefilter")
#biocLite("annotate")
#biocLite("IRanges")
#biocLite("gplots")
# Load Bioconductor packages
library(affy)
library(limma)
library(genefilter)
library(annotate)
library(yeast2.db)
library(yeast2cdf)
library(IRanges)
library(gplots)
setwd("D:/R_Data/RMA_normalization_for_Dr_Tangs_data")
# Read in microarray CEL files and phenotype data
pheno = read.AnnotatedDataFrame("annotations.tsv")
affydata = ReadAffy(celfile.path="cel", phenoData=pheno, verbose=TRUE)
#affydata = ReadAffy(celfile.path="/CEL/", phenoData=pheno, verbose=TRUE)
write.csv(exprs(affydata), file='rawdata.csv')
# Normalize expression values and do log2 transformation
eset = rma(affydata)
normexp = exprs(eset)
write.csv(exprs(eset), file='eset.csv')
y2annot = read.csv("y2annotations.txt", sep="\t")
y2systematic = new.env()
y2common = new.env()
y2coordinate = new.env()
for (i in 1:length(y2annot$Probesets)) {
y2systematic[[ as.character(y2annot$Probesets[i]) ]] = y2annot$SystematicName[i]
y2common[[ as.character(y2annot$Probesets[i]) ]] = y2annot$CommonName[i]
y2coordinate[[ as.character(y2annot$Probesets[i]) ]] = y2annot$Coordinates[i]
}
# Write out normalized data values
Probesets = row.names(eset);
Systematic = as.character(unlist(mget(Probesets, y2systematic, ifnotfound=NA)))
names(Systematic) = Probesets
Common = as.character(unlist(mget(Probesets, y2common, ifnotfound=NA)))
names(Common) = Probesets
Coordinate = as.character(unlist(mget(Probesets, y2coordinate, ifnotfound=NA)))
names(Coordinate) = Probesets
normdata = cbind(Probesets, normexp);
#Symbols = unlist(mget(Probesets, yeast2GENENAME, ifnotfound=NA))
#ORFs = unlist(mget(Probesets, yeast2ORF, ifnotfound=NA))
#Symbols = unlist(mget(Probesets, ygs98GENENAME, ifnotfound=NA))
#ORFs = unlist(mget(Probesets, ygs98ORF, ifnotfound=NA))
#normexp.raw=2^(normexp)
#colnames(normexp.raw)=paste(filtered$SampleName, "raw")
#colnames(normexp)=paste(filtered$SampleName, "LOG")
#normdata = cbind(Probesets,ORFs,Symbols,normexp,normexp.raw)
#write.table(normdata, "GSE47820_Yeast2_expression.txt", sep="\t", row.names=FALSE, col.names=TRUE)
# Filtering out low intensity probesets
# This keeps only probesets with signals of 2^7=128 or higher in at least 3 samples
#filtered = eset[genefilter(eset, filterfun(kOverA(3, 7))),]
target.classes = c('Atg15','YEPD','WT','Erg6')
scores = matrix(0, nrow(eset), length(target.classes))
rownames(scores) = rownames(eset)
colnames(scores) = target.classes
for (class.index in 1:length(target.classes))
{
target.class = target.classes[class.index]
print(target.class)
filtered = eset[, eset@phenoData@data[,target.class] %in% c("Yes","No")]
#filtered=eset
# Design matrix and fit linear model
for (i in 1:ncol(filtered)){
print(paste(i, "/", ncol(filtered)))
t_filtered = filtered[,-i]
samples = t_filtered@phenoData@data[,target.class]
samples = as.factor(samples)
design = model.matrix(~0 + samples)
colnames(design) = levels(samples)
linfit = lmFit(exprs(t_filtered), design)
# Define the comparisons to make in this data set: dal80_LoaOOH vs. dal80_con
contrast.matrix = makeContrasts(A_B = Yes - No, levels=design)#experimental-control
contrast.fit = contrasts.fit(linfit, contrast.matrix)
smooth.fit = eBayes(contrast.fit)
probeset.list = topTable(smooth.fit, number=nrow(t_filtered), coef="A_B", genelist=smooth.fit$genes, adjust.method="BH", sort.by="P", p.value=1)
topprobes = topTable(smooth.fit, number=10928, coef="A_B", genelist=smooth.fit$genes, adjust.method="BH", sort.by="P")
for (index in 1:nrow(topprobes)){
scores[rownames(topprobes)[index],target.class] =
scores[rownames(topprobes)[index],target.class] + (nrow(scores) - index)
}
}
}
tmp <- eset
colnames(tmp) <- pheno$sampletype
gene.annotations = read.csv2("gene_annotation.csv", header=TRUE, sep=",")
rownames(gene.annotations) <- gene.annotations$probe_set_id
tmp.table <- cbind(exprs(tmp), gene.annotations[match(rownames(exprs(tmp)), rownames(gene.annotations)),])
write.csv(cbind(Common, Systematic, scores, tmp.table), "scores10928_4.csv")
# Volcano plot
par(oma=c(4,2,2,2))#bottom, left, top, right
volcanoplot(smooth.fit, coef="A_B", highlight=10, names=paste(Symbols[row.names(smooth.fit)]," ", Common[row.names(smooth.fit)]),
main=target.class)#ask Robbie how to decrease font
topprobes = topTable(smooth.fit, number=5000, coef="A_B", genelist=smooth.fit$genes, adjust.method="BH", sort.by="P")
filtexp = exprs(filtered);
colnames(filtexp) = filtered@phenoData$sampletype
#my.colors2 = colorRampPalette(c("darkmagenta", "darkblue", "blue4", "blue3", "blue2", "blue1", "cyan2", "cyan1", "lightblue2", "lightblue1", "green4", "green3", "green2", "green1", "lawngreen", "chartreuse2", "chartreuse1", "lightgreen", "lightyellow1", "lightyellow2", "moccasin", "navajowhite1", "navajowhite2", "yellow1", "yellow2", "gold1", "gold2", "orange1", "orange2", "darkorange1", "darkorange2", "orangered1", "orangered2", "red1", "red2", "red3", "tan3", "saddlebrown", "tan4"))(2048)
#par(cex.main=1)#my heat map
#heatmap.2(filtexp[row.names(topprobes),],
#scale="row",
#ylab="Gene", xlab="Sample", main="long vs. short",
#labRow=paste(Common[selected.probes], " ", Systematic[selected.probes]),
#col=my.colors2,
#cellnote=round(filtexp[row.names(topprobes),], digits=2),
#trace = "none", density.info = "none",
#cexRow=0.0377, cexCol=0.85,
#notecol="black", notecex=0.0377)
# scale = row is row Z-score (-1,1)
# scale = none is just the raw values
#heatmap.2(filtexp[row.names(topprobes),], scale="row", ylab="Gene", xlab=" ",
#trace='none',
# main=target.class, cexRow=0.1, cexCol=0.8, labRow=paste(Systematic, " ", Common))
#heatmap.2(exprs(eset), scale="row", ylab="Gene", xlab=" ",
#trace='none',
# main=target.class, cexRow=0.1, cexCol=0.8, labRow=paste(Systematic, " ", Common))
#my.colors = colorRampPalette(c("darkorchid4", "darkorchid", "mediumorchid", "orchid3", "orchid4", "orchid", "plum", "purple", "darkblue", "slateblue", "blue", "dodgerblue", "darkcyan", "cyan", "darkturquoise", "turquoise", "lightblue", "mediumaquamarine", "aquamarine", "seagreen", "lightseagreen", "olivedrab",
# "darkgreen", "darkolivegreen", "green", "chartreuse", "springgreen", "lightgreen", "palegreen", "greenyellow", "yellow", "palegoldenrod", "lightgoldenrod", "goldenrod", "darkgoldenrod", "lightpink", "pink", "hotpink", "deeppink", "orange", "darkorange", "orangered", "indianred", "tomato", "red", "firebrick", "darkred"))(2048)
#rainbow heat map
#par(cex.main=1)
#heatmap.2(filtexp[row.names(topprobes),],
# scale="none",
# #scale="row",
# ylab="Gene", xlab=" ",
# col=my.colors2, # My heat map
# #col=rainbow(n=1000,start=0,end=0.8), # rainbow heatmap
# cellnote=round(filtexp[row.names(topprobes),], digits=2),
#trace = "none", density.info = "none",
# notecol="black", notecex=0.0377, margins=c(2,21),
# main="LaoOOH vs. Con",
# cexRow=0.1, cexCol=0.8, labRow=paste(Systematic, " ", Common),
#trace = "none",
# density.info = "none"#)
#str(topprobes)
# Heatmap using sample names
#colnames(filtexp) = filtered$SampleName
#heatmap(filtexp[row.names(topprobes),], scale="row", ylab="Gene", xlab="", main="LaoOOH vs. Con", cexRow=0.1, cexCol=0.8, labRow=paste(Systematic, " ", Common))
# saves top genes into topgenes.csv file
# you should change the number= parameter to extract as many genes as you want.
topprobes = topTable(smooth.fit, number=500, coef="A_B", genelist=smooth.fit$genes, adjust.method="BH", sort.by="P")
write.csv(cbind(topprobes,
Common[rownames(topprobes)],
Systematic[rownames(topprobes)]), 'topgenes_4.csv')
Thanks a lot for your help and advice
Thomas