How can I find the name of the file that a particular R script is missing?
1
0
Entering edit mode
@thomasfhahn2-7201
Last seen 7.9 years ago
United States

How can I find the name of the file that a particular R script is missing?

 

I have copied a functional R script into a new directory and it does not want to run anymore.  It is giving me the following error message:

Error in eval(expr, envir, enclos) : object 'Symbols' not found

 

But it is running fine in its old directory.  Hence there must be a file in the old directory that is not in the new directory.  But the old directory is too big and has at least 500 files.  But if I knew the name of the file R is missing then I could search for it on my hard drive and copy-paste it into the new R directory.  How can I ask R to give me a list of all the file names that it needs to run?  Is there a function for that?

 

I have copy-pasted the script in question below:  What is wrong with it?  It worked fine only a few days ago.  Did I accidentally comment out a line that I should not have?  Or is the error insignificant?  I am trying to figure out what this script is trying to do that it apparently cannot.  How could I do that?   Do I need to update R?  I am using R Studio

 

#RMA normalization

#Install yeast annotation data
source("http://bioconductor.org/biocLite.R")
#biocLite("Biobase")
#biocLite("affy")
#biocLite("limma")
#biocLite("yeast2.db")
#biocLite("yeast2cdf")
#biocLite("genefilter")
#biocLite("annotate")
#biocLite("IRanges")
#biocLite("gplots")

# Load Bioconductor packages
library(affy)
library(limma)
library(genefilter)
library(annotate)
library(yeast2.db)
library(yeast2cdf)
library(IRanges)
library(gplots)

setwd("D:/R_Data/RMA_normalization_for_Dr_Tangs_data")

# Read in microarray CEL files and phenotype data
pheno = read.AnnotatedDataFrame("annotations.tsv")
affydata = ReadAffy(celfile.path="cel", phenoData=pheno, verbose=TRUE)
#affydata = ReadAffy(celfile.path="/CEL/", phenoData=pheno, verbose=TRUE)
write.csv(exprs(affydata), file='rawdata.csv')

# Normalize expression values and do log2 transformation
eset = rma(affydata)
normexp = exprs(eset)
write.csv(exprs(eset), file='eset.csv')

y2annot = read.csv("y2annotations.txt", sep="\t")
y2systematic = new.env()
y2common = new.env()
y2coordinate = new.env()
for (i in 1:length(y2annot$Probesets)) {
  y2systematic[[ as.character(y2annot$Probesets[i]) ]] = y2annot$SystematicName[i]
  y2common[[ as.character(y2annot$Probesets[i]) ]] = y2annot$CommonName[i]
  y2coordinate[[ as.character(y2annot$Probesets[i]) ]] = y2annot$Coordinates[i]
}


# Write out normalized data values
Probesets = row.names(eset);
Systematic = as.character(unlist(mget(Probesets, y2systematic, ifnotfound=NA)))
names(Systematic) = Probesets
Common = as.character(unlist(mget(Probesets, y2common, ifnotfound=NA)))
names(Common) = Probesets
Coordinate = as.character(unlist(mget(Probesets, y2coordinate, ifnotfound=NA)))
names(Coordinate) = Probesets


normdata = cbind(Probesets, normexp);
#Symbols = unlist(mget(Probesets, yeast2GENENAME, ifnotfound=NA))
#ORFs = unlist(mget(Probesets, yeast2ORF, ifnotfound=NA))
#Symbols = unlist(mget(Probesets, ygs98GENENAME, ifnotfound=NA))
#ORFs = unlist(mget(Probesets, ygs98ORF, ifnotfound=NA))
#normexp.raw=2^(normexp)
#colnames(normexp.raw)=paste(filtered$SampleName, "raw")
#colnames(normexp)=paste(filtered$SampleName, "LOG")
#normdata = cbind(Probesets,ORFs,Symbols,normexp,normexp.raw)
#write.table(normdata, "GSE47820_Yeast2_expression.txt", sep="\t", row.names=FALSE, col.names=TRUE)


# Filtering out low intensity probesets
# This keeps only probesets with signals of 2^7=128 or higher in at least 3 samples
#filtered = eset[genefilter(eset, filterfun(kOverA(3, 7))),]
target.classes = c('Atg15','YEPD','WT','Erg6')
scores = matrix(0, nrow(eset), length(target.classes))
rownames(scores) = rownames(eset)
colnames(scores) = target.classes

for (class.index in 1:length(target.classes))
{
  target.class = target.classes[class.index]
  print(target.class)
  filtered = eset[, eset@phenoData@data[,target.class] %in% c("Yes","No")]
  #filtered=eset
  # Design matrix and fit linear model
 
  for (i in 1:ncol(filtered)){
    print(paste(i, "/", ncol(filtered)))
    t_filtered = filtered[,-i]
 
    samples = t_filtered@phenoData@data[,target.class]
    samples = as.factor(samples)
    design = model.matrix(~0 + samples)
    colnames(design) = levels(samples)
    linfit = lmFit(exprs(t_filtered), design)
 
    # Define the comparisons to make in this data set: dal80_LoaOOH vs. dal80_con
    contrast.matrix = makeContrasts(A_B = Yes - No, levels=design)#experimental-control
    contrast.fit = contrasts.fit(linfit, contrast.matrix)
    smooth.fit = eBayes(contrast.fit)
    probeset.list = topTable(smooth.fit, number=nrow(t_filtered), coef="A_B", genelist=smooth.fit$genes, adjust.method="BH", sort.by="P", p.value=1)
 
    topprobes = topTable(smooth.fit, number=10928, coef="A_B", genelist=smooth.fit$genes, adjust.method="BH", sort.by="P")
    for (index in 1:nrow(topprobes)){
      scores[rownames(topprobes)[index],target.class] =
        scores[rownames(topprobes)[index],target.class] + (nrow(scores) - index)
    }
  }
}
tmp <- eset
colnames(tmp) <- pheno$sampletype

gene.annotations = read.csv2("gene_annotation.csv", header=TRUE, sep=",")
rownames(gene.annotations) <- gene.annotations$probe_set_id

tmp.table <- cbind(exprs(tmp), gene.annotations[match(rownames(exprs(tmp)), rownames(gene.annotations)),])
write.csv(cbind(Common, Systematic, scores, tmp.table), "scores10928_4.csv")

# Volcano plot
par(oma=c(4,2,2,2))#bottom, left, top, right
volcanoplot(smooth.fit, coef="A_B", highlight=10, names=paste(Symbols[row.names(smooth.fit)]," ", Common[row.names(smooth.fit)]),
            main=target.class)#ask Robbie how to decrease font


topprobes = topTable(smooth.fit, number=5000, coef="A_B", genelist=smooth.fit$genes, adjust.method="BH", sort.by="P")
filtexp = exprs(filtered);
colnames(filtexp) = filtered@phenoData$sampletype

#my.colors2 = colorRampPalette(c("darkmagenta", "darkblue", "blue4", "blue3", "blue2", "blue1", "cyan2", "cyan1", "lightblue2", "lightblue1", "green4", "green3", "green2", "green1", "lawngreen", "chartreuse2", "chartreuse1", "lightgreen", "lightyellow1", "lightyellow2", "moccasin", "navajowhite1", "navajowhite2", "yellow1", "yellow2", "gold1", "gold2", "orange1", "orange2", "darkorange1", "darkorange2", "orangered1", "orangered2", "red1", "red2", "red3", "tan3", "saddlebrown", "tan4"))(2048)

#par(cex.main=1)#my heat map
#heatmap.2(filtexp[row.names(topprobes),],
          #scale="row",
          #ylab="Gene", xlab="Sample", main="long vs. short",
          #labRow=paste(Common[selected.probes], "  ", Systematic[selected.probes]),
          #col=my.colors2,
          #cellnote=round(filtexp[row.names(topprobes),], digits=2),
          #trace = "none", density.info = "none",
          #cexRow=0.0377, cexCol=0.85,
          #notecol="black", notecex=0.0377)

# scale = row is row Z-score (-1,1)
# scale = none is just the raw values
#heatmap.2(filtexp[row.names(topprobes),], scale="row", ylab="Gene", xlab=" ",
          #trace='none',
       # main=target.class, cexRow=0.1, cexCol=0.8, labRow=paste(Systematic, " ", Common))

#heatmap.2(exprs(eset), scale="row", ylab="Gene", xlab=" ",
          #trace='none',
 #         main=target.class, cexRow=0.1, cexCol=0.8, labRow=paste(Systematic, " ", Common))


#my.colors = colorRampPalette(c("darkorchid4", "darkorchid", "mediumorchid", "orchid3", "orchid4", "orchid", "plum", "purple", "darkblue", "slateblue", "blue", "dodgerblue", "darkcyan", "cyan", "darkturquoise", "turquoise", "lightblue", "mediumaquamarine", "aquamarine", "seagreen", "lightseagreen", "olivedrab",
 #                              "darkgreen", "darkolivegreen", "green", "chartreuse", "springgreen", "lightgreen",  "palegreen", "greenyellow", "yellow", "palegoldenrod", "lightgoldenrod", "goldenrod", "darkgoldenrod", "lightpink", "pink", "hotpink", "deeppink", "orange", "darkorange", "orangered", "indianred", "tomato", "red", "firebrick", "darkred"))(2048)
#rainbow heat map
#par(cex.main=1)
#heatmap.2(filtexp[row.names(topprobes),],
 #         scale="none",
  #        #scale="row",
   #       ylab="Gene", xlab=" ",
    #      col=my.colors2, # My heat map
     #     #col=rainbow(n=1000,start=0,end=0.8), # rainbow heatmap
      #    cellnote=round(filtexp[row.names(topprobes),], digits=2),
          #trace = "none", density.info = "none",
       #   notecol="black", notecex=0.0377, margins=c(2,21),
        #  main="LaoOOH vs. Con",
         # cexRow=0.1, cexCol=0.8, labRow=paste(Systematic, " ", Common),
          #trace = "none",
        #  density.info = "none"#)

#str(topprobes)
# Heatmap using sample names
#colnames(filtexp) = filtered$SampleName
#heatmap(filtexp[row.names(topprobes),], scale="row", ylab="Gene", xlab="", main="LaoOOH vs. Con", cexRow=0.1, cexCol=0.8, labRow=paste(Systematic, " ", Common))

# saves top genes into topgenes.csv file
# you should change the number= parameter to extract as many genes as you want.
topprobes = topTable(smooth.fit, number=500, coef="A_B", genelist=smooth.fit$genes, adjust.method="BH", sort.by="P")
write.csv(cbind(topprobes,
                Common[rownames(topprobes)],
                Systematic[rownames(topprobes)]), 'topgenes_4.csv')

 

Thanks a lot for your help and advice

 

Thomas

 

microarray input files saccharomyces cerevisiae bioconductor • 1.8k views
ADD COMMENT
1
Entering edit mode
Dan Tenenbaum ★ 8.2k
@dan-tenenbaum-4256
Last seen 3.2 years ago
United States

In this particular script, the lines that define 'Symbols' are all commented out. So maybe it's just a question of uncommenting those lines (or one of them, at least).

It's probably a good idea, once you have an R script of this length, to convert it into a package. Then you can just install the package and not worry if it will work in one directory and not another, or with some files and not without them.

Dan

 

ADD COMMENT

Login before adding your answer.

Traffic: 745 users visited in the last hour
Help About
FAQ
Access RSS
API
Stats

Use of this site constitutes acceptance of our User Agreement and Privacy Policy.

Powered by the version 2.3.6