Hi,
I've been analysing gene expression networks from my RNAseq dataset using the WGCNA software, using the following code on a single .csv folder with individuals making the columns and different genes making up the rows - I used the gene expression values as FPFM from the tuxedo pipeline. I used this code:
R setwd("/home/user/edgeR") library(WGCNA) options(stringsAsFactors = FALSE) alphaData = read.csv("data.csv") nSets = 1 setLabels = c("alpha") shortLabels = c("alpha") multiExpr = vector(mode = "list", length = nSets) multiExpr[[1]] = list(data = as.data.frame(t(alphaData[-c(1:8)]))); names(multiExpr[[1]]$data) = alphaData$substanceBXH; rownames(multiExpr[[1]]$data) = names(alphaData)[-c(1:8)]; exprSize = checkSets(multiExpr) exprSize gsg = goodSamplesGenesMS(multiExpr, verbose = 3); gsg$allOK sampleTrees = list() for (set in 1:nSets) { sampleTrees[[set]] = hclust(dist(multiExpr[[set]]$data), method = "average") } pdf(file = "Plots/SampleClustering.pdf", width = 12, height = 12); par(mfrow=c(2,1)) par(mar = c(0, 4, 2, 0)) for (set in 1:nSets) plot(sampleTrees[[set]], main = paste("Sample clustering on all genes in", setLabels[set]), xlab="", sub="", cex = 0.7); dev.off() save(multiExpr,setLabels, shortLabels, file = "Consensus-dataInput.RData") enableWGCNAThreads() lnames = load(file = "Consensus-dataInput.RData"); lnames nSets = checkSets(multiExpr)$nSets powers = c(seq(4,10,by=1), seq(12,20, by=2)); powerTables = vector(mode = "list", length = nSets); for (set in 1:nSets) powerTables[[set]] = list(data = pickSoftThreshold(multiExpr[[set]]$data, powerVector=powers, verbose = 2)[[2]]); collectGarbage(); colors = c("black", "red") plotCols = c(2,5,6,7) colNames = c("Scale Free Topology Model Fit", "Mean connectivity", "Median connectivity", "Max connectivity"); ylim = matrix(NA, nrow = 2, ncol = 4); for (set in 1:nSets) { for (col in 1:length(plotCols)){ ylim[1, col] = min(ylim[1, col], powerTables[[set]]$data[, plotCols[col]], na.rm = TRUE); ylim[2, col] = max(ylim[2, col], powerTables[[set]]$data[, plotCols[col]], na.rm = TRUE); } } sizeGrWindow(8, 6) par(mfcol = c(2,2)); par(mar = c(4.2, 4.2 , 2.2, 0.5)) cex1 = 0.7; for (col in 1:length(plotCols)) for (set in 1:nSets) { if (set==1) { plot(powerTables[[set]]$data[,1], -sign(powerTables[[set]]$data[,3])*powerTables[[set]]$data[,2], xlab="Soft Threshold (power)",ylab=colNames[col],type="n", ylim = ylim[, col], main = colNames[col]); addGrid(); } if (col==1) { text(powerTables[[set]]$data[,1], -sign(powerTables[[set]]$data[,3])*powerTables[[set]]$data[,2], labels=powers,cex=cex1,col=colors[set]); } else text(powerTables[[set]]$data[,1], powerTables[[set]]$data[,plotCols[col]], labels=powers,cex=cex1,col=colors[set]); if (col==1) { legend("bottomright", legend = setLabels, col = colors, pch = 20) ; } else legend("topright", legend = setLabels, col = colors, pch = 20) ; } net = blockwiseConsensusModules( multiExpr, power = 6, minModuleSize = 30, deepSplit = 2, pamRespectsDendro = FALSE, mergeCutHeight = 0.25, numericLabels = TRUE, minKMEtoStay = 0, saveTOMs = TRUE, verbose = 5)
which returns the error
Could not find a suitable move to improve the clustering. ..merging smaller clusters... ..Working on block 1 . ....Working on set 1 Error: REAL() can only be applied to a 'numeric', not a 'integer'
Any ideas on what might be causing this? Thanks!
Hey,
So as I had integer FPKM read-counts, I used R:
log2(myarray+1)
and then exported it back into a text file which looked like:
But the same error came up again as before:
Am I doing something wrong?
Thanks for your help!