Hi,
I've been analysing gene expression networks from my RNAseq dataset using the WGCNA software, using the following code on a single .csv folder with individuals making the columns and different genes making up the rows - I used the gene expression values as FPFM from the tuxedo pipeline. I used this code:
R
setwd("/home/user/edgeR")
library(WGCNA)
options(stringsAsFactors = FALSE)
alphaData = read.csv("data.csv")
nSets = 1
setLabels = c("alpha")
shortLabels = c("alpha")
multiExpr = vector(mode = "list", length = nSets)
multiExpr[[1]] = list(data = as.data.frame(t(alphaData[-c(1:8)])));
names(multiExpr[[1]]$data) = alphaData$substanceBXH;
rownames(multiExpr[[1]]$data) = names(alphaData)[-c(1:8)];
exprSize = checkSets(multiExpr)
exprSize
gsg = goodSamplesGenesMS(multiExpr, verbose = 3);
gsg$allOK
sampleTrees = list()
for (set in 1:nSets)
{
sampleTrees[[set]] = hclust(dist(multiExpr[[set]]$data), method = "average")
}
pdf(file = "Plots/SampleClustering.pdf", width = 12, height = 12);
par(mfrow=c(2,1))
par(mar = c(0, 4, 2, 0))
for (set in 1:nSets)
plot(sampleTrees[[set]], main = paste("Sample clustering on all genes in", setLabels[set]),
xlab="", sub="", cex = 0.7);
dev.off()
save(multiExpr,setLabels, shortLabels,
file = "Consensus-dataInput.RData")
enableWGCNAThreads()
lnames = load(file = "Consensus-dataInput.RData");
lnames
nSets = checkSets(multiExpr)$nSets
powers = c(seq(4,10,by=1), seq(12,20, by=2));
powerTables = vector(mode = "list", length = nSets);
for (set in 1:nSets)
powerTables[[set]] = list(data = pickSoftThreshold(multiExpr[[set]]$data, powerVector=powers,
verbose = 2)[[2]]);
collectGarbage();
colors = c("black", "red")
plotCols = c(2,5,6,7)
colNames = c("Scale Free Topology Model Fit", "Mean connectivity", "Median connectivity",
"Max connectivity");
ylim = matrix(NA, nrow = 2, ncol = 4);
for (set in 1:nSets)
{
for (col in 1:length(plotCols)){
ylim[1, col] = min(ylim[1, col], powerTables[[set]]$data[, plotCols[col]], na.rm = TRUE);
ylim[2, col] = max(ylim[2, col], powerTables[[set]]$data[, plotCols[col]], na.rm = TRUE);
}
}
sizeGrWindow(8, 6)
par(mfcol = c(2,2));
par(mar = c(4.2, 4.2 , 2.2, 0.5))
cex1 = 0.7;
for (col in 1:length(plotCols)) for (set in 1:nSets)
{
if (set==1)
{
plot(powerTables[[set]]$data[,1], -sign(powerTables[[set]]$data[,3])*powerTables[[set]]$data[,2],
xlab="Soft Threshold (power)",ylab=colNames[col],type="n", ylim = ylim[, col],
main = colNames[col]);
addGrid();
}
if (col==1)
{
text(powerTables[[set]]$data[,1], -sign(powerTables[[set]]$data[,3])*powerTables[[set]]$data[,2],
labels=powers,cex=cex1,col=colors[set]);
} else
text(powerTables[[set]]$data[,1], powerTables[[set]]$data[,plotCols[col]],
labels=powers,cex=cex1,col=colors[set]);
if (col==1)
{
legend("bottomright", legend = setLabels, col = colors, pch = 20) ;
} else
legend("topright", legend = setLabels, col = colors, pch = 20) ;
}
net = blockwiseConsensusModules(
multiExpr, power = 6, minModuleSize = 30, deepSplit = 2,
pamRespectsDendro = FALSE,
mergeCutHeight = 0.25, numericLabels = TRUE,
minKMEtoStay = 0,
saveTOMs = TRUE, verbose = 5)
which returns the error
Could not find a suitable move to improve the clustering. ..merging smaller clusters... ..Working on block 1 . ....Working on set 1 Error: REAL() can only be applied to a 'numeric', not a 'integer'
Any ideas on what might be causing this? Thanks!

Hey,
So as I had integer FPKM read-counts, I used R:
log2(myarray+1)
and then exported it back into a text file which looked like:
But the same error came up again as before:
Am I doing something wrong?
Thanks for your help!