Entering edit mode
How can I download the count expression table for each sample analyzed using the TCGAbiolinks package?
#
This is the code that I've been working on:
###------------------------------------------------------
library(TCGAbiolinks)
library(dplyr)
library(DT)
library(SummarizedExperiment)
library(plyr)
library(limma)
library(biomaRt)
## ================= Samples ====================
# Case barcodes used to restrict the GDC expression query below (passed as
# its `barcode` argument) to a hand-picked subset of the TCGA-HNSC project.
listSamples <- c("TCGA-BA-5152", "TCGA-CN-A49A", "TCGA-CQ-7069", "TCGA-P3-A5QF", "TCGA-P3-A6T6", "TCGA-QK-A6IH", "TCGA-CN-6013", "TCGA-CR-6472","TCGA-P3-A5QE", "TCGA-HD-8224", "TCGA-BB-7871", "TCGA-CQ-6220", "TCGA-CQ-7064", "TCGA-F7-A624", "TCGA-P3-A6T2", "TCGA-HD-A4C1", "TCGA-D6-A6EN", "TCGA-CQ-7068", "TCGA-CV-6953", "TCGA-CV-7407", "TCGA-CV-A463", "TCGA-KU-A66T", "TCGA-MT-A7BN", "TCGA-UF-A71E", "TCGA-UF-A7JO", "TCGA-UF-A7JT", "TCGA-CQ-A4C9", "TCGA-QK-A8Z7", "TCGA-CV-A6JD", "TCGA-CN-A642", "TCGA-D6-A6EO", "TCGA-CV-6948", "TCGA-BA-5558", "TCGA-QK-A64Z", "TCGA-CQ-7063", "TCGA-BB-A5HZ", "TCGA-CN-6018", "TCGA-CQ-7071", "TCGA-CQ-A4CD", "TCGA-CR-7380", "TCGA-CV-6942", "TCGA-CV-6955", "TCGA-CV-7252", "TCGA-CV-7416", "TCGA-CV-7425", "TCGA-CV-A45V", "TCGA-HD-A633", "TCGA-MT-A67F", "TCGA-P3-A6T3", "TCGA-RS-A6TO", "TCGA-BB-A5HU", "TCGA-CR-6484", "TCGA-CV-7428", "TCGA-CV-7095", "TCGA-CN-6994", "TCGA-CR-7379", "TCGA-CV-7090", "TCGA-CV-7253", "TCGA-CV-7409", "TCGA-CV-7413", "TCGA-BA-5557", "TCGA-BB-4224", "TCGA-BB-7863", "TCGA-C9-A47Z", "TCGA-C9-A480", "TCGA-CN-6996", "TCGA-CQ-5327", "TCGA-CQ-5329", "TCGA-CQ-6229", "TCGA-CQ-7065", "TCGA-CQ-A4CE", "TCGA-CQ-A4CH", "TCGA-CR-6488", "TCGA-CR-7382", "TCGA-CV-5973", "TCGA-CV-5979", "TCGA-CV-6003", "TCGA-CV-6939", "TCGA-CV-6959", "TCGA-CV-7104", "TCGA-CV-7238", "TCGA-CV-7243", "TCGA-CV-7255","TCGA-CV-7438", "TCGA-CV-A45P", "TCGA-CV-A465", "TCGA-CV-A6JT", "TCGA-CV-A6K0","TCGA-D6-6515", "TCGA-D6-A6EM", "TCGA-DQ-5624", "TCGA-HD-7831", "TCGA-HD-A6HZ", "TCGA-IQ-A61J", "TCGA-IQ-A6SG", "TCGA-MT-A67A", "TCGA-P3-A5QA", "TCGA-QK-A652", "TCGA-T2-A6WX", "TCGA-UP-A6WW", "TCGA-BA-A6DB", "TCGA-CN-4725", "TCGA-CN-4733", "TCGA-CN-4737", "TCGA-CR-7372", "TCGA-CR-7393", "TCGA-IQ-A61L", "TCGA-BA-6873", "TCGA-H7-A6C4", "TCGA-DQ-5630", "TCGA-CQ-6222", "TCGA-CX-7085", "TCGA-CR-7391", "TCGA-CN-6017", "TCGA-4P-AA8J", "TCGA-CQ-7067", "TCGA-CV-7236")
# Describe the files to fetch: legacy-archive RNA-Seq "results" files for the
# primary-tumor and solid-normal samples of the selected TCGA-HNSC cases.
query.exp <- GDCquery(
  project = "TCGA-HNSC",
  data.category = "Gene expression",
  data.type = "Gene expression quantification",
  platform = "Illumina HiSeq",
  experimental.strategy = "RNA-Seq",
  file.type = "results",
  sample.type = c("Primary Tumor", "Solid Tissue Normal"),
  barcode = listSamples,
  legacy = TRUE
)

# Download every file matched by the query.
GDCdownload(query = query.exp)

# Assemble the downloaded files into a single object and also keep a copy
# on disk under the given file name.
HNSC.exp <- GDCprepare(
  query = query.exp,
  save = TRUE,
  save.filename = "HNSC_selectedExp.rda"
)
# Subtype annotation for the HNSC cohort
dataSubt <- TCGAquery_subtype(tumor = "HNSC")

# Clinical records for the TCGA-HNSC project
dataClin <- GDCquery_clinic(project = "TCGA-HNSC", type = "clinical")

# Split the barcodes returned by the query by sample type:
# "TP" selects primary tumor samples ...
dataSmTP <- TCGAquery_SampleTypes(
  barcode = getResults(query.exp, cols = "cases"),
  typesample = "TP"
)
# ... and "NT" selects solid tissue normal samples.
dataSmNT <- TCGAquery_SampleTypes(
  barcode = getResults(query.exp, cols = "cases"),
  typesample = "NT"
)
# Preprocess the prepared expression object. The object produced by
# GDCprepare() in this script is `HNSC.exp`; the name used here before
# (`non_habits_HNSC.exp`) is never defined in the script and would stop it
# with an "object not found" error.
dataPrep <- TCGAanalyze_Preprocessing(object = HNSC.exp, cor.cut = 0.6)

# Normalize using the "gcContent" method.
# NOTE(review): `geneInfo` is not defined in this script; presumably it is the
# gene annotation table shipped with TCGAbiolinks -- confirm it is available.
dataNorm <- TCGAanalyze_Normalization(
  tabDF = dataPrep,
  geneInfo = geneInfo,
  method = "gcContent"
)

# Filter the data: quantile filter with a 0.25 cut-off.
dataFilt <- TCGAanalyze_Filtering(
  tabDF = dataNorm,
  method = "quantile",
  qnt.cut = 0.25
)
# NOTE(review): `dataFilt` is later subset by sample barcode in its columns,
# so it appears to hold the per-sample expression values; if a per-sample
# table is needed it can presumably be exported with write.table() -- confirm.
###### Differential expression analysis
# Compare the normal columns (condition 1) against the tumor columns
# (condition 2) of the filtered matrix, applying the FDR and log fold-change
# cut-offs given below.
dataDEGs <- TCGAanalyze_DEA(
  mat1 = dataFilt[, dataSmNT],
  mat2 = dataFilt[, dataSmTP],
  Cond1type = "Normal",
  Cond2type = "Tumor",
  method = "glmLRT",
  fdr.cut = 0.01,
  logFC.cut = 1
)

# Save the result table as tab-separated text.
write.table(dataDEGs, file = "non_habits_HNSC_selected.txt", sep = "\t")
# Volcano plot of the differential expression result: log fold change on the
# x axis, FDR on the y axis, written to a PNG file.
# The title previously read "CIMP-high vs CIMP-low", which does not match the
# comparison computed in this script (Normal vs Tumor), so the saved figure
# was mislabelled.
TCGAVisualize_volcano(
  x = dataDEGs$logFC,
  y = dataDEGs$FDR,
  filename = "HNSCselected_volcanoexp.png",
  # NOTE(review): these plot cut-offs (6 and 1e-5) are stricter than the
  # DEA cut-offs (logFC 1, FDR 0.01) -- confirm this is intended.
  x.cut = 6,
  y.cut = 10^-5,
  names = rownames(dataDEGs),
  color = c("black", "red", "darkgreen"),
  names.size = 2,
  xlab = " Gene expression fold change (Log2)",
  legend = "State",
  title = "Volcano plot (Normal vs Tumor)",
  width = 10
)
**************************************************************************************
With this code I collected the DE spreadsheet, but I need to have the counts or logFC for each of the samples that I used.
Can anyone help me?