Help With TCGAbiolinks package
1
0
Entering edit mode
DANIELA • 0
@cc33d309
Last seen 2.7 years ago
Brazil

How to download the count expression table for each sample analized using TCGAbiolinks package ?

#
this is the code that i've been working:

###------------------------------------------------------

library(TCGAbiolinks)
library(dplyr)
library(DT)
library(SummarizedExperiment)
library(plyr)
library(limma)
library(biomaRt)


##  =================    Samples   ====================

listSamples <- c("TCGA-BA-5152", "TCGA-CN-A49A", "TCGA-CQ-7069",   "TCGA-P3-A5QF", "TCGA-P3-A6T6", "TCGA-QK-A6IH", "TCGA-CN-6013", "TCGA-CR-6472","TCGA-P3-A5QE", "TCGA-HD-8224", "TCGA-BB-7871", "TCGA-CQ-6220", "TCGA-CQ-7064",  "TCGA-F7-A624", "TCGA-P3-A6T2", "TCGA-HD-A4C1", "TCGA-D6-A6EN", "TCGA-CQ-7068", "TCGA-CV-6953", "TCGA-CV-7407", "TCGA-CV-A463", "TCGA-KU-A66T", "TCGA-MT-A7BN",  "TCGA-UF-A71E", "TCGA-UF-A7JO", "TCGA-UF-A7JT", "TCGA-CQ-A4C9", "TCGA-QK-A8Z7",  "TCGA-CV-A6JD", "TCGA-CN-A642", "TCGA-D6-A6EO", "TCGA-CV-6948", "TCGA-BA-5558",    "TCGA-QK-A64Z", "TCGA-CQ-7063", "TCGA-BB-A5HZ", "TCGA-CN-6018", "TCGA-CQ-7071",   "TCGA-CQ-A4CD", "TCGA-CR-7380", "TCGA-CV-6942", "TCGA-CV-6955", "TCGA-CV-7252",   "TCGA-CV-7416", "TCGA-CV-7425", "TCGA-CV-A45V", "TCGA-HD-A633", "TCGA-MT-A67F",   "TCGA-P3-A6T3", "TCGA-RS-A6TO", "TCGA-BB-A5HU", "TCGA-CR-6484", "TCGA-CV-7428",    "TCGA-CV-7095", "TCGA-CN-6994", "TCGA-CR-7379", "TCGA-CV-7090", "TCGA-CV-7253",  "TCGA-CV-7409", "TCGA-CV-7413", "TCGA-BA-5557", "TCGA-BB-4224", "TCGA-BB-7863",  "TCGA-C9-A47Z", "TCGA-C9-A480", "TCGA-CN-6996", "TCGA-CQ-5327", "TCGA-CQ-5329",  "TCGA-CQ-6229", "TCGA-CQ-7065", "TCGA-CQ-A4CE", "TCGA-CQ-A4CH", "TCGA-CR-6488", "TCGA-CR-7382", "TCGA-CV-5973", "TCGA-CV-5979", "TCGA-CV-6003", "TCGA-CV-6939",  "TCGA-CV-6959", "TCGA-CV-7104", "TCGA-CV-7238", "TCGA-CV-7243", "TCGA-CV-7255","TCGA-CV-7438", "TCGA-CV-A45P", "TCGA-CV-A465", "TCGA-CV-A6JT", "TCGA-CV-A6K0","TCGA-D6-6515", "TCGA-D6-A6EM", "TCGA-DQ-5624", "TCGA-HD-7831", "TCGA-HD-A6HZ", "TCGA-IQ-A61J", "TCGA-IQ-A6SG", "TCGA-MT-A67A", "TCGA-P3-A5QA", "TCGA-QK-A652", "TCGA-T2-A6WX", "TCGA-UP-A6WW", "TCGA-BA-A6DB", "TCGA-CN-4725", "TCGA-CN-4733", "TCGA-CN-4737", "TCGA-CR-7372", "TCGA-CR-7393", "TCGA-IQ-A61L", "TCGA-BA-6873",    "TCGA-H7-A6C4", "TCGA-DQ-5630", "TCGA-CQ-6222", "TCGA-CX-7085", "TCGA-CR-7391",  "TCGA-CN-6017", "TCGA-4P-AA8J", "TCGA-CQ-7067", "TCGA-CV-7236")

query.exp <- GDCquery(project = "TCGA-HNSC", 
                      legacy = TRUE,
                      data.category = "Gene expression",
                      data.type = "Gene expression quantification",
                      platform = "Illumina HiSeq", 
                      file.type = "results",
                      barcode = listSamples,
                      experimental.strategy = "RNA-Seq",
                      sample.type = c("Primary Tumor","Solid Tissue Normal"))
GDCdownload(query.exp)

HNSC.exp <- GDCprepare(query = query.exp, save = TRUE,
                                  save.filename = "HNSC_selectedExp.rda")

# get subtype information 
dataSubt <- TCGAquery_subtype(tumor = "HNSC")

# get clinical data 
dataClin <- GDCquery_clinic(project = "TCGA-HNSC","clinical") 


# Which samples are Primary Tumor
dataSmTP <- TCGAquery_SampleTypes(getResults(query.exp,cols="cases"),"TP") 

# which samples are solid tissue normal
dataSmNT <- TCGAquery_SampleTypes(getResults(query.exp,cols="cases"),"NT")

dataPrep <-TCGAanalyze_Preprocessing(object = non_habits_HNSC.exp, cor.cut = 0.6)                      

dataNorm <- TCGAanalyze_Normalization(tabDF = dataPrep,
                                      geneInfo = geneInfo,
                                      method = "gcContent")                
#filtrando os dados:
dataFilt <- TCGAanalyze_Filtering(tabDF = dataNorm,
                                  method = "quantile", 
                                  qnt.cut =  0.25)   

######    
dataDEGs <- TCGAanalyze_DEA(mat1 = dataFilt[,dataSmNT],
                            mat2 = dataFilt[,dataSmTP],
                            Cond1type = "Normal",
                            Cond2type = "Tumor",
                            fdr.cut = 0.01 ,
                            logFC.cut = 1,
                            method = "glmLRT")  

write.table(dataDEGs, "non_habits_HNSC_selected.txt", sep="\t")

TCGAVisualize_volcano(x = dataDEGs$logFC,
                      y = dataDEGs$FDR,
                      filename = "HNSCselected_volcanoexp.png",
                      x.cut = 6,
                      y.cut = 10^-5,
                      names = rownames(dataDEGs),
                      color = c("black","red","darkgreen"),
                      names.size = 2,
                      xlab = " Gene expression fold change (Log2)",
                      legend = "State",
                      title = "Volcano plot (CIMP-high vs CIMP-low)",
                      width = 10)

**************************************************************************************

With This code i colected the DE spreadsheet. But i'm need to have the counts or logFC from each samples that i used.

Can anyone help me ?

TCGAbiolinks data normalize l • 1.1k views
ADD COMMENT
0
Entering edit mode
Kevin Blighe ★ 3.9k
@kevin
Last seen 1 day ago
Republic of Ireland

Hi Daniela,

The raw counts should be contained in the HGNC.exp object under the 'raw_count' assay:

HNSC.exp

class: RangedSummarizedExperiment 
dim: 19616 126 
metadata(1): data_release
assays(2): raw_count scaled_estimate
rownames(19616): A1BG|1 A2M|2 ... TMED7-TICAM2|100302736
  LOC100303728|100303728
rowData names(4): gene_id entrezgene ensembl_gene_id
  transcript_id.transcript_id_TCGA-BA-5152-01A-02R-1873-07
colnames(126): TCGA-BA-5152-01A-02R-1873-07
  TCGA-CN-A49A-01A-11R-A24H-07 ... TCGA-CQ-7067-01A-11R-2232-07
  TCGA-CV-7236-01A-11R-2016-07
colData names(77): barcode patient ... paper_Copy.Number paper_PARADIGM

library(SummarizedExperiment)
assays(HNSC.exp)$raw_count[1:5,1:5]

            TCGA-BA-5152-01A-02R-1873-07 TCGA-CN-A49A-01A-11R-A24H-07
A1BG|1                            119.00                       263.07
A2M|2                           12633.91                      7971.87
NAT1|9                            454.00                       156.00
NAT2|10                             2.00                         0.00
SERPINA3|12                       116.00                      7727.00
            TCGA-CQ-7069-01A-11R-2403-07 TCGA-P3-A5QF-01A-11R-A28V-07
A1BG|1                             68.00                        84.81
A2M|2                            7144.99                      3181.88
NAT1|9                            107.00                       326.00
NAT2|10                             0.00                         7.00
SERPINA3|12                      7595.00                        49.00
            TCGA-P3-A6T6-01A-11R-A34R-07
A1BG|1                             45.00
A2M|2                            2528.83
NAT1|9                            104.00
NAT2|10                             1.00
SERPINA3|12                       765.00

We can also access TPM values via the 'scaled_estimate' assay:

assays(HNSC.exp)$scaled_estimate[1:5,1:5]

            TCGA-BA-5152-01A-02R-1873-07 TCGA-CN-A49A-01A-11R-A24H-07
A1BG|1                      1.197571e-06                 5.651699e-06
A2M|2                       9.151945e-05                 1.324122e-04
NAT1|9                      4.359858e-06                 3.132209e-06
NAT2|10                     2.919127e-08                 0.000000e+00
SERPINA3|12                 1.360617e-06                 1.984193e-04
            TCGA-CQ-7069-01A-11R-2403-07 TCGA-P3-A5QF-01A-11R-A28V-07
A1BG|1                      2.146949e-06                 1.460906e-06
A2M|2                       1.525101e-04                 5.466426e-05
NAT1|9                      3.235555e-06                 5.443233e-06
NAT2|10                     0.000000e+00                 1.846216e-07
SERPINA3|12                 2.741503e-04                 1.054492e-06
            TCGA-P3-A6T6-01A-11R-A34R-07
A1BG|1                      1.315608e-06
A2M|2                       6.816046e-05
NAT1|9                      2.955437e-06
NAT2|10                     4.365187e-08
SERPINA3|12                 2.694737e-05

Kevin

ADD COMMENT

Login before adding your answer.

Traffic: 772 users visited in the last hour
Help About
FAQ
Access RSS
API
Stats

Use of this site constitutes acceptance of our User Agreement and Privacy Policy.

Powered by the version 2.3.6