Entering edit mode
Hello everyone,
Im trying to find columns called 'eventTime' and 'eventObserved' in my TCGA-GBM dataset with RNA-Seq data and clinical data. already called function colnames(colData(tcga_data))
but unfortunately no luck with that.
Does anyone know how to do it?
library(pathwayPCA)
library(TCGAbiolinks)
library(tidyverse)
library(SummarizedExperiment)
#donwload data
TCGAbiolinks:::getProjectSummary('TCGA-GBM')
#redo our query
query_TCGA = GDCquery(
project = "TCGA-GBM",
data.category = "Transcriptome Profiling", # parameter enforced by GDCquery
experimental.strategy = "RNA-Seq",
data.type = 'Gene Expression Quantification',
workflow.type = "STAR - Counts",
sample.type = c("Primary Tumor",'Recurrent Tumor', 'Solid Tissue Normal'))
prad_res_TCGA = getResults(query_TCGA)
GDCdownload(query = query_TCGA)
tcga_data = GDCprepare(query_TCGA)
dim(tcga_data)
colnames(colData(tcga_data))
head(assay(tcga_data)[,1:10])
table(tcga_data@colData$race)
new_data = assay(tcga_data)
#transpose new_data
new_data2 =t(new_data)
#lets choose caterogical variable
y = data.frame(tcga_data$gender, tcga_data$race,tcga_data$sample_id, tcga_data$days_to_last_follow_up, tcga_data$vital_status, tcga_data$age_at_index)
new_frame = cbind(new_data2, y)
#Changing male to 0 and female to 1
new_frame$tcga_data.gender <-ifelse(new_frame$tcga_data.gender=="female",1,0)
head(new_frame$tcga_data.gender)
#Changing Dead to 0 and Alive to 1
new_frame$tcga_data.vital_status <-ifelse(new_frame$tcga_data.vital_status=="Alive",1,0)
#Converting race from character to factor
new_frame$tcga_data.race <- as.factor(new_frame$tcga_data.race)
new_frame$tcga_data.gender <- as.factor(new_frame$tcga_data.gender)
new_frame$tcga_data.vital_status <- as.factor(new_frame$tcga_data.vital_status)
#Removing all NA values in our data frame
frame2 = new_frame[complete.cases(new_frame), ]
write.csv(frame2, '/Users/mikolajmierzejewski/Library/Mobile Documents/com~apple~CloudDocs/Magisterka_studia/Magisterka_3_semestr/Statistics_and_bioinformatisc_with_R/Report2/GBM.csv')
#pathway PCA
cp_pathwayCollection <- read_gmt('c2.all.v2022.1.Hs.entrez.gmt', description = TRUE)
head(cp_pathwayCollection)
assay_df = read.csv('GBM.csv')
assay_df
#remove NA values and rows that include them
assay_df = assay_df[complete.cases(assay_df), ]
#remove duplicates
assay_df = assay_df[!duplicated(assay_df$tcga_data.sample_id), ]
GBM_OmicsSurv = CreateOmics(
assayData_df = assay_df[,(1:60660)],
pathwayCollection_ls = cp_pathwayCollection,
response = assay_df[,60663:60665],
respType = 'surv'
)