Hello,
I am performing CNV analysis on TCGA data with the gaia package. I have followed the gaia reference manual and thoroughly checked the format of my inputs, but I still do not get any results. The runGAIA function runs without any errors, but it always returns an empty data frame.
Here is some information about the data I am using.
> str(cnvMatrix)
'data.frame': 28270 obs. of 6 variables:
 $ Chromosome    : int 1 1 1 1 1 1 1 1 1 1 ...
 $ Start         : int 25266637 40881535 48892229 72290430 94665210 103697067 109690415 112153343 152583230 193187149 ...
 $ End           : int 25318693 40908771 49360721 72343068 94669551 103717410 109697556 112154591 152614118 193187218 ...
 $ Num.of.Markers: int 19 16 232 49 16 3 12 22 35 4 ...
 $ Sample.Name   : chr "TCGA-AA-A02R-01A-01D-A008-01" "TCGA-AA-A02R-01A-01D-A008-01" "TCGA-AA-A02R-01A-01D-A008-01" "TCGA-AA-A02R-01A-01D-A008-01" ...
 $ Aberration    : int 1 1 0 1 1 1 0 0 0 1 ...
> str(markersMatrix)
'data.frame': 1874147 obs. of 3 variables:
 $ Probe.Name: int 3017 3018 3019 3020 3021 3022 3023 3024 3025 3026 ...
 $ Chromosome: int 1 1 1 1 1 1 1 1 1 1 ...
 $ Start     : int 99897690 99904180 99942312 99973037 99984524 99990722 100006891 100027241 100031071 100083855 ...
The problem, and most probably the reason why I do not get any results, seems to be the matrix returned by load_cnv(cnvMatrix, markers_obj, nbsamples),
in which the observations for each aberration (in my case 0s and 1s) in each chromosome are provided three times.
I cannot see why I get this result. I hope someone can help me.
Thank you in advance.
R.
Having the same problem here!
# Query, download and prepare the TCGA-PRAD masked copy-number segments.
library(TCGAbiolinks)
# The Bioconductor package is called "gaia". R package names are
# case-sensitive, so library(GAIA) stops with "there is no package called".
library(gaia)

# Query parameters ----
project <- "TCGA-PRAD"
data.category <- "Copy Number Variation"
data.type <- "Masked Copy Number Segment"
legacy <- FALSE
sample.type <- c("Primary solid Tumor")

datQuery <- GDCquery(
  project = project,
  data.category = data.category,
  data.type = data.type,
  legacy = legacy,
  sample.type = sample.type
)
GDCdownload(datQuery)

# save = TRUE with save.filename asks GDCprepare to also write the prepared
# object to prad.cnv.hg38.rda.
prad <- GDCprepare(
  datQuery,
  save = TRUE,
  save.filename = "prad.cnv.hg38.rda",
  summarizedExperiment = TRUE
)
## Marker Descriptor Matrix
# Probe metadata file, listed among the GDC reference files:
# https://gdc.cancer.gov/about-data/data-harmonization-and-generation/gdc-reference-files
url <- "https://gdc.cancer.gov/files/public/file/snp6.na35.liftoverhg38.txt.zip"
temp <- tempfile()
download.file(url = url, destfile = temp)
# Extracts into the current working directory, where read.csv() looks next.
unzip(temp)
probes_metadata <- read.csv(
  "snp6.na35.liftoverhg38.txt",
  sep = "\t",
  as.is = TRUE
)

# Filter probes_metadata for common CNV: keep rows whose freqcnv is FALSE.
probes_metadata <- probes_metadata[probes_metadata[, "freqcnv"] == FALSE, ]

# Rename the first three columns to the names used for the marker matrix.
colnames(probes_metadata)[1:3] <- c("Probe.Name", "Chromosome", "Start")

# Recode the sex chromosomes as 23 / 24 so the column converts to integer.
probes_metadata[probes_metadata$Chromosome == "X", "Chromosome"] <- 23
probes_metadata[probes_metadata$Chromosome == "Y", "Chromosome"] <- 24
probes_metadata$Chromosome <- as.integer(probes_metadata$Chromosome)

# Remove probes that share a chromosome:start position with an earlier row.
markerID <- paste(
  probes_metadata$Chromosome,
  probes_metadata$Start,
  sep = ":"
)
probes_metadata <- probes_metadata[!duplicated(markerID), ]

# Recompute the IDs so they line up with the de-duplicated rows.
markerID <- paste(
  probes_metadata$Chromosome,
  probes_metadata$Start,
  sep = ":"
)

markers_obj <- load_markers(probes_metadata)
## Aberrant Region Descriptor Matrix
# NOTE(review): load() is expected to restore an object named `data`, which
# the next line reads -- confirm against how prad.cnv.hg38.rda was saved.
load("prad.cnv.hg38.rda")
synthCNV_Matrix <- data

# Label segments: 0 where Segment_Mean < -0.2, 1 where Segment_Mean > 0.2.
# Segments in between keep Label = NA and are dropped.
synthCNV_Matrix <- cbind(synthCNV_Matrix, Label = NA)
segment_mean <- synthCNV_Matrix[["Segment_Mean"]]
# which() ignores NA comparisons; a logical index containing NA would make
# the data-frame assignment fail.
synthCNV_Matrix[which(segment_mean < -0.2), "Label"] <- 0
synthCNV_Matrix[which(segment_mean > 0.2), "Label"] <- 1
synthCNV_Matrix <- synthCNV_Matrix[!is.na(synthCNV_Matrix$Label), ]

# Reorder by position and rename.
# NOTE(review): assumes `data` has 7 columns with the sample ID in column 7,
# so the appended Label is column 8 -- TODO confirm against names(data).
synthCNV_Matrix <- synthCNV_Matrix[, c(7, 2, 3, 4, 5, 8)]
colnames(synthCNV_Matrix) <- c(
  "Sample.Name", "Chromosome", "Start", "End", "Num.of.Markers", "Aberration"
)

# Replace X and Y chromosome names with 23 / 24.
xidx <- which(synthCNV_Matrix$Chromosome == "X")
yidx <- which(synthCNV_Matrix$Chromosome == "Y")
synthCNV_Matrix[xidx, "Chromosome"] <- 23
synthCNV_Matrix[yidx, "Chromosome"] <- 24
# as.integer() is already vectorised; sapply() over a character vector would
# also attach the values as names to the result.
synthCNV_Matrix$Chromosome <- as.integer(synthCNV_Matrix$Chromosome)

# `selected` is never defined in this script. The third argument of load_cnv()
# is the number of samples, so count the distinct sample names in the matrix.
n_samples <- length(unique(synthCNV_Matrix$Sample.Name))
cnv_obj <- load_cnv(synthCNV_Matrix, markers_obj, n_samples)
#Loading Copy Number Data