hello, I'm trying to extract the hichip fragments that interacts with my chip peak, So I did an overlapping between my chip peak and hichip(annotated hichip). The hichip are composed from 2 fragments as they identify the interactions between 2 long genomic ranges. Now the overlapping gives me in subjectHits the fragments that overlaps with my chip peak, that could be either frg1 or frg2, and so I want to see if its frg1 what's the frg2 that it interacts with it and if its frg2 what is the frg1 that it interacts with it , this is what I did so far, but im having difficulties to get these interactions. Does anyone have any idea? I thought about getting the Ids of fragments and find it in frg1 and frg2 object but it didn't help.
load("genomic_objects_hg38 (1).RData")
# Load HiChIP data
hichip_file <- ".../GSE222206_HiChIP_Mock.bedpe.gz"
hichip <- read.delim(hichip_file, header = FALSE)
colnames(hichip)[1:6] <- c("frg1", "start1", "end1", "frg2", "start2", "end2")
# Convert to GRanges
frg1 <- GRanges(seqnames = Rle(hichip$frg1), ranges = IRanges(start = hichip$start1, end = hichip$end1))
frg2 <- GRanges(seqnames = Rle(hichip$frg2), ranges = IRanges(start = hichip$start2, end = hichip$end2))
hichip <- c(frg1, frg2)
hichip <- unique(hichip)
# hichip annotation
# Initialize columns for multiple annotations
mcols(hichip)$all_genes <- character(length(hichip))
mcols(hichip)$TSS <- character(length(hichip))
mcols(hichip)$promoter <- character(length(hichip))
mcols(hichip)$all_genes_status <- character(length(hichip))
mcols(hichip)$TSS_status <- character(length(hichip))
mcols(hichip)$promoter_status <- character(length(hichip))
mcols(hichip)$genomic_location <- character(length(hichip))
# Overlap with all genes
ovl1 <- findOverlaps(hichip, all.genes)
ovl1_df <- as.data.frame(ovl1)
all_genes_by_fragment <- tapply(all.genes$SYMBOL[ovl1_df$subjectHits], ovl1_df$queryHits, paste, collapse = ", ")
all_genes_status_by_fragment <- tapply(all.genes$in.microarray[ovl1_df$subjectHits], ovl1_df$queryHits, paste, collapse = ", ")
mcols(hichip)$all_genes[as.numeric(names(all_genes_by_fragment))] <- all_genes_by_fragment # ou ovl_1_df$queryhits
mcols(hichip)$all_genes_status[as.numeric(names(all_genes_status_by_fragment))] <- all_genes_status_by_fragment
# Overlap with TSS
ovl2 <- findOverlaps(hichip, TSS)
ovl2_df <- as.data.frame(ovl2)
tss_by_fragment <- tapply(TSS$SYMBOL[ovl2_df$subjectHits], ovl2_df$queryHits, paste, collapse = ", ")
tss_status_by_fragment <- tapply(TSS$in.microarray[ovl2_df$subjectHits], ovl2_df$queryHits, paste, collapse = ", ")
mcols(hichip)$TSS[as.numeric(names(tss_by_fragment))] <- tss_by_fragment
mcols(hichip)$TSS_status[as.numeric(names(tss_status_by_fragment))] <- tss_status_by_fragment
# Overlap with promoters
ovl3 <- findOverlaps(hichip, prom)
ovl3_df <- as.data.frame(ovl3)
promoters_by_fragment <- tapply(prom$SYMBOL[ovl3_df$subjectHits], ovl3_df$queryHits, paste, collapse = ", ")
promoters_status_by_fragment <- tapply(prom$in.microarray[ovl3_df$subjectHits], ovl3_df$queryHits, paste, collapse = ", ")
mcols(hichip)$promoter[as.numeric(names(promoters_by_fragment))] <- promoters_by_fragment
mcols(hichip)$promoter_status[as.numeric(names(promoters_status_by_fragment))] <- promoters_status_by_fragment
# Summary column
mcols(hichip)$summary <- ifelse(
mcols(hichip)$TSS != "",
mcols(hichip)$TSS,
ifelse(
mcols(hichip)$promoter != "",
mcols(hichip)$promoter,
ifelse(
mcols(hichip)$all_genes != "",
mcols(hichip)$all_genes,
"intergenic"
)
)
)
# Summary status column
mcols(hichip)$summary_status <- ifelse(
mcols(hichip)$TSS != "",
mcols(hichip)$TSS_status,
ifelse(
mcols(hichip)$promoter != "",
mcols(hichip)$promoter_status,
ifelse(
mcols(hichip)$all_genes != "",
mcols(hichip)$all_genes_status,
NA
)
)
)
mcols(hichip)$genomic_location <- ifelse(
!is.na(mcols(hichip)$TSS) & mcols(hichip)$TSS != "",
"TSS",
ifelse(
!is.na(mcols(hichip)$promoter) & mcols(hichip)$promoter != "",
"promoter",
ifelse(
!is.na(mcols(hichip)$all_genes) & mcols(hichip)$all_genes != "",
"gene_body",
"intergenic"
)
)
)
chipseq_file <- ".../S1_Invi_fdr.bed"
S1 <- read.delim(chipseq_file, header = FALSE)
colnames(S1)[1:3] <- c("seqnames", "start", "end")
S1 <- GRanges(seqnames = Rle(S1$seqnames), ranges = IRanges(start = S1$start, end = S1$end))
ovl_s1 <- findOverlaps(S1, hichip)
ovl_s1_df <- as.data.frame(ovl_s1)
#IDs from overlaps
query_hits <- ovl_s1_df$queryHits
subject_hits <- ovl_s1_df$subjectHits
fragments <- hichip[subject_hits]
peaks <- hichip[query_hits]
sessionInfo()
R version 4.1.1 (2021-08-10)
Platform: x86_64-conda-linux-gnu (64-bit)
Running under: Ubuntu 20.04.6 LTS
Matrix products: default
BLAS/LAPACK: /shared/ifbstor1/software/miniconda/envs/r-4.1.1/lib/libopenblasp-r0.3.18.so
locale:
[1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8 LC_MONETARY=en_US.UTF-8
[6] LC_MESSAGES=en_US.UTF-8 LC_PAPER=en_US.UTF-8 LC_NAME=C LC_ADDRESS=C LC_TELEPHONE=C
[11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
attached base packages:
[1] grid stats4 stats graphics grDevices utils datasets methods base
other attached packages:
[1] data.table_1.14.2 reshape2_1.4.4 TxDb.Hsapiens.UCSC.hg19.knownGene_3.2.2
[4] ChIPseeker_1.30.3 ChIPpeakAnno_3.28.0 msigdbr_7.4.1
[7] ggupset_0.3.0 DOSE_3.20.1 RColorBrewer_1.1-3
[10] fgsea_1.20.0 org.Hs.eg.db_3.14.0 enrichplot_1.14.2
[13] clusterProfiler_4.2.2 pheatmap_1.0.12 VennDiagram_1.7.3
[16] futile.logger_1.4.3 enrichR_3.2 rtracklayer_1.54.0
[19] TxDb.Hsapiens.UCSC.hg38.knownGene_3.14.0 GenomicFeatures_1.46.1 AnnotationDbi_1.56.1
[22] DiffBind_3.4.11 BiocParallel_1.28.3 BiocManager_1.30.23
[25] openxlsx_4.2.6.1 readxl_1.4.3 EnhancedVolcano_1.13.2
[28] ggrepel_0.9.5 magrittr_2.0.3 lubridate_1.9.3
[31] forcats_1.0.0 stringr_1.5.1 dplyr_1.1.4
[34] purrr_1.0.2 readr_2.1.5 tidyr_1.3.1
[37] tibble_3.2.1 tidyverse_2.0.0 DESeq2_1.34.0
[40] SummarizedExperiment_1.24.0 MatrixGenerics_1.6.0 matrixStats_0.62.0
[43] GenomicRanges_1.46.1 GenomeInfoDb_1.30.1 IRanges_2.28.0
[46] S4Vectors_0.32.4 Biobase_2.54.0 BiocGenerics_0.40.0
[49] limma_3.50.3 ggplot2_3.5.1
loaded via a namespace (and not attached):
[1] utf8_1.2.2 tidyselect_1.2.1 RSQLite_2.2.8 htmlwidgets_1.5.4 scatterpie_0.1.7
[6] munsell_0.5.0 systemPipeR_2.0.4 withr_2.5.0 colorspace_2.0-3 GOSemSim_2.20.0
[11] filelock_1.0.2 rstudioapi_0.16.0 bbmle_1.0.24 GenomeInfoDbData_1.2.7 mixsqp_0.3-43
[16] hwriter_1.3.2 polyclip_1.10-0 bit64_4.0.5 farver_2.1.1 downloader_0.4
[21] treeio_1.18.1 coda_0.19-4 vctrs_0.6.5 generics_0.1.3 lambda.r_1.2.4
[26] timechange_0.3.0 BiocFileCache_2.2.0 regioneR_1.26.0 R6_2.5.1 apeglm_1.16.0
[31] graphlayouts_0.8.0 invgamma_1.1 locfit_1.5-9.4 AnnotationFilter_1.18.0 bitops_1.0-7
[36] cachem_1.0.6 gridGraphics_0.5-1 DelayedArray_0.20.0 assertthat_0.2.1 BiocIO_1.4.0
[41] scales_1.3.0 ggraph_2.0.5 gtable_0.3.1 ensembldb_2.18.2 tidygraph_1.2.0
[46] rlang_1.1.4 genefilter_1.76.0 splines_4.1.1 lazyeval_0.2.2 yaml_2.3.6
[51] qvalue_2.26.0 RBGL_1.70.0 tools_4.1.1 ggplotify_0.1.0 gplots_3.1.3.1
[56] Rcpp_1.0.9 plyr_1.8.7 progress_1.2.2 zlibbioc_1.40.0 RCurl_1.98-1.9
[61] prettyunits_1.1.1 viridis_0.6.2 ashr_2.2-47 futile.options_1.0.1 DO.db_2.9
[66] truncnorm_1.0-8 mvtnorm_1.1-3 SQUAREM_2021.1 amap_0.8-19 ProtGenerics_1.26.0
[71] patchwork_1.1.1 hms_1.1.3 xtable_1.8-4 XML_3.99-0.9 emdbook_1.3.12
[76] jpeg_0.1-9 gridExtra_2.3 compiler_4.1.1 biomaRt_2.50.0 bdsmatrix_1.3-7
[81] shadowtext_0.0.9 KernSmooth_2.23-20 crayon_1.5.2 htmltools_0.5.2 ggfun_0.0.4
[86] tzdb_0.2.0 geneplotter_1.72.0 aplot_0.1.1 DBI_1.1.2 tweenr_1.0.2
[91] formatR_1.11 WriteXLS_6.7.0 dbplyr_2.1.1 MASS_7.3-54 rappdirs_0.3.3
[96] boot_1.3-28 babelgene_21.4 ShortRead_1.52.0 Matrix_1.3-4 cli_3.6.3
[101] parallel_4.1.1 igraph_2.0.3 pkgconfig_2.0.3 GenomicAlignments_1.30.0 numDeriv_2016.8-1.1
[106] InteractionSet_1.22.0 xml2_1.3.3 ggtree_3.2.1 annotate_1.72.0 multtest_2.50.0
[111] XVector_0.34.0 yulab.utils_0.0.4 digest_0.6.30 graph_1.72.0 Biostrings_2.62.0
[116] cellranger_1.1.0 fastmatch_1.1-3 tidytree_0.3.6 restfulr_0.0.13 GreyListChIP_1.26.0
[121] curl_4.3.3 Rsamtools_2.10.0 gtools_3.9.3 rjson_0.2.20 jsonlite_1.8.8
[126] nlme_3.1-153 lifecycle_1.0.3 viridisLite_0.4.1 BSgenome_1.62.0 fansi_1.0.3
[131] pillar_1.9.0 lattice_0.20-45 plotrix_3.8-2 KEGGREST_1.34.0 fastmap_1.1.0
[136] httr_1.4.4 survival_3.2-13 GO.db_3.14.0 glue_1.6.2 zip_2.2.0
[141] png_0.1-7 bit_4.0.4 ggforce_0.3.3 stringi_1.7.8 blob_1.2.2
[146] latticeExtra_0.6-29 caTools_1.18.2 memoise_2.0.1 ape_5.5 irlba_2.3.5.1