# From Bioconductor
if (!require("BiocManager", quietly = TRUE))
install.packages("BiocManager")
BiocManager::install("gVenn")ChIP-seq analysis workflow with gVenn using A549 datasets: identifying binding site overlaps
Supplementary data S1
Last updated: 11 February 2026
1 Overview
This supplementary material demonstrates a complete ChIP-seq analysis workflow using the gVenn R package with A549 cell line data. The workflow includes:
- Loading ChIP-seq peak data
- Computing overlaps between genomic regions
- Visualizing results with Venn diagrams and UpSet plots
- Extracting specific overlap groups for downstream analysis
- Exporting results
2 Installation
The gVenn package is available through Bioconductor and GitHub.
You can install it from Bioconductor using:
To install the development version from GitHub, use:
# install.packages("pak") # if not already installed
pak::pak("ckntav/gVenn")
# or, alternatively:
# install.packages("devtools") # if not already installed
devtools::install_github("ckntav/gVenn")3 Load required packages
library(gVenn)
library(GenomicRanges)
library(knitr)
library(tidyverse)
library(kableExtra)3.1 Checking gVenn version
packageVersion("gVenn")[1] '1.1.1'
4 Dataset description
The dataset consists of ChIP-seq peaks from A549 cells (lung adenocarcinoma cell line) after dexamethasone treatment for two transcriptional coregulators and a transcription factor:
- MED1: Mediator complex subunit 1
- BRD4: Bromodomain-containing protein 4
- GR: Glucocorticoid receptor
These data represent binding sites identified from ChIP-seq experiments. To keep the dataset small and suitable for examples and tests, each set has been restricted to peaks located on chromosome 7.
The data originate from Tav et al. (2023) (doi:10.3389/fgene.2023.1237092).
5 Load ChIP-seq data
The gVenn package includes example ChIP-seq peak data for the A549 cell line.
5.1 Locate data files
First, we locate the BED files included in the package:
# Locate the gzipped BED files inside inst/extdata/ of the installed package
bed_med1 <- system.file("extdata", "A549_MED1_Dex.stdchr.bed.gz", package = "gVenn")
bed_brd4 <- system.file("extdata", "A549_BRD4_Dex.stdchr.bed.gz", package = "gVenn")
bed_gr <- system.file("extdata", "A549_GR_Dex.stdchr.bed.gz", package = "gVenn")5.2 Import peak data
Next, we import the peak sets using rtracklayer:
# Import full consensus peak sets
med1_full <- rtracklayer::import(bed_med1)
brd4_full <- rtracklayer::import(bed_brd4)
gr_full <- rtracklayer::import(bed_gr)5.3 Filter to chromosome 7
To keep the dataset small and suitable for examples, we restrict the analysis to peaks on chromosome 7:
# Helper function: keep only peaks on chr7
keep_chr7 <- function(gr) {
gr[seqnames(gr) %in% c("chr7", "7")]
}
# Subset each peak set to chr7 only (keeps the dataset small and fast for examples)
med1_chr7 <- keep_chr7(med1_full)
brd4_chr7 <- keep_chr7(brd4_full)
gr_chr7 <- keep_chr7(gr_full)5.4 Create GRangesList object
Finally, we combine the filtered peak sets into a single GRangesList object:
# Combine into a GRangesList with descriptive names
a549_chipseq_peaks <- GRangesList("MED1_Dex_chr7" = med1_chr7,
"BRD4_Dex_chr7" = brd4_chr7,
"GR_Dex_chr7" = gr_chr7)
# Display the structure of the dataset
a549_chipseq_peaksGRangesList object of length 3:
$MED1_Dex_chr7
GRanges object with 336 ranges and 2 metadata columns:
seqnames ranges strand | name score
<Rle> <IRanges> <Rle> | <character> <numeric>
[1] chr7 1157024-1157513 * | 4997 0
[2] chr7 1520389-1521218 * | 4998 0
[3] chr7 1536927-1537642 * | 4999 0
[4] chr7 2309837-2310506 * | 5000 0
[5] chr7 3028013-3028396 * | 5001 0
... ... ... ... . ... ...
[332] chr7 158733134-158733544 * | 5328 0
[333] chr7 158818327-158819201 * | 5329 0
[334] chr7 158821150-158821448 * | 5330 0
[335] chr7 158863388-158864513 * | 5331 0
[336] chr7 159015348-159016094 * | 5332 0
-------
seqinfo: 24 sequences from an unspecified genome; no seqlengths
$BRD4_Dex_chr7
GRanges object with 604 ranges and 2 metadata columns:
seqnames ranges strand | name score
<Rle> <IRanges> <Rle> | <character> <numeric>
[1] chr7 234690-235402 * | 9419 0
[2] chr7 538240-538633 * | 9420 0
[3] chr7 1156721-1157555 * | 9421 0
[4] chr7 1504294-1504733 * | 9422 0
[5] chr7 1506830-1507301 * | 9423 0
... ... ... ... . ... ...
[600] chr7 158829343-158830028 * | 10018 0
[601] chr7 158856251-158856723 * | 10019 0
[602] chr7 158863108-158864616 * | 10020 0
[603] chr7 159012435-159013222 * | 10021 0
[604] chr7 159015311-159016245 * | 10022 0
-------
seqinfo: 24 sequences from an unspecified genome; no seqlengths
$GR_Dex_chr7
GRanges object with 450 ranges and 2 metadata columns:
seqnames ranges strand | name score
<Rle> <IRanges> <Rle> | <character> <numeric>
[1] chr7 729847-730122 * | 6571 0
[2] chr7 1156806-1157495 * | 6572 0
[3] chr7 1520508-1521044 * | 6573 0
[4] chr7 2309959-2310483 * | 6574 0
[5] chr7 2860620-2860960 * | 6575 0
... ... ... ... . ... ...
[446] chr7 158733144-158733534 * | 7016 0
[447] chr7 158818350-158819168 * | 7017 0
[448] chr7 158821076-158821582 * | 7018 0
[449] chr7 158863549-158864364 * | 7019 0
[450] chr7 159015407-159016007 * | 7020 0
-------
seqinfo: 24 sequences from an unspecified genome; no seqlengths
5.5 Dataset summary
# Create a summary table
summary_df <- data.frame(
"factor" = gsub("_Dex_chr7", "", names(a549_chipseq_peaks)),
"number of peaks" = sapply(a549_chipseq_peaks, length),
check.names = FALSE
)
kable(summary_df,
caption = "Summary of ChIP-seq peaks for each factor")| factor | number of peaks | |
|---|---|---|
| MED1_Dex_chr7 | MED1 | 336 |
| BRD4_Dex_chr7 | BRD4 | 604 |
| GR_Dex_chr7 | GR | 450 |
6 Computing overlaps
The computeOverlaps() function analyzes the intersection patterns across all peak sets. For genomic ranges, it identifies regions that overlap between different ChIP-seq samples:
# Compute overlaps between ChIP-seq peaks
genomic_overlaps <- computeOverlaps(a549_chipseq_peaks)
# Display the structure of the result
class(genomic_overlaps)[1] "GenomicOverlapResult"
names(genomic_overlaps)[1] "reduced_regions" "overlap_matrix"
7 Visualization
7.1 Venn diagram
The plotVenn() function creates area-proportional Venn diagrams to visualize the overlaps between ChIP-seq peaks:
# Create a basic Venn diagram
plotVenn(genomic_overlaps)7.2 UpSet plot
For more than three sets, a Venn diagram with areas exactly proportional to all intersections is generally not mathematically attainable. Solvers (like those used by eulerr) provide best-effort approximations, but the layout can become hard to read. In these cases, an UpSet plot is the recommended visualization because it scales cleanly to many sets and preserves intersection sizes precisely on bar axes.
For complex overlap patterns, plotUpSet() provide an alternative visualization that can be easier to interpret with more than 4 sets:
# Create an UpSet plot
plotUpSet(genomic_overlaps,
comb_col = c( "#D87093", "#CD3301", "#9370DB", "#008B8B", "#2B70AB", "#FFB027", "#3EA742"))The top bar chart shows intersection sizes for each overlap combination, while the connected dots below indicate which sets participate in each intersection. Horizontal bars on the right show total set sizes. The UpSet representation is particularly useful for comparing more than four sets or when precise quantification of complex overlap patterns is required.
7.3 Export visualization
You can export any visualization using saveViz():
venn <- plotVenn(genomic_overlaps)
saveViz(venn,
output_dir = ".",
output_file = "figure_gVenn",
format = "pdf")By default, files are written to the current directory (“.”). If you enabled the date option (today), the current date will be prepended to the filename.
You can also export to PNG or SVG:
# png
saveViz(venn,
output_dir = ".",
output_file = "figure_gVenn",
format = "png")
# pdf
saveViz(venn,
output_dir = ".",
output_file = "figure_gVenn",
format = "svg")By default, the background is white. For presentations or publications with colored backgrounds, figures can be exported with a transparent background using bg = "transparent":
# png
saveViz(venn,
output_dir = ".",
output_file = "figure_gVenn_transparent",
format = "png",
bg = "transparent")
# svg
saveViz(venn,
output_dir = ".",
output_file = "figure_gVenn_transparent",
format = "svg",
bg = "transparent")8 Extracting overlap groups
The extractOverlaps() function retrieves the actual genomic regions for each overlap group, which can be useful for downstream analyses such as:
- Motif enrichment analysis
- Peak annotation to nearby genes
- Functional enrichment analysis
- Export for genome browsers
groups <- extractOverlaps(genomic_overlaps)# Display the number of genomic regions per overlap group
sapply(groups, length)group_010 group_001 group_100 group_110 group_011 group_101 group_111
267 125 4 48 46 16 243
8.1 Overlap group naming
When overlaps are computed, each group of elements or genomic regions is labeled with a binary code that indicates which sets the element belongs to.
- Each digit in the code corresponds to one input set (e.g., A, B, C).
- A 1 means the element is present in that set, while 0 means absent.
- The group names in the output are prefixed with “group_” for clarity.
| Group name | Meaning |
|---|---|
group_100 |
Elements only in A |
group_010 |
Elements only in B |
group_001 |
Elements only in C |
group_110 |
Elements in A ∩ B (not C) |
group_101 |
Elements in A ∩ C (not B) |
group_011 |
Elements in B ∩ C (not A) |
group_111 |
Elements in A ∩ B ∩ C |
8.2 Extract one particular group
Each overlap group can be accessed directly by name.
For example, to extract all elements that are present in A ∩ B ∩ C:
# Extract elements in group_111 (present in all three sets: MED1, BRD4, and GR)
peaks_in_all_sets <- groups[["group_111"]]
# Display the elements
peaks_in_all_setsGRanges object with 243 ranges and 1 metadata column:
seqnames ranges strand | intersect_category
<Rle> <IRanges> <Rle> | <character>
[1] chr7 1156721-1157555 * | 111
[2] chr7 1520256-1521263 * | 111
[3] chr7 2309811-2310529 * | 111
[4] chr7 3027924-3028466 * | 111
[5] chr7 3436651-3437214 * | 111
... ... ... ... . ...
[239] chr7 158431413-158433728 * | 111
[240] chr7 158818200-158819318 * | 111
[241] chr7 158821076-158821876 * | 111
[242] chr7 158863108-158864616 * | 111
[243] chr7 159015311-159016245 * | 111
-------
seqinfo: 24 sequences from an unspecified genome; no seqlengths
9 Export overlap groups
Each overlap group (e.g., group_100, group_110, group_111) can be exported for downstream analysis. The gVenn package provides two export functions depending on your data type and downstream needs:
9.1 For all overlap types (genomic or gene sets):
The function exportOverlaps() writes each group to an Excel file with one sheet per overlap group, making it easy to review and reuse the results outside of R.
# export overlaps to Excel file
exportOverlaps(groups,
output_dir = ".",
output_file = "overlap_groups")9.2 For genomic overlaps only:
When working with genomic regions (GRanges objects), you can export overlap groups as BED files using exportOverlapsToBed(). This creates one BED file per overlap group, which is ideal for visualization in genome browsers (IGV, UCSC Genome Browser) or for downstream analyses requiring BED format input.
# Export genomic overlaps to BED files
exportOverlapsToBed(groups,
output_dir = ".",
output_prefix = "overlaps")
# This will create separate BED files such as:
# - overlaps_group_100.bed
# - overlaps_group_110.bed
# - overlaps_group_111.bed
# etc.10 Customization examples
This section shows common ways to customize the Venn diagram produced by plotVenn().
# basic default venn plot (uses package defaults)
plotVenn(genomic_overlaps)10.1 Custom fills with transparency
plotVenn(genomic_overlaps,
fills = list(fill = c("#FF6B6B", "#4ECDC4", "#45B7D1"), alpha = 0.5),
legend = "right",
main = list(label = "Custom fills (transparent)", fontsize = 14))10.2 Colored edges, no fills (colored borders only)
plotVenn(genomic_overlaps,
fills = "transparent",
edges = list(col = c("red", "blue", "darkgreen"), lwd = 2),
main = list(label = "Colored borders only"))10.3 Custom labels and counts + percentages
plotVenn(genomic_overlaps,
labels = list(col = "black", fontsize = 12, font = 2),
quantities = list(type = c("counts","percent"),
col = "black", fontsize = 10),
main = list(label = "Counts + Percentages", fontsize = 14))10.4 Legend at the bottom with custom text
plotVenn(genomic_overlaps,
legend = list(side = "bottom",
labels = c("Treatment A","Treatment B","Control"),
fontsize = 10),
main = list(label = "Custom legend"))10.5 Combining multiple custom options
plotVenn(genomic_overlaps,
fills = list(fill = c("#2B70AB", "#FFB027", "#3EA742"), alpha = 0.6),
edges = list(col = "gray30", lwd = 1.5),
labels = list(col = "black", fontsize = 7, font = 2),
quantities = list(type = "counts", col = "black", fontsize = 10),
main = list(label = "multiple custom options Venn", fontsize = 16, font = 2),
legend = FALSE)11 References
For more information about the gVenn package, visit:
- Package documentation: https://ckntav.github.io/gVenn
- GitHub repository: https://github.com/ckntav/gVenn
12 Session information
Code
sessionInfo()R version 4.5.2 (2025-10-31)
Platform: aarch64-apple-darwin20
Running under: macOS Sequoia 15.7.3
Matrix products: default
BLAS: /System/Library/Frameworks/Accelerate.framework/Versions/A/Frameworks/vecLib.framework/Versions/A/libBLAS.dylib
LAPACK: /Library/Frameworks/R.framework/Versions/4.5-arm64/Resources/lib/libRlapack.dylib; LAPACK version 3.12.1
locale:
[1] fr_CA.UTF-8/fr_CA.UTF-8/fr_CA.UTF-8/C/fr_CA.UTF-8/fr_CA.UTF-8
time zone: America/Toronto
tzcode source: internal
attached base packages:
[1] stats4 stats graphics grDevices utils datasets methods
[8] base
other attached packages:
[1] kableExtra_1.4.0 lubridate_1.9.5 forcats_1.0.1
[4] stringr_1.6.0 dplyr_1.2.0 purrr_1.2.1
[7] readr_2.2.0 tidyr_1.3.2 tibble_3.3.1
[10] ggplot2_4.0.2 tidyverse_2.0.0 knitr_1.51
[13] GenomicRanges_1.62.1 Seqinfo_1.0.0 IRanges_2.44.0
[16] S4Vectors_0.48.0 BiocGenerics_0.56.0 generics_0.1.4
[19] gVenn_1.1.1
loaded via a namespace (and not attached):
[1] tidyselect_1.2.1 viridisLite_0.4.3
[3] farver_2.1.2 Biostrings_2.78.0
[5] S7_0.2.1 bitops_1.0-9
[7] fastmap_1.2.0 RCurl_1.98-1.17
[9] GenomicAlignments_1.46.0 XML_3.99-0.22
[11] digest_0.6.39 timechange_0.4.0
[13] lifecycle_1.0.5 cluster_2.1.8.2
[15] Cairo_1.7-0 magrittr_2.0.4
[17] compiler_4.5.2 polylabelr_1.0.0
[19] rlang_1.1.7 tools_4.5.2
[21] yaml_2.3.12 rtracklayer_1.70.1
[23] S4Arrays_1.10.1 htmlwidgets_1.6.4
[25] curl_7.0.0 DelayedArray_0.36.0
[27] xml2_1.5.2 eulerr_7.0.4
[29] RColorBrewer_1.1-3 abind_1.4-8
[31] BiocParallel_1.44.0 withr_3.0.2
[33] grid_4.5.2 polyclip_1.10-7
[35] colorspace_2.1-2 iterators_1.0.14
[37] scales_1.4.0 dichromat_2.0-0.1
[39] SummarizedExperiment_1.40.0 cli_3.6.5
[41] rmarkdown_2.30 crayon_1.5.3
[43] otel_0.2.0 rstudioapi_0.18.0
[45] httr_1.4.8 tzdb_0.5.0
[47] rjson_0.2.23 parallel_4.5.2
[49] XVector_0.50.0 restfulr_0.0.16
[51] matrixStats_1.5.0 vctrs_0.7.1
[53] Matrix_1.7-4 jsonlite_2.0.0
[55] GetoptLong_1.1.0 hms_1.1.4
[57] clue_0.3-67 magick_2.9.0
[59] systemfonts_1.3.1 foreach_1.5.2
[61] glue_1.8.0 codetools_0.2-20
[63] shape_1.4.6.1 stringi_1.8.7
[65] gtable_0.3.6 BiocIO_1.20.0
[67] ComplexHeatmap_2.26.1 pillar_1.11.1
[69] htmltools_0.5.9 circlize_0.4.17
[71] R6_2.6.1 textshaping_1.0.4
[73] doParallel_1.0.17 evaluate_1.0.5
[75] Biobase_2.70.0 lattice_0.22-9
[77] png_0.1-8 Rsamtools_2.26.0
[79] cigarillo_1.0.0 Rcpp_1.1.1
[81] svglite_2.2.2 SparseArray_1.10.8
[83] xfun_0.56 GlobalOptions_0.1.3
[85] MatrixGenerics_1.22.0 pkgconfig_2.0.3