1. Build Count Matrix

The build_count_matrix() function builds a fragment-overlap count matrix from paired-end BAM files over user-specified genomic regions.

Parameters

Parameter	Type	Default	Description
`bam_path`	character vector	—	Vector of BAM file paths
`regions`	integer or character	—	Genomic regions to evaluate, either: Integer (bin size): The genome will be segmented into non-overlapping bins of this size, and CUT&Tag counts will be summarized per bin. File path: Genomic intervals will be extracted directly from the file, and counts will be summarized exactly on these regions. When a file is provided, the file must contain valid genomic coordinates. Only one region file is accepted in the current implementation.
`out_dir`	character	`“./”`	Output directory
`ref_genome`	character	`“hg38”`	Reference genome
`sample_name`	character	`NULL`	Optional prefix added to the output filename. If provided, the final Feather file will include this value at the beginning of the filename.
`force_chr_coord`	logical	`FALSE`	By default, if the provided regions file contains a gene identifier column (for example, `gene_id` in gff/tsv files or the 4th “name” column in BED files), this column will be used as the row name instead of the “CHR_start_end” coordinate string. Setting `force_chr_coord = TRUE` overrides this behavior and forces the use of genomic coordinates as row names.

Output Files

The generated count matrix is saved as a Feather file following these conventions:

Fixed-size bins (numeric regions):
Count_Matrix_<BINSIZE>.feather (e.g., Count_Matrix_5000.feather)
Custom regions file (character regions):
Count_Matrix_<prefix>.feather, where <prefix> is the basename of the region file (without extension). (e.g., "peaks.bed" → Count_Matrix_peaks.feather)
When sample_name is provided:
<sample_name>_<Count_Matrix_xxx>.feather (e.g., C1_Count_Matrix_peaks.feather)

Read Output Files

library(arrow)
df <- read_feather("output/Count_Matrix_800.feather")
df <- as.data.frame(df, stringsAsFactors = FALSE, check.names = FALSE)
head(df)

	pos	H3K27ac-H3K4me3	H3K4me3-H3K4me3	H3K4me1-H3K4me3	H3K4me3-H3K9me3
1	chr1_1_800	0	0	0	0
2	chr1_801_1600	0	0	0	0
3	chr1_1601_2400	0	0	0	0
4	chr1_2401_3200	0	0	0	0
5	chr1_3201_4000	0	0	0	0
6	chr1_4001_4800	0	0	0	0

library(arrow)
df <- read_feather("output/Count_Matrix_800.feather")
df <- as.data.frame(df, stringsAsFactors = FALSE, check.names = FALSE)
rownames(df) <- df$pos
df$pos <- NULL
df_nonzero <- df[ rowSums(df != 0) > 0, ]
head(df_nonzero)

	H3K4me3-H3K4me3	H3K27ac-H3K27me3	H3K9me3-H3K9me3	H3K9me2-H3K9me3
chr1_9601_10400	1.4928058	1.8988764	22.8962419	1.8861111
chr1_10401_11200	0.5071942	0.1011236	1.1037581	0.1138889
chr1_12001_12800	0	0	0.1297468	0
chr1_12801_13600	0	0	0.8702532	1

Usage Examples

library(multiEpiCore)

# Basic Usage with Fixed Bins
build_count_matrix(
  bam_path = bam_files,
  regions = 800,
  out_dir = "count_matrix/"
)

# Use Custom Peak Regions
build_count_matrix(
  bam_path = bam_files,
  regions = "peaks/merged_peaks.bed",
  out_dir = "count_matrix/"
)

# Test Data
bam_dirs <- c(
  C1 = "bam/C1",
  C2 = "bam/C2",
  T1 = "bam/T1",
  T2 = "bam/T2"
)

out_dir <- "./count_matrix"
dir.create(out_dir, recursive = TRUE, showWarnings = FALSE)

for (sample_name in names(bam_dirs)) {
  bam_dir <- bam_dirs[[sample_name]]
  bam_paths <- list.files(
    path = bam_dir,
    pattern = "\\.bam$",
    full.names = TRUE
  )
  build_count_matrix(
    bam_path    = bam_paths,
    regions     = 800,
    save_dir    = out_dir,
    sample_name = sample_name
  )
}

Read Counting

1. Build Count Matrix

Parameters

Output Files

Read Output Files

Usage Examples