HIBF 1.0.0-rc.1
All Classes Namespaces Files Functions Variables Typedefs Friends Macros Modules Pages Concepts
hierarchical_binning.hpp
1// SPDX-FileCopyrightText: 2006-2025, Knut Reinert & Freie Universität Berlin
2// SPDX-FileCopyrightText: 2016-2025, Knut Reinert & MPI für molekulare Genetik
3// SPDX-License-Identifier: BSD-3-Clause
4
5#pragma once
6
7#include <cstddef> // for size_t
8#include <memory> // for addressof
9#include <utility> // for pair
10#include <vector> // for vector
11
12#include <hibf/config.hpp> // for config
13#include <hibf/layout/data_store.hpp> // for data_store
14#include <hibf/misc/subtract_empty_bins.hpp> // for subtract_empty_bins
15#include <hibf/platform.hpp> // for HIBF_WORKAROUND_GCC_BOGUS_MEMCPY
16
17namespace seqan::hibf::layout
18{
19
24{
25private:
// Pointer to the data_store handed to the constructor (set there via std::addressof(data_));
// defaults to nullptr.
29 data_store * data{nullptr};
30
// Number of user bins; the constructor initialises it with data->positions.size().
32 size_t num_user_bins{};
// Number of technical bins; the constructor sets it to
// subtract_empty_bins(config.tmax, config.empty_bin_fraction) when data->previous is empty,
// and to needed_technical_bins(num_user_bins) otherwise.
34 size_t num_technical_bins{};
35
//!\brief Remembers the id and the size of the largest bin reported so far.
struct maximum_bin_tracker
{
    //!\brief Id of the largest bin recorded so far (0 until an update succeeds).
    size_t max_id{};
    //!\brief Size of the largest bin recorded so far (0 until an update succeeds).
    size_t max_size{};

    /*!\brief Records a bin if it is strictly larger than the current maximum.
     * \param[in] new_id   Id of the candidate bin.
     * \param[in] new_size Size of the candidate bin.
     *
     * \details
     * The comparison is strict: a candidate whose size equals the current maximum does not replace it,
     * so the first bin that reached the maximum size keeps being reported. A candidate of size 0 never
     * replaces the default-initialised state.
     *
     * Only compares and assigns integers, hence it cannot throw and can be evaluated at compile time.
     */
    constexpr void update_max(size_t const new_id, size_t const new_size) noexcept
    {
        if (new_size > max_size)
        {
            max_id = new_id;
            max_size = new_size;
        }
    }
};
51
52public:
59
68 config{config_},
69 data{std::addressof(data_)},
70 num_user_bins{data->positions.size()},
71 num_technical_bins{data->previous.empty() ? subtract_empty_bins(config.tmax, config.empty_bin_fraction)
72 : needed_technical_bins(num_user_bins)}
73 {}
74
// Executes the hierarchical binning algorithm and lays out user bins into technical bins.
// NOTE(review): the meaning of the returned size_t is not visible in this listing — confirm
// against the definition.
76 size_t execute();
77
78private:
// Returns a technical bin count for `requested_num_ub` user bins. The constructor calls it with
// num_user_bins to initialise num_technical_bins when data->previous is not empty.
// NOTE(review): how the count is derived is not visible here — confirm against the definition.
82 [[nodiscard]] size_t needed_technical_bins(size_t const requested_num_ub) const;
83
// Const query taking the number of user bins in a merge (`num_ubs_in_merge`) and returning a size_t.
// NOTE(review): presumably the maximum number of merge levels for that many user bins (going by the
// name) — confirm against the definition.
87 [[nodiscard]] size_t max_merge_levels(size_t const num_ubs_in_merge) const;
88
93 void initialization(std::vector<std::vector<size_t>> & matrix,
96
135 void recursion(std::vector<std::vector<size_t>> & matrix,
136 std::vector<std::vector<size_t>> & ll_matrix,
138
// Backtracking helper for a merged bin (going by its name).
// `trace_j` is taken by value and is not const, so the definition may modify its local copy;
// `max_tracker` is taken by mutable reference; `is_first_row` defaults to false.
// NOTE(review): the exact role of `trace_j`, `next_j` and `bin_id` is not visible in this listing —
// confirm against the definition.
139 void backtrack_merged_bin(size_t trace_j,
140 size_t const next_j,
141 size_t const bin_id,
142 maximum_bin_tracker & max_tracker,
143 bool is_first_row = false);
144
// Backtracking helper for a split bin (going by its name).
// `trace_j` is taken by value and is not const; `max_tracker` is taken by mutable reference.
// NOTE(review): the exact role of `trace_j`, `number_of_bins` and `bin_id` is not visible in this
// listing — confirm against the definition.
145 void backtrack_split_bin(size_t trace_j,
146 size_t const number_of_bins,
147 size_t const bin_id,
148 maximum_bin_tracker & max_tracker);
149
// Takes a read-only two-dimensional `trace` whose entries are pairs of size_t and returns a size_t.
// NOTE(review): what the pair components and the return value denote is not visible in this
// listing — confirm against the definition.
151 size_t backtracking(std::vector<std::vector<std::pair<size_t, size_t>>> const & trace);
152
// Const member that returns a data_store by value for the given `trace_j`.
// NOTE(review): presumably the data for a lower-level layout ("libf") — confirm against the definition.
153 data_store initialise_libf_data(size_t const trace_j) const;
154
// Const member that receives `libf_data` by mutable reference together with a `bin_id`.
// NOTE(review): what is done to `libf_data` for the merged bin is not visible in this listing —
// confirm against the definition.
155 void process_merged_bin(data_store & libf_data, size_t const bin_id) const;
156
// Const member that receives `libf_data` by mutable reference together with a `bin_id`.
// NOTE(review): which fields of `libf_data` are updated is not visible in this listing — confirm
// against the definition.
157 void update_libf_data(data_store & libf_data, size_t const bin_id) const;
158
// Const member that receives `libf_data` by mutable reference and returns a size_t.
// NOTE(review): presumably computes a lower-level layout from `libf_data` (going by the name); the
// meaning of the return value is not visible here — confirm against the definition.
159 size_t add_lower_level(data_store & libf_data) const;
160};
161
162} // namespace seqan::hibf::layout
Hierarchical binning algorithm.
Definition hierarchical_binning.hpp:24
size_t execute()
Executes the hierarchical binning algorithm and lays out user bins into technical bins.
hierarchical_binning & operator=(hierarchical_binning &&)=default
Defaulted.
hierarchical_binning(hierarchical_binning const &)=delete
Deleted. Would modify same data.
hierarchical_binning(data_store &data_, seqan::hibf::config const &config_)
The constructor from user bin names, their k-mer counts and a config.
Definition hierarchical_binning.hpp:67
hierarchical_binning(hierarchical_binning &&)=default
Defaulted.
hierarchical_binning & operator=(hierarchical_binning const &)=delete
Deleted. Would modify same data.
constexpr size_t subtract_empty_bins(size_t const tmax, double const fraction) noexcept
Returns the number of technical bins available for use.
Definition subtract_empty_bins.hpp:21
Provides platform and dependency checks.
The configuration used to build an (H)IBF.
Definition config.hpp:75
Contains information used for the layout.
Definition data_store.hpp:25