HIBF 1.0.0-rc.1
All Classes Namespaces Files Functions Variables Typedefs Friends Macros Modules Pages Concepts
data_store.hpp
1// SPDX-FileCopyrightText: 2006-2025, Knut Reinert & Freie Universität Berlin
2// SPDX-FileCopyrightText: 2016-2025, Knut Reinert & MPI für molekulare Genetik
3// SPDX-License-Identifier: BSD-3-Clause
4
5#pragma once
6
7#include <cassert> // for assert
8#include <cstddef> // for size_t
9#include <cstdint> // for uint64_t
10#include <numeric> // for iota
11#include <string> // for basic_string, string
12#include <vector> // for vector
13
14#include <hibf/layout/layout.hpp> // for layout
15#include <hibf/misc/timer.hpp> // for concurrent_timer
16#include <hibf/sketch/hyperloglog.hpp> // for hyperloglog
17
18namespace seqan::hibf::layout
19{
20
25{
33 {
34 std::vector<size_t> bin_indices{};
35 std::string num_of_bins;
36
37 bool empty() const // Reports whether bin_indices holds no entries.
38 {
39 assert(bin_indices.empty() == num_of_bins.empty()); // Invariant: both members are empty, or both are non-empty.
40 return bin_indices.empty(); // Checking one member suffices given the invariant asserted above.
41 }
42 };
43
48 layout * hibf_layout; // Will be modified by {simple,hierarchical}_binning.
49
51 std::vector<size_t> const * kmer_counts{}; // Pointed to data should not be modified.
52
54 std::vector<sketch::hyperloglog> const * sketches{}; // Pointed to data should not be modified.
56
66 {
68 if (this->kmer_counts == nullptr)
69 return ps; // GCOVR_EXCL_LINE
70 ps.resize(this->kmer_counts->size());
71 std::iota(ps.begin(), ps.end(), 0);
72 return ps;
73 }(); // GCOVR_EXCL_LINE
74
77
80
83
86
87 bool user_bins_arranged{false};
88
91
95
96 void validate() const;
97};
98
99} // namespace seqan::hibf::layout
T begin(T... args)
A timer with a thread-safe operator+=().
Definition timer.hpp:155
T empty(T... args)
T end(T... args)
Provides seqan::hibf::sketch::hyperloglog.
T iota(T... args)
T resize(T... args)
T size(T... args)
Stores information of the previous level of a given IBF.
Definition data_store.hpp:33
Contains information used for the layout.
Definition data_store.hpp:25
std::vector< size_t > const * kmer_counts
The kmer counts associated with the above files, used to lay out user bins into technical bins.
Definition data_store.hpp:51
std::vector< double > fpr_correction
The false positive correction based on fp_rate, num_hash_functions and requested_max_tb.
Definition data_store.hpp:76
double relaxed_fpr_correction
The correction factor for merged bins which are allowed to have a relaxed FPR.
Definition data_store.hpp:79
std::vector< size_t > positions
The input is sorted and rearranged. To keep track without changing the input we store the positions.
Definition data_store.hpp:65
bool user_bins_arranged
Flag indicating whether the user bins have been arranged (sorted and rearranged); initialized to false.
Definition data_store.hpp:87
previous_level previous
Information about previous levels of the IBF if the algorithm is called recursively.
Definition data_store.hpp:82
std::vector< sketch::hyperloglog > const * sketches
The hyperloglog sketches of all input files to estimate their size and similarities.
Definition data_store.hpp:54
layout * hibf_layout
The layout that is built by layout::hierarchical_binning.
Definition data_store.hpp:48
std::vector< uint64_t > union_estimates
Matrix of estimates of merged bin cardinalities.
Definition data_store.hpp:85
concurrent_timer union_estimation_timer
Tracks the time the algorithm spends on estimating the union of user bins (merged bins).
Definition data_store.hpp:90
concurrent_timer rearrangement_timer
Tracks the time the algorithm spends on rearranging user bins (merged bins).
Definition data_store.hpp:93
The layout.
Definition layout.hpp:22
Provides seqan::hibf::timer.