HIBF 1.0.0-rc.1
All Classes Namespaces Files Functions Variables Typedefs Friends Macros Modules Pages Concepts
config.hpp
1// SPDX-FileCopyrightText: 2006-2025, Knut Reinert & Freie Universität Berlin
2// SPDX-FileCopyrightText: 2016-2025, Knut Reinert & MPI für molekulare Genetik
3// SPDX-License-Identifier: BSD-3-Clause
4
5#pragma once
6
7#include <cstddef> // for size_t
8#include <cstdint> // for uint32_t, uint8_t
9#include <functional> // for function
10#include <iosfwd> // for istream, ostream
11
12#include <cereal/access.hpp> // for access
13#include <cereal/cereal.hpp> // for make_nvp, CEREAL_NVP
14
15#include <hibf/misc/insert_iterator.hpp> // for insert_iterator
16#include <hibf/platform.hpp>
17
18namespace seqan::hibf
19{
20
// The configuration used to build an (H)IBF.
//
// NOTE(review): every line of this listing is prefixed with the line number of the original header,
// and some original lines are absent. In particular, serialize() archives number_of_user_bins,
// number_of_hash_functions, empty_bin_fraction, max_rearrangement_ratio, disable_estimate_union and
// disable_rearrangement, but none of these members has a visible declaration in this listing.
74struct config
75{
// [REQUIRED] Callback describing how to hash the input. It receives a size_t and an insert_iterator
// rvalue. It is neither compared by operator== nor archived by serialize().
110 std::function<void(size_t const, insert_iterator &&)> input_fn{};
111
124
136
// [RECOMMENDED_TO_ADAPT] Desired maximum false positive rate of the underlying Bloom Filters.
// Defaults to 0.05.
153 double maximum_fpr{0.05};
154
// Allows a higher FPR in non-accuracy-critical parts of the HIBF structure. Defaults to 0.3.
181 double relaxed_fpr{0.3};
182
// [RECOMMENDED_TO_ADAPT] Number of threads to use during construction. Defaults to 1.
194 size_t threads{1u};
196
// Number of bits for HyperLogLog sketches. Defaults to 12.
213 uint8_t sketch_bits{12};
214
// Maximum number of technical bins of each IBF in the HIBF. Value-initialised, i.e. 0.
232 size_t tmax{};
233
250
// Scaling factor that influences the amount of merged bins produced by the layout algorithm.
// Defaults to 1.2.
266 double alpha{1.2};
267
281
292
304
// Stream I/O for the configuration. Only the declarations are visible here.
// NOTE(review): presumably read_from() parses what write_to() emits - confirm against the definitions.
305 void read_from(std::istream & stream);
306 void write_to(std::ostream & stream) const;
307
335
// Member-wise equality: two configs are documented as equal if all options except input_fn are equal.
// Floating-point members (maximum_fpr, relaxed_fpr, alpha) are compared exactly with ==.
// NOTE(review): listing lines 340, 346 and 348-350 are absent, so the `return` keyword and the
// remaining operands are not visible; as shown, the expression is incomplete and ends in a dangling &&.
337 constexpr bool operator==(config const & other) const
338 {
339 // clang-format off
341 maximum_fpr == other.maximum_fpr &&
342 relaxed_fpr == other.relaxed_fpr &&
343 threads == other.threads &&
344 sketch_bits == other.sketch_bits &&
345 tmax == other.tmax &&
347 alpha == other.alpha &&
351 // clang-format on
352 }
353
354private:
// Friend declaration so that cereal::access can reach the private serialize() below.
355 friend class cereal::access;
356
// Version number of the serialised layout; serialize() archives it first under the name "version".
357 static constexpr uint32_t version{2};
358
// Archives the version followed by the configuration options as name-value pairs.
// input_fn is not archived.
359 template <typename archive_t>
360 void serialize(archive_t & archive)
361 {
// Starts out as the current version.
// NOTE(review): with an input archive the next call presumably overwrites it with the stored
// version, which is what makes the check further down meaningful - confirm against cereal semantics.
362 uint32_t parsed_version{version};
363 archive(cereal::make_nvp("version", parsed_version));
364
365 archive(CEREAL_NVP(number_of_user_bins));
366 archive(CEREAL_NVP(number_of_hash_functions));
367 archive(CEREAL_NVP(maximum_fpr));
368 archive(CEREAL_NVP(relaxed_fpr));
369 archive(CEREAL_NVP(threads));
370
371 archive(CEREAL_NVP(sketch_bits));
372 archive(CEREAL_NVP(tmax));
373
// empty_bin_fraction is archived only when the parsed version is greater than 1.
374 if (parsed_version > 1u)
375 archive(CEREAL_NVP(empty_bin_fraction));
376
377 archive(CEREAL_NVP(alpha));
378 archive(CEREAL_NVP(max_rearrangement_ratio));
379 archive(CEREAL_NVP(disable_estimate_union));
380 archive(CEREAL_NVP(disable_rearrangement));
381 }
382};
383
384} // namespace seqan::hibf
seqan::hibf::insert_iterator
Definition insert_iterator.hpp:25
hibf/platform.hpp
Provides platform and dependency checks.
seqan::hibf::config
The configuration used to build an (H)IBF.
Definition config.hpp:75
uint8_t sketch_bits
The number of bits for HyperLogLog sketches.
Definition config.hpp:213
void validate_and_set_defaults()
Checks several variables of seqan::hibf::config and sets default values if necessary.
size_t number_of_hash_functions
The number of hash functions for the underlying Bloom Filters.
Definition config.hpp:135
double maximum_fpr
The desired maximum false positive rate of the underlying Bloom Filters. [RECOMMENDED_TO_ADAPT].
Definition config.hpp:153
double empty_bin_fraction
The percentage of empty bins in the layout.
Definition config.hpp:249
bool disable_estimate_union
Whether to disable union estimate of user bins to improve the layout.
Definition config.hpp:291
double alpha
A scaling factor to influence the amount of merged bins produced by the layout algorithm.
Definition config.hpp:266
bool disable_rearrangement
Whether to disable rearranging user bins based on their content similarity.
Definition config.hpp:302
size_t number_of_user_bins
The number of user bins.
Definition config.hpp:123
constexpr bool operator==(config const &other) const
Two configs are equal if all options, except seqan::hibf::config::input_fn, are equal.
Definition config.hpp:337
double relaxed_fpr
Allow a higher FPR in non-accuracy-critical parts of the HIBF structure.
Definition config.hpp:181
size_t threads
The number of threads to use during construction. [RECOMMENDED_TO_ADAPT].
Definition config.hpp:194
std::function< void(size_t const, insert_iterator &&)> input_fn
A function for how to hash your input [REQUIRED].
Definition config.hpp:110
double max_rearrangement_ratio
The maximal cardinality ratio in the clustering intervals of the layout rearrangement algorithm.
Definition config.hpp:280
size_t tmax
The maximum number of technical bins of each IBF in the HIBF.
Definition config.hpp:232