15#include <seqan3/contrib/sdsl-lite.hpp>
20SEQAN3_DEPRECATED_HEADER(
"This header and its functionality is deprecated and will be removed in a future version of SeqAn. Please use the hibf-library (url: https://github.com/seqan/hibf) instead.");
96template <data_layout data_layout_mode_ = data_layout::uncompressed>
101 template <data_layout data_layout_mode>
107 seqan3::contrib::sdsl::bit_vector,
108 seqan3::contrib::sdsl::sd_vector<>>;
113 size_t technical_bins{};
126 13'043'817'825'332'782'213ULL,
127 10'650'232'656'628'343'401ULL,
128 16'499'269'484'942'379'435ULL,
129 4'893'150'838'803'335'377ULL};
138 inline constexpr size_t hash_and_fit(
size_t h,
size_t const seed)
const
142 h ^=
h >> hash_shift;
143 h *= 11'400'714'819'323'198'485ULL;
145#ifdef __SIZEOF_INT128__
160 template <
typename value_t>
192 bin_size_ = size.get();
193 hash_funs =
funs.get();
197 if (hash_funs == 0 || hash_funs > 5)
198 throw std::logic_error{
"The number of hash functions must be > 0 and <= 5."};
203 bin_words = (bins + 63) >> 6;
204 technical_bins = bin_words << 6;
205 data = seqan3::contrib::sdsl::bit_vector(technical_bins * bin_size_);
219 std::tie(bins, technical_bins, bin_size_, hash_shift, bin_words, hash_funs) =
222 data = seqan3::contrib::sdsl::bit_vector{
ibf.data.begin(),
ibf.data.end()};
239 std::tie(bins, technical_bins, bin_size_, hash_shift, bin_words, hash_funs) =
242 data = seqan3::contrib::sdsl::sd_vector<>{
ibf.data};
265 for (
size_t i = 0;
i < hash_funs; ++
i)
267 size_t idx = hash_and_fit(value, hash_seeds[
i]);
289 for (
size_t idx = bin.get(),
i = 0;
i < bin_size_;
idx += technical_bins, ++
i)
306 template <
typename rng_t>
310 static_assert(std::ranges::forward_range<rng_t>,
"The range of bins to clear must model a forward_range.");
311 static_assert(std::same_as<std::remove_cvref_t<std::ranges::range_reference_t<rng_t>>,
bin_index>,
312 "The reference type of the range to clear must be seqan3::bin_index.");
318 for (
size_t offset = 0,
i = 0;
i < bin_size_;
offset += technical_bins, ++
i)
320 data[bin.get() +
offset] = 0;
376 uint64_t
old = data.get_int(
jj);
378 data.set_int(
ii,
old);
417 template <
typename value_t = u
int16_t>
526 template <cereal_archive archive_t>
550template <data_layout data_layout_mode>
558 ibf_t const * ibf_ptr{
nullptr};
606 assert(ibf_ptr !=
nullptr);
607 assert(result_buffer.
size() == ibf_ptr->bin_count());
612 for (
size_t i = 0;
i < ibf_ptr->hash_funs; ++
i)
618 for (
size_t i = 0;
i < ibf_ptr->hash_funs; ++
i)
625 result_buffer.data.set_int(
batch << 6,
tmp);
628 return result_buffer;
638template <data_layout data_layout_mode>
643 using data_type = seqan3::contrib::sdsl::bit_vector;
705 return lhs.data ==
rhs.data;
775template <std::
integral value_t>
783 template <
typename binning_bitvector_t>
784 static constexpr bool is_binning_bitvector =
801 using base_t::base_t;
816 template <
typename binning_bitvector_t>
820 for_each_set_bin(binning_bitvector,
821 [
this](
size_t const bin)
835 template <
typename binning_bitvector_t>
839 for_each_set_bin(binning_bitvector,
840 [
this](
size_t const bin)
890 template <
typename binning_bitvector_t,
typename on_bin_fn_t>
893 assert(this->
size() >= binning_bitvector.size());
904 for (
size_t bit_pos = 0; bit_pos < binning_bitvector.size(); bit_pos += 64)
907 size_t bit_sequence = binning_bitvector.raw_data().get_int(bit_pos);
910 for (
size_t bin = bit_pos; bit_sequence != 0u; ++bin, bit_sequence >>= 1)
913 bin += jump_to_next_1bit(bit_sequence);
930template <data_layout data_layout_mode>
931template <
typename value_t>
935 static_assert(std::integral<value_t>,
"The value type must model std::integral.");
941 ibf_t const * ibf_ptr{
nullptr};
962 ibf_ptr(
std::addressof(
ibf)),
993 template <std::ranges::range value_range_t>
996 assert(ibf_ptr !=
nullptr);
997 assert(result_buffer.size() == ibf_ptr->bin_count());
999 static_assert(std::ranges::input_range<value_range_t>,
"The values must model input_range.");
1000 static_assert(std::unsigned_integral<std::ranges::range_value_t<value_range_t>>,
1001 "An individual value must be an unsigned integral.");
1005 for (
auto && value : values)
1008 return result_buffer;
1013 template <std::ranges::range value_range_t>
Provides strong types for the (Interleaved) Bloom Filter.
Adaptions of concepts from the Cereal library.
A data structure that behaves like a std::vector and can be used to consolidate the results of multip...
Definition interleaved_bloom_filter.hpp:777
counting_vector & operator+=(counting_vector const &rhs)
Bin-wise addition of two seqan3::counting_vectors.
Definition interleaved_bloom_filter.hpp:858
counting_vector & operator=(counting_vector const &)=default
Defaulted.
counting_vector & operator=(counting_vector &&)=default
Defaulted.
counting_vector & operator+=(binning_bitvector_t const &binning_bitvector)
Bin-wise adds the bits of a seqan3::interleaved_bloom_filter::membership_agent_type::binning_bitvecto...
Definition interleaved_bloom_filter.hpp:818
counting_vector(counting_vector const &)=default
Defaulted.
~counting_vector()=default
Defaulted.
counting_vector(counting_vector &&)=default
Defaulted.
counting_vector & operator-=(binning_bitvector_t const &binning_bitvector)
Bin-wise subtracts the bits of a seqan3::interleaved_bloom_filter::membership_agent_type::binning_bit...
Definition interleaved_bloom_filter.hpp:837
counting_vector()=default
Defaulted.
counting_vector & operator-=(counting_vector const &rhs)
Bin-wise subtraction of two seqan3::counting_vectors.
Definition interleaved_bloom_filter.hpp:871
A "pretty printer" for most SeqAn data structures and related types.
Definition debug_stream_type.hpp:79
Manages counting ranges of values for the seqan3::interleaved_bloom_filter.
Definition interleaved_bloom_filter.hpp:933
counting_vector< value_t > const & bulk_count(value_range_t &&values) &noexcept
Counts the occurrences in each bin for all values in a range.
Definition interleaved_bloom_filter.hpp:994
~counting_agent_type()=default
Defaulted.
counting_agent_type()=default
Defaulted.
counting_agent_type(counting_agent_type &&)=default
Defaulted.
counting_agent_type(counting_agent_type const &)=default
Defaulted.
counting_vector< value_t > const & bulk_count(value_range_t &&values) &&noexcept=delete
Counts the occurrences in each bin for all values in a range.
counting_agent_type & operator=(counting_agent_type const &)=default
Defaulted.
counting_vector< value_t > result_buffer
Stores the result of bulk_count().
Definition interleaved_bloom_filter.hpp:969
counting_agent_type & operator=(counting_agent_type &&)=default
Defaulted.
A bitvector representing the result of a call to bulk_contains of the seqan3::interleaved_bloom_filte...
Definition interleaved_bloom_filter.hpp:640
binning_bitvector(binning_bitvector &&)=default
Defaulted.
constexpr data_type & raw_data() noexcept
Provides direct, unsafe access to the underlying data structure.
Definition interleaved_bloom_filter.hpp:739
auto end() noexcept
Returns an iterator to the element following the last element of the container.
Definition interleaved_bloom_filter.hpp:687
constexpr data_type const & raw_data() const noexcept
Provides direct, unsafe access to the underlying data structure.
Definition interleaved_bloom_filter.hpp:745
~binning_bitvector()=default
Defaulted.
binning_bitvector(binning_bitvector const &)=default
Defaulted.
auto end() const noexcept
Returns an iterator to the element following the last element of the container.
Definition interleaved_bloom_filter.hpp:693
auto operator[](size_t const i) const noexcept
Return the i-th element.
Definition interleaved_bloom_filter.hpp:726
binning_bitvector(size_t const size)
Construct with given size.
Definition interleaved_bloom_filter.hpp:661
size_t size() const noexcept
Returns the number of elements.
Definition interleaved_bloom_filter.hpp:666
binning_bitvector & operator=(binning_bitvector &&)=default
Defaulted.
auto begin() noexcept
Returns an iterator to the first element of the container.
Definition interleaved_bloom_filter.hpp:675
auto begin() const noexcept
Returns an iterator to the first element of the container.
Definition interleaved_bloom_filter.hpp:681
auto operator[](size_t const i) noexcept
Return the i-th element.
Definition interleaved_bloom_filter.hpp:719
binning_bitvector & operator=(binning_bitvector const &)=default
Defaulted.
binning_bitvector()=default
Defaulted.
friend bool operator==(binning_bitvector const &lhs, binning_bitvector const &rhs) noexcept
Test for equality.
Definition interleaved_bloom_filter.hpp:703
friend bool operator!=(binning_bitvector const &lhs, binning_bitvector const &rhs) noexcept
Test for inequality.
Definition interleaved_bloom_filter.hpp:709
Manages membership queries for the seqan3::interleaved_bloom_filter.
Definition interleaved_bloom_filter.hpp:552
binning_bitvector const & bulk_contains(size_t const value) &noexcept
Determines set membership of a given value.
Definition interleaved_bloom_filter.hpp:604
~membership_agent_type()=default
Defaulted.
membership_agent_type & operator=(membership_agent_type const &)=default
Defaulted.
binning_bitvector const & bulk_contains(size_t const value) &&noexcept=delete
Determines set membership of a given value.
membership_agent_type(membership_agent_type &&)=default
Defaulted.
membership_agent_type(membership_agent_type const &)=default
Defaulted.
membership_agent_type & operator=(membership_agent_type &&)=default
Defaulted.
membership_agent_type()=default
Defaulted.
binning_bitvector result_buffer
Stores the result of bulk_contains().
Definition interleaved_bloom_filter.hpp:582
The IBF binning directory. A data structure that efficiently answers set-membership queries for multi...
Definition interleaved_bloom_filter.hpp:98
interleaved_bloom_filter(interleaved_bloom_filter< data_layout::uncompressed > const &ibf)
Construct a compressed Interleaved Bloom Filter.
Definition interleaved_bloom_filter.hpp:236
void emplace(size_t const value, bin_index const bin) noexcept
Inserts a value into a specific bin.
Definition interleaved_bloom_filter.hpp:261
interleaved_bloom_filter(interleaved_bloom_filter< data_layout::compressed > const &ibf)
Construct an uncompressed Interleaved Bloom Filter from a compressed one.
Definition interleaved_bloom_filter.hpp:216
interleaved_bloom_filter & operator=(interleaved_bloom_filter const &)=default
Defaulted.
membership_agent_type membership_agent() const
Returns a seqan3::interleaved_bloom_filter::membership_agent_type to be used for lookup.
Definition interleaved_bloom_filter.hpp:401
size_t hash_function_count() const noexcept
Returns the number of hash functions used in the Interleaved Bloom Filter.
Definition interleaved_bloom_filter.hpp:430
constexpr data_type const & raw_data() const noexcept
Provides direct, unsafe access to the underlying data structure.
Definition interleaved_bloom_filter.hpp:513
void clear(bin_index const bin) noexcept
Clears a specific bin.
Definition interleaved_bloom_filter.hpp:285
interleaved_bloom_filter(seqan3::bin_count bins_, seqan3::bin_size size, seqan3::hash_function_count funs=seqan3::hash_function_count{2u})
Construct an uncompressed Interleaved Bloom Filter.
Definition interleaved_bloom_filter.hpp:186
interleaved_bloom_filter()=default
Defaulted.
counting_agent_type< value_t > counting_agent() const
Returns a seqan3::interleaved_bloom_filter::counting_agent_type to be used for counting.
Definition interleaved_bloom_filter.hpp:418
constexpr data_type & raw_data() noexcept
Provides direct, unsafe access to the underlying data structure.
Definition interleaved_bloom_filter.hpp:507
interleaved_bloom_filter & operator=(interleaved_bloom_filter &&)=default
Defaulted.
friend bool operator!=(interleaved_bloom_filter const &lhs, interleaved_bloom_filter const &rhs) noexcept
Test for inequality.
Definition interleaved_bloom_filter.hpp:491
interleaved_bloom_filter(interleaved_bloom_filter &&)=default
Defaulted.
void increase_bin_number_to(bin_count const new_bins_)
Increases the number of bins stored in the Interleaved Bloom Filter.
Definition interleaved_bloom_filter.hpp:346
size_t bin_count() const noexcept
Returns the number of bins that the Interleaved Bloom Filter manages.
Definition interleaved_bloom_filter.hpp:438
void clear(rng_t &&bin_range) noexcept
Clears a range of bins.
Definition interleaved_bloom_filter.hpp:308
size_t bin_size() const noexcept
Returns the size of a single bin that the Interleaved Bloom Filter manages.
Definition interleaved_bloom_filter.hpp:446
size_t bit_size() const noexcept
Returns the size of the underlying bitvector.
Definition interleaved_bloom_filter.hpp:454
interleaved_bloom_filter(interleaved_bloom_filter const &)=default
Defaulted.
~interleaved_bloom_filter()=default
Defaulted.
friend bool operator==(interleaved_bloom_filter const &lhs, interleaved_bloom_filter const &rhs) noexcept
Test for equality.
Definition interleaved_bloom_filter.hpp:468
static constexpr data_layout data_layout_mode
Indicates whether the Interleaved Bloom Filter is compressed.
Definition interleaved_bloom_filter.hpp:156
@ offset
Sequence (seqan3::field::seq) relative start position (0-based), unsigned value.
data_layout
Determines if the Interleaved Bloom Filter is compressed.
Definition bloom_filter_strong_types.hpp:23
@ uncompressed
The Interleaved Bloom Filter is uncompressed.
Definition bloom_filter_strong_types.hpp:24
@ compressed
The Interleaved Bloom Filter is compressed.
Definition bloom_filter_strong_types.hpp:25
The main SeqAn3 namespace.
Definition aligned_sequence_concept.hpp:26
SeqAn specific customisations in the standard namespace.
A strong type that represents the number of bins for the seqan3::interleaved_bloom_filter.
Definition bloom_filter_strong_types.hpp:31
A strong type that represents the bin index for the seqan3::interleaved_bloom_filter.
Definition bloom_filter_strong_types.hpp:52
A strong type that represents the number of bits for each bin in the seqan3::interleaved_bloom_filter...
Definition bloom_filter_strong_types.hpp:38
A strong type that represents the number of hash functions for the seqan3::interleaved_bloom_filter.
Definition bloom_filter_strong_types.hpp:45
strong_type for seed.
Definition minimiser_hash.hpp:22