18#include <sdsl/bit_vectors.hpp>
35struct bin_count :
public detail::strong_type<size_t, bin_count, detail::strong_type_skill::convert>
37 using detail::strong_type<size_t, bin_count, detail::strong_type_skill::convert>::strong_type;
42struct bin_size :
public detail::strong_type<size_t, bin_size, detail::strong_type_skill::convert>
44 using detail::strong_type<size_t, bin_size, detail::strong_type_skill::convert>::strong_type;
49struct hash_function_count :
public detail::strong_type<size_t, hash_function_count, detail::strong_type_skill::convert>
51 using detail::strong_type<size_t, hash_function_count, detail::strong_type_skill::convert>::strong_type;
56struct bin_index :
public detail::strong_type<size_t, bin_index, detail::strong_type_skill::convert>
58 using detail::strong_type<size_t, bin_index, detail::strong_type_skill::convert>::strong_type;
132template <data_layout data_layout_mode_ = data_layout::uncompressed>
137 template <data_layout data_layout_mode>
149 size_t technical_bins{};
162 13043817825332782213ULL,
163 10650232656628343401ULL,
164 16499269484942379435ULL,
165 4893150838803335377ULL};
174 inline constexpr size_t hash_and_fit(
size_t h,
size_t const seed)
const
177 assert(hash_shift < 64);
178 h ^= h >> hash_shift;
179 h *= 11400714819323198485ULL;
181#ifdef __SIZEOF_INT128__
182 h =
static_cast<uint64_t
>((
static_cast<__uint128_t
>(h) *
static_cast<__uint128_t
>(bin_size_)) >> 64);
195 template <std::
integral value_t>
229 bin_size_ =
size.get();
230 hash_funs = funs.get();
234 if (hash_funs == 0 || hash_funs > 5)
235 throw std::logic_error{
"The number of hash functions must be > 0 and <= 5."};
240 bin_words = (bins + 63) >> 6;
241 technical_bins = bin_words << 6;
242 data = sdsl::bit_vector(technical_bins * bin_size_);
261 std::tie(bins, technical_bins, bin_size_, hash_shift, bin_words, hash_funs) =
262 std::tie(ibf.bins, ibf.technical_bins, ibf.bin_size_, ibf.hash_shift, ibf.bin_words, ibf.hash_funs);
264 data = sdsl::sd_vector<>{ibf.data};
288 assert(bin.get() < bins);
289 for (
size_t i = 0; i < hash_funs; ++i)
291 size_t idx = hash_and_fit(value, hash_seeds[i]);
293 assert(idx < data.size());
314 assert(bin.get() < bins);
315 for (
size_t idx = bin.get(), i = 0; i < bin_size_; idx += technical_bins, ++i)
332 template <
typename rng_t>
336 void clear(rng_t && bin_range)
noexcept
338 static_assert(std::ranges::forward_range<rng_t>,
"The range of bins to clear must model a forward_range.");
339 static_assert(std::same_as<std::remove_cvref_t<std::ranges::range_reference_t<rng_t>>,
bin_index>,
340 "The reference type of the range to clear must be seqan3::bin_index.");
342 for (
auto && bin : bin_range)
343 assert(bin.get() < bins);
346 for (
size_t offset = 0, i = 0; i < bin_size_;
offset += technical_bins, ++i)
347 for (
auto && bin : bin_range)
348 data[bin.get() +
offset] = 0;
379 size_t new_bins = new_bins_.get();
385 size_t new_bin_words = (new_bins + 63) >> 6;
389 if (new_bin_words == bin_words)
392 size_t new_technical_bins = new_bin_words << 6;
393 size_t new_bits = bin_size_ * new_technical_bins;
395 size_t idx_{new_bits}, idx{data.size()};
396 size_t delta = new_technical_bins - technical_bins + 64;
398 data.resize(new_bits);
400 for (
size_t i = idx_, j = idx; j > 0; i -= new_technical_bins, j -= technical_bins)
402 size_t stop = i - new_technical_bins;
404 for (
size_t ii = i - delta, jj = j - 64; stop && ii >= stop; ii -= 64, jj -= 64)
406 uint64_t old = data.get_int(jj);
408 data.set_int(ii, old);
412 bin_words = new_bin_words;
413 technical_bins = new_technical_bins;
447 template <
typename value_t = u
int16_t>
500 return std::tie(lhs.bins, lhs.technical_bins, lhs.bin_size_, lhs.hash_shift, lhs.bin_words, lhs.hash_funs,
502 std::tie(rhs.bins, rhs.technical_bins, rhs.bin_size_, rhs.hash_shift, rhs.bin_words, rhs.hash_funs,
513 return !(lhs == rhs);
546 template <cereal_archive archive_t>
547 void CEREAL_SERIALIZE_FUNCTION_NAME(archive_t & archive)
550 archive(technical_bins);
570template <data_layout data_layout_mode>
578 ibf_t const * ibf_ptr{
nullptr};
598 ibf_ptr(
std::addressof(ibf)), result_buffer(ibf.
bin_count())
627 assert(ibf_ptr !=
nullptr);
628 assert(result_buffer.
size() == ibf_ptr->bin_count());
631 std::memcpy(&bloom_filter_indices, &ibf_ptr->hash_seeds,
sizeof(
size_t) * ibf_ptr->hash_funs);
633 for (
size_t i = 0; i < ibf_ptr->hash_funs; ++i)
634 bloom_filter_indices[i] = ibf_ptr->hash_and_fit(value, bloom_filter_indices[i]);
636 for (
size_t batch = 0; batch < ibf_ptr->bin_words; ++batch)
639 for (
size_t i = 0; i < ibf_ptr->hash_funs; ++i)
641 assert(bloom_filter_indices[i] < ibf_ptr->
data.size());
642 tmp &= ibf_ptr->data.get_int(bloom_filter_indices[i]);
643 bloom_filter_indices[i] += 64;
646 result_buffer.data.set_int(batch << 6, tmp);
649 return result_buffer;
654 [[nodiscard]] binning_bitvector
const & bulk_contains(
size_t const value) &&
noexcept =
delete;
660template <data_layout data_layout_mode>
665 using data_type = sdsl::bit_vector;
671 template <std::
integral value_t>
731 return lhs.data == rhs.data;
737 return !(lhs == rhs);
771 constexpr data_type
const &
raw_data() const noexcept
801template <std::
integral value_t>
818 using base_t::base_t;
833 template <
typename rhs_t>
835 requires std::same_as<rhs_t,
843 assert(this->
size() >= rhs.size());
846 for (
size_t batch = 0, bin = 0; batch < ((rhs.size() + 63) >> 6); bin = 64 * ++batch)
848 size_t tmp = rhs.data.get_int(batch * 64);
849 if (tmp ^ (1ULL<<63))
897template <data_layout data_layout_mode>
898template <std::
integral value_t>
906 ibf_t const * ibf_ptr{
nullptr};
956 template <std::ranges::range value_range_t>
959 assert(ibf_ptr !=
nullptr);
960 assert(result_buffer.
size() == ibf_ptr->bin_count());
962 static_assert(std::ranges::input_range<value_range_t>,
"The values must model input_range.");
963 static_assert(std::unsigned_integral<std::ranges::range_value_t<value_range_t>>,
964 "An individual value must be an unsigned integral.");
966 std::ranges::fill(result_buffer, 0);
968 for (
auto && value : values)
971 return result_buffer;
976 template <std::ranges::range value_range_t>
The <algorithm> header from C++20's standard library.
The <bit> header from C++20's standard library.
Adaptions of concepts from the Cereal library.
A data structure that behaves like a std::vector and can be used to consolidate the results of multip...
Definition: interleaved_bloom_filter.hpp:803
counting_vector & operator+=(rhs_t const &rhs)
Bin-wise adds the bits of a seqan3::interleaved_bloom_filter::membership_agent::binning_bitvector.
Definition: interleaved_bloom_filter.hpp:841
counting_vector & operator+=(counting_vector const &rhs)
Bin-wise addition of two seqan3::counting_vectors.
Definition: interleaved_bloom_filter.hpp:878
counting_vector & operator=(counting_vector const &)=default
Defaulted.
counting_vector & operator=(counting_vector &&)=default
Defaulted.
counting_vector(counting_vector const &)=default
Defaulted.
~counting_vector()=default
Defaulted.
counting_vector(counting_vector &&)=default
Defaulted.
counting_vector()=default
Defaulted.
Manages counting ranges of values for the seqan3::interleaved_bloom_filter.
Definition: interleaved_bloom_filter.hpp:900
counting_vector< value_t > const & bulk_count(value_range_t &&values) &noexcept
Counts the occurrences in each bin for all values in a range.
Definition: interleaved_bloom_filter.hpp:957
~counting_agent_type()=default
Defaulted.
counting_agent_type()=default
Defaulted.
counting_agent_type(counting_agent_type &&)=default
Defaulted.
counting_agent_type(counting_agent_type const &)=default
Defaulted.
counting_agent_type & operator=(counting_agent_type const &)=default
Defaulted.
counting_vector< value_t > result_buffer
Stores the result of bulk_count().
Definition: interleaved_bloom_filter.hpp:932
counting_agent_type & operator=(counting_agent_type &&)=default
Defaulted.
A bitvector representing the result of a call to bulk_contains of the seqan3::interleaved_bloom_filte...
Definition: interleaved_bloom_filter.hpp:662
auto end() noexcept
Returns an iterator to the element following the last element of the container.
Definition: interleaved_bloom_filter.hpp:713
auto end() const noexcept
Returns an iterator to the element following the last element of the container.
Definition: interleaved_bloom_filter.hpp:719
binning_bitvector()=default
Defaulted.
constexpr data_type & raw_data() noexcept
Provides direct, unsafe access to the underlying data structure.
Definition: interleaved_bloom_filter.hpp:765
binning_bitvector(size_t const size)
Construct with given size.
Definition: interleaved_bloom_filter.hpp:686
auto operator[](size_t const i) const noexcept
Return the i-th element.
Definition: interleaved_bloom_filter.hpp:752
binning_bitvector & operator=(binning_bitvector &&)=default
Defaulted.
size_t size() const noexcept
Returns the number of elements.
Definition: interleaved_bloom_filter.hpp:692
auto begin() noexcept
Returns an iterator to the first element of the container.
Definition: interleaved_bloom_filter.hpp:701
binning_bitvector(binning_bitvector &&)=default
Defaulted.
~binning_bitvector()=default
Defaulted.
auto begin() const noexcept
Returns an iterator to the first element of the container.
Definition: interleaved_bloom_filter.hpp:707
binning_bitvector & operator=(binning_bitvector const &)=default
Defaulted.
constexpr data_type const & raw_data() const noexcept
Provides direct, unsafe access to the underlying data structure.
Definition: interleaved_bloom_filter.hpp:771
auto operator[](size_t const i) noexcept
Return the i-th element.
Definition: interleaved_bloom_filter.hpp:745
binning_bitvector(binning_bitvector const &)=default
Defaulted.
friend bool operator==(binning_bitvector const &lhs, binning_bitvector const &rhs) noexcept
Test for equality.
Definition: interleaved_bloom_filter.hpp:729
friend bool operator!=(binning_bitvector const &lhs, binning_bitvector const &rhs) noexcept
Test for inequality.
Definition: interleaved_bloom_filter.hpp:735
Manages membership queries for the seqan3::interleaved_bloom_filter.
Definition: interleaved_bloom_filter.hpp:572
~membership_agent()=default
Defaulted.
membership_agent()=default
Defaulted.
membership_agent & operator=(membership_agent const &)=default
Defaulted.
membership_agent(membership_agent const &)=default
Defaulted.
membership_agent & operator=(membership_agent &&)=default
Defaulted.
membership_agent(membership_agent &&)=default
Defaulted.
binning_bitvector const & bulk_contains(size_t const value) &noexcept
Determines set membership of a given value.
Definition: interleaved_bloom_filter.hpp:625
binning_bitvector result_buffer
Stores the result of bulk_contains().
Definition: interleaved_bloom_filter.hpp:603
The IBF binning directory. A data structure that efficiently answers set-membership queries for multi...
Definition: interleaved_bloom_filter.hpp:134
interleaved_bloom_filter(seqan3::bin_count bins_, seqan3::bin_size size, seqan3::hash_function_count funs=seqan3::hash_function_count{2u})
Construct an uncompressed Interleaved Bloom Filter.
Definition: interleaved_bloom_filter.hpp:221
interleaved_bloom_filter & operator=(interleaved_bloom_filter const &)=default
Defaulted.
size_t hash_function_count() const noexcept
Returns the number of hash functions used in the Interleaved Bloom Filter.
Definition: interleaved_bloom_filter.hpp:460
constexpr data_type const & raw_data() const noexcept
Provides direct, unsafe access to the underlying data structure.
Definition: interleaved_bloom_filter.hpp:533
interleaved_bloom_filter()=default
Defaulted.
counting_agent_type< value_t > counting_agent() const
Returns a seqan3::interleaved_bloom_filter::counting_agent_type to be used for counting.
Definition: interleaved_bloom_filter.hpp:448
constexpr data_type & raw_data() noexcept
Provides direct, unsafe access to the underlying data structure.
Definition: interleaved_bloom_filter.hpp:527
membership_agent membership_agent() const
Returns a seqan3::interleaved_bloom_filter::membership_agent to be used for lookup.
Definition: interleaved_bloom_filter.hpp:431
void emplace(size_t const value, bin_index const bin) noexcept
Inserts a value into a specific bin.
Definition: interleaved_bloom_filter.hpp:283
interleaved_bloom_filter & operator=(interleaved_bloom_filter &&)=default
Defaulted.
friend bool operator!=(interleaved_bloom_filter const &lhs, interleaved_bloom_filter const &rhs) noexcept
Test for inequality.
Definition: interleaved_bloom_filter.hpp:511
interleaved_bloom_filter(interleaved_bloom_filter &&)=default
Defaulted.
void clear(bin_index const bin) noexcept
Clears a specific bin.
Definition: interleaved_bloom_filter.hpp:309
size_t bin_count() const noexcept
Returns the number of bins that the Interleaved Bloom Filter manages.
Definition: interleaved_bloom_filter.hpp:468
size_t bin_size() const noexcept
Returns the size of a single bin that the Interleaved Bloom Filter manages.
Definition: interleaved_bloom_filter.hpp:476
size_t bit_size() const noexcept
Returns the size of the underlying bitvector.
Definition: interleaved_bloom_filter.hpp:484
interleaved_bloom_filter(interleaved_bloom_filter const &)=default
Defaulted.
~interleaved_bloom_filter()=default
Defaulted.
void clear(rng_t &&bin_range) noexcept
Clears a range of bins.
Definition: interleaved_bloom_filter.hpp:336
friend bool operator==(interleaved_bloom_filter const &lhs, interleaved_bloom_filter const &rhs) noexcept
Test for equality.
Definition: interleaved_bloom_filter.hpp:498
static constexpr data_layout data_layout_mode
Indicates whether the Interleaved Bloom Filter is compressed.
Definition: interleaved_bloom_filter.hpp:192
void increase_bin_number_to(bin_count const new_bins_)
Increases the number of bins stored in the Interleaved Bloom Filter.
Definition: interleaved_bloom_filter.hpp:374
interleaved_bloom_filter(interleaved_bloom_filter< data_layout::uncompressed > const &ibf)
Construct a compressed Interleaved Bloom Filter.
Definition: interleaved_bloom_filter.hpp:256
@ offset
Sequence (seqan3::field::seq) relative start position (0-based), unsigned value.
data_layout
Determines if the Interleaved Bloom Filter is compressed.
Definition: interleaved_bloom_filter.hpp:28
@ uncompressed
The Interleaved Bloom Filter is uncompressed.
Definition: interleaved_bloom_filter.hpp:29
@ compressed
The Interleaved Bloom Filter is compressed.
Definition: interleaved_bloom_filter.hpp:30
constexpr int countl_zero(T x) noexcept
Returns the number of consecutive 0 bits in the value of x, starting from the most significant bit ("...
Definition: bit:210
constexpr int countr_zero(T x) noexcept
Returns the number of consecutive 0 bits in the value of x, starting from the least significant bit (...
Definition: bit:228
constexpr size_t size
The size of a type pack.
Definition: traits.hpp:151
The main SeqAn3 namespace.
Definition: cigar_operation_table.hpp:2
SeqAn specific customisations in the standard namespace.
Provides basic data structure for strong types.
A strong type that represents the number of bins for the seqan3::interleaved_bloom_filter.
Definition: interleaved_bloom_filter.hpp:36
A strong type that represents the bin index for the seqan3::interleaved_bloom_filter.
Definition: interleaved_bloom_filter.hpp:57
A strong type that represents the number of bits for each bin in the seqan3::interleaved_bloom_filter...
Definition: interleaved_bloom_filter.hpp:43
A strong type that represents the number of hash functions for the seqan3::interleaved_bloom_filter.
Definition: interleaved_bloom_filter.hpp:50
strong_type for seed.
Definition: minimiser_hash.hpp:25