18 #include <sdsl/bit_vectors.hpp>
37 struct bin_count :
public detail::strong_type<size_t, bin_count, detail::strong_type_skill::convert>
39 using detail::strong_type<size_t, bin_count, detail::strong_type_skill::convert>::strong_type;
43 struct bin_size :
public detail::strong_type<size_t, bin_size, detail::strong_type_skill::convert>
45 using detail::strong_type<size_t, bin_size, detail::strong_type_skill::convert>::strong_type;
49 struct hash_function_count :
public detail::strong_type<size_t, hash_function_count, detail::strong_type_skill::convert>
51 using detail::strong_type<size_t, hash_function_count, detail::strong_type_skill::convert>::strong_type;
55 struct bin_index :
public detail::strong_type<size_t, bin_index, detail::strong_type_skill::convert>
57 using detail::strong_type<size_t, bin_index, detail::strong_type_skill::convert>::strong_type;
130 template <data_layout data_layout_mode_ = data_layout::uncompressed>
135 template <data_layout data_layout_mode>
147 size_t technical_bins{};
160 13043817825332782213ULL,
161 10650232656628343401ULL,
162 16499269484942379435ULL,
163 4893150838803335377ULL};
172 inline constexpr
size_t hash_and_fit(
size_t h,
size_t const seed)
const
175 assert(hash_shift < 64);
176 h ^= h >> hash_shift;
177 h *= 11400714819323198485ULL;
179 #ifdef __SIZEOF_INT128__
180 h =
static_cast<uint64_t
>((
static_cast<__uint128_t
>(h) *
static_cast<__uint128_t
>(bin_size_)) >> 64);
193 template <std::
integral value_t>
227 bin_size_ =
size.get();
228 hash_funs = funs.get();
232 if (hash_funs == 0 || hash_funs > 5)
233 throw std::logic_error{
"The number of hash functions must be > 0 and <= 5."};
238 bin_words = (bins + 63) >> 6;
239 technical_bins = bin_words << 6;
240 data = sdsl::bit_vector(technical_bins * bin_size_);
259 std::tie(bins, technical_bins, bin_size_, hash_shift, bin_words, hash_funs) =
260 std::tie(ibf.bins, ibf.technical_bins, ibf.bin_size_, ibf.hash_shift, ibf.bin_words, ibf.hash_funs);
262 data = sdsl::sd_vector<>{ibf.data};
286 assert(bin.get() < bins);
287 for (
size_t i = 0; i < hash_funs; ++i)
289 size_t idx = hash_and_fit(value, hash_seeds[i]);
291 assert(idx < data.size());
312 assert(bin.get() < bins);
313 for (
size_t idx = bin.get(), i = 0; i < bin_size_; idx += technical_bins, ++i)
330 template <
typename rng_t>
334 void
clear(rng_t && bin_range) noexcept
336 static_assert(std::ranges::forward_range<rng_t>,
"The range of bins to clear must model a forward_range.");
338 "The reference type of the range to clear must be seqan3::bin_index.");
340 for (
auto && bin : bin_range)
341 assert(bin.get() < bins);
344 for (
size_t offset = 0, i = 0; i < bin_size_;
offset += technical_bins, ++i)
345 for (
auto && bin : bin_range)
346 data[bin.get() +
offset] = 0;
377 size_t new_bins = new_bins_.get();
383 size_t new_bin_words = (new_bins + 63) >> 6;
387 if (new_bin_words == bin_words)
390 size_t new_technical_bins = new_bin_words << 6;
391 size_t new_bits = bin_size_ * new_technical_bins;
393 size_t idx_{new_bits}, idx{data.size()};
394 size_t delta = new_technical_bins - technical_bins + 64;
396 data.resize(new_bits);
398 for (
size_t i = idx_, j = idx; j > 0; i -= new_technical_bins, j -= technical_bins)
400 size_t stop = i - new_technical_bins;
402 for (
size_t ii = i - delta, jj = j - 64; stop && ii >= stop; ii -= 64, jj -= 64)
404 uint64_t old = data.get_int(jj);
406 data.set_int(ii, old);
410 bin_words = new_bin_words;
411 technical_bins = new_technical_bins;
445 template <
typename value_t = u
int16_t>
498 return std::tie(lhs.bins, lhs.technical_bins, lhs.bin_size_, lhs.hash_shift, lhs.bin_words, lhs.hash_funs,
500 std::tie(rhs.bins, rhs.technical_bins, rhs.bin_size_, rhs.hash_shift, rhs.bin_words, rhs.hash_funs,
511 return !(lhs == rhs);
544 template <cereal_archive archive_t>
545 void CEREAL_SERIALIZE_FUNCTION_NAME(archive_t & archive)
548 archive(technical_bins);
568 template <data_layout data_layout_mode>
576 ibf_t const * ibf_ptr{
nullptr};
596 ibf_ptr(
std::addressof(ibf)), result_buffer(ibf.
bin_count())
625 assert(ibf_ptr !=
nullptr);
626 assert(result_buffer.
size() == ibf_ptr->bin_count());
629 std::memcpy(&bloom_filter_indices, &ibf_ptr->hash_seeds,
sizeof(
size_t) * ibf_ptr->hash_funs);
631 for (
size_t i = 0; i < ibf_ptr->hash_funs; ++i)
632 bloom_filter_indices[i] = ibf_ptr->hash_and_fit(value, bloom_filter_indices[i]);
634 for (
size_t batch = 0; batch < ibf_ptr->bin_words; ++batch)
637 for (
size_t i = 0; i < ibf_ptr->hash_funs; ++i)
639 assert(bloom_filter_indices[i] < ibf_ptr->
data.size());
640 tmp &= ibf_ptr->data.get_int(bloom_filter_indices[i]);
641 bloom_filter_indices[i] += 64;
644 result_buffer.data.set_int(batch << 6, tmp);
647 return result_buffer;
652 [[nodiscard]] binning_bitvector
const & bulk_contains(
size_t const value) && noexcept =
delete;
658 template <data_layout data_layout_mode>
663 using data_type = sdsl::bit_vector;
669 template <std::
integral value_t>
729 return lhs.data == rhs.data;
735 return !(lhs == rhs);
738 #ifdef SEQAN3_DEPRECATED_310
743 return lhs.data == rhs;
750 return lhs == rhs.data;
757 return !(lhs.data == rhs);
764 return !(lhs == rhs.data);
799 constexpr data_type
const &
raw_data() const noexcept
828 template <std::
integral value_t>
845 using base_t::base_t;
860 template <
typename rhs_t>
862 requires std::same_as<rhs_t,
870 assert(this->
size() >= rhs.size());
873 for (
size_t batch = 0, bin = 0; batch < ((rhs.size() + 63) >> 6); bin = 64 * ++batch)
875 size_t tmp = rhs.data.get_int(batch * 64);
876 if (tmp ^ (1ULL<<63))
924 template <data_layout data_layout_mode>
925 template <std::
integral value_t>
933 ibf_t const * ibf_ptr{
nullptr};
983 template <std::ranges::range value_range_t>
986 assert(ibf_ptr !=
nullptr);
987 assert(result_buffer.
size() == ibf_ptr->bin_count());
989 static_assert(std::ranges::input_range<value_range_t>,
"The values must model input_range.");
990 static_assert(std::unsigned_integral<std::ranges::range_value_t<value_range_t>>,
991 "An individual value must be an unsigned integral.");
993 std::ranges::fill(result_buffer, 0);
995 for (
auto && value : values)
998 return result_buffer;
1003 template <std::ranges::range value_range_t>
Adaptations of algorithms from the Ranges TS.
Provides the C++20 <bit> header if it is not already available.
Adaptions of concepts from the Cereal library.
A data structure that behaves like a std::vector and can be used to consolidate the results of multip...
Definition: interleaved_bloom_filter.hpp:830
counting_vector & operator+=(rhs_t const &rhs)
Bin-wise adds the bits of a seqan3::interleaved_bloom_filter::membership_agent::binning_bitvector.
Definition: interleaved_bloom_filter.hpp:868
counting_vector(counting_vector const &)=default
Defaulted.
~counting_vector()=default
Defaulted.
counting_vector(counting_vector &&)=default
Defaulted.
counting_vector & operator=(counting_vector &&)=default
Defaulted.
counting_vector()=default
Defaulted.
counting_vector & operator=(counting_vector const &)=default
Defaulted.
counting_vector & operator+=(counting_vector const &rhs)
Bin-wise addition of two seqan3::counting_vectors.
Definition: interleaved_bloom_filter.hpp:905
Manages counting ranges of values for the seqan3::interleaved_bloom_filter.
Definition: interleaved_bloom_filter.hpp:927
counting_agent_type & operator=(counting_agent_type const &)=default
Defaulted.
~counting_agent_type()=default
Defaulted.
counting_vector< value_t > const & bulk_count(value_range_t &&values) &noexcept
Counts the occurrences in each bin for all values in a range.
Definition: interleaved_bloom_filter.hpp:984
counting_agent_type()=default
Defaulted.
counting_agent_type(counting_agent_type &&)=default
Defaulted.
counting_agent_type(counting_agent_type const &)=default
Defaulted.
counting_agent_type & operator=(counting_agent_type &&)=default
Defaulted.
counting_vector< value_t > result_buffer
Stores the result of bulk_count().
Definition: interleaved_bloom_filter.hpp:959
A bitvector representing the result of a call to bulk_contains of the seqan3::interleaved_bloom_filte...
Definition: interleaved_bloom_filter.hpp:660
auto end() noexcept
Returns an iterator to the element following the last element of the container.
Definition: interleaved_bloom_filter.hpp:711
auto end() const noexcept
Returns an iterator to the element following the last element of the container.
Definition: interleaved_bloom_filter.hpp:717
binning_bitvector()=default
Defaulted.
SEQAN3_DEPRECATED_310 friend bool operator==(binning_bitvector const &lhs, sdsl::bit_vector const &rhs) noexcept
Test for equality.
Definition: interleaved_bloom_filter.hpp:741
SEQAN3_DEPRECATED_310 friend bool operator!=(sdsl::bit_vector const &lhs, binning_bitvector const &rhs) noexcept
Test for inequality.
Definition: interleaved_bloom_filter.hpp:762
binning_bitvector & operator=(binning_bitvector const &)=default
Defaulted.
binning_bitvector(size_t const size)
Construct with given size.
Definition: interleaved_bloom_filter.hpp:684
SEQAN3_DEPRECATED_310 friend bool operator==(sdsl::bit_vector const &lhs, binning_bitvector const &rhs) noexcept
Test for equality.
Definition: interleaved_bloom_filter.hpp:748
auto operator[](size_t const i) const noexcept
Return the i-th element.
Definition: interleaved_bloom_filter.hpp:780
size_t size() const noexcept
Returns the number of elements.
Definition: interleaved_bloom_filter.hpp:690
auto begin() noexcept
Returns an iterator to the first element of the container.
Definition: interleaved_bloom_filter.hpp:699
binning_bitvector(binning_bitvector &&)=default
Defaulted.
~binning_bitvector()=default
Defaulted.
constexpr data_type const & raw_data() const noexcept
Provides direct, unsafe access to the underlying data structure.
Definition: interleaved_bloom_filter.hpp:799
auto begin() const noexcept
Returns an iterator to the first element of the container.
Definition: interleaved_bloom_filter.hpp:705
constexpr data_type & raw_data() noexcept
Provides direct, unsafe access to the underlying data structure.
Definition: interleaved_bloom_filter.hpp:793
SEQAN3_DEPRECATED_310 friend bool operator!=(binning_bitvector const &lhs, sdsl::bit_vector const &rhs) noexcept
Test for inequality.
Definition: interleaved_bloom_filter.hpp:755
binning_bitvector & operator=(binning_bitvector &&)=default
Defaulted.
auto operator[](size_t const i) noexcept
Return the i-th element.
Definition: interleaved_bloom_filter.hpp:773
binning_bitvector(binning_bitvector const &)=default
Defaulted.
friend bool operator==(binning_bitvector const &lhs, binning_bitvector const &rhs) noexcept
Test for equality.
Definition: interleaved_bloom_filter.hpp:727
friend bool operator!=(binning_bitvector const &lhs, binning_bitvector const &rhs) noexcept
Test for inequality.
Definition: interleaved_bloom_filter.hpp:733
Manages membership queries for the seqan3::interleaved_bloom_filter.
Definition: interleaved_bloom_filter.hpp:570
~membership_agent()=default
Defaulted.
membership_agent()=default
Defaulted.
membership_agent(membership_agent const &)=default
Defaulted.
membership_agent & operator=(membership_agent const &)=default
Defaulted.
binning_bitvector const & bulk_contains(size_t const value) &noexcept
Determines set membership of a given value.
Definition: interleaved_bloom_filter.hpp:623
membership_agent & operator=(membership_agent &&)=default
Defaulted.
membership_agent(membership_agent &&)=default
Defaulted.
binning_bitvector result_buffer
Stores the result of bulk_contains().
Definition: interleaved_bloom_filter.hpp:601
The IBF binning directory. A data structure that efficiently answers set-membership queries for multi...
Definition: interleaved_bloom_filter.hpp:132
interleaved_bloom_filter(seqan3::bin_count bins_, seqan3::bin_size size, seqan3::hash_function_count funs=seqan3::hash_function_count{2u})
Construct an uncompressed Interleaved Bloom Filter.
Definition: interleaved_bloom_filter.hpp:219
size_t hash_function_count() const noexcept
Returns the number of hash functions used in the Interleaved Bloom Filter.
Definition: interleaved_bloom_filter.hpp:458
constexpr data_type const & raw_data() const noexcept
Provides direct, unsafe access to the underlying data structure.
Definition: interleaved_bloom_filter.hpp:531
interleaved_bloom_filter & operator=(interleaved_bloom_filter const &)=default
Defaulted.
interleaved_bloom_filter()=default
Defaulted.
membership_agent membership_agent() const
Returns a seqan3::interleaved_bloom_filter::membership_agent to be used for lookup.
Definition: interleaved_bloom_filter.hpp:429
void emplace(size_t const value, bin_index const bin) noexcept
Inserts a value into a specific bin.
Definition: interleaved_bloom_filter.hpp:281
friend bool operator!=(interleaved_bloom_filter const &lhs, interleaved_bloom_filter const &rhs) noexcept
Test for inequality.
Definition: interleaved_bloom_filter.hpp:509
interleaved_bloom_filter(interleaved_bloom_filter &&)=default
Defaulted.
void clear(bin_index const bin) noexcept
Clears a specific bin.
Definition: interleaved_bloom_filter.hpp:307
size_t bin_count() const noexcept
Returns the number of bins that the Interleaved Bloom Filter manages.
Definition: interleaved_bloom_filter.hpp:466
size_t bin_size() const noexcept
Returns the size of a single bin that the Interleaved Bloom Filter manages.
Definition: interleaved_bloom_filter.hpp:474
size_t bit_size() const noexcept
Returns the size of the underlying bitvector.
Definition: interleaved_bloom_filter.hpp:482
interleaved_bloom_filter(interleaved_bloom_filter const &)=default
Defaulted.
~interleaved_bloom_filter()=default
Defaulted.
friend bool operator==(interleaved_bloom_filter const &lhs, interleaved_bloom_filter const &rhs) noexcept
Test for equality.
Definition: interleaved_bloom_filter.hpp:496
static constexpr data_layout data_layout_mode
Indicates whether the Interleaved Bloom Filter is compressed.
Definition: interleaved_bloom_filter.hpp:190
void increase_bin_number_to(bin_count const new_bins_)
Increases the number of bins stored in the Interleaved Bloom Filter.
Definition: interleaved_bloom_filter.hpp:372
interleaved_bloom_filter(interleaved_bloom_filter< data_layout::uncompressed > const &ibf)
Construct a compressed Interleaved Bloom Filter.
Definition: interleaved_bloom_filter.hpp:254
constexpr data_type & raw_data() noexcept
Provides direct, unsafe access to the underlying data structure.
Definition: interleaved_bloom_filter.hpp:525
counting_agent_type< value_t > counting_agent() const
Returns a seqan3::interleaved_bloom_filter::counting_agent_type to be used for counting.
Definition: interleaved_bloom_filter.hpp:446
interleaved_bloom_filter & operator=(interleaved_bloom_filter &&)=default
Defaulted.
constexpr int countl_zero(T x) noexcept
Returns the number of consecutive 0 bits in the value of x, starting from the most significant bit ("...
Definition: bit:181
constexpr int countr_zero(T x) noexcept
Returns the number of consecutive 0 bits in the value of x, starting from the least significant bit (...
Definition: bit:199
@ offset
Sequence (seqan3::field::seq) relative start position (0-based), unsigned value.
data_layout
Determines if the Interleaved Bloom Filter is compressed.
Definition: interleaved_bloom_filter.hpp:31
@ uncompressed
The Interleaved Bloom Filter is uncompressed.
Definition: interleaved_bloom_filter.hpp:32
@ compressed
The Interleaved Bloom Filter is compressed.
Definition: interleaved_bloom_filter.hpp:33
constexpr size_t size
The size of a type pack.
Definition: traits.hpp:151
The main SeqAn3 namespace.
Definition: aligned_sequence_concept.hpp:29
SeqAn specific customisations in the standard namespace.
Provides basic data structure for strong types.
A strong type that represents the number of bins for the seqan3::interleaved_bloom_filter.
Definition: interleaved_bloom_filter.hpp:38
A strong type that represents the bin index for the seqan3::interleaved_bloom_filter.
Definition: interleaved_bloom_filter.hpp:56
A strong type that represents the number of bits for each bin in the seqan3::interleaved_bloom_filter...
Definition: interleaved_bloom_filter.hpp:44
A strong type that represents the number of hash functions for the seqan3::interleaved_bloom_filter.
Definition: interleaved_bloom_filter.hpp:50
strong_type for seed.
Definition: minimiser_hash.hpp:24