80template <data_layout data_layout_mode_ = data_layout::uncompressed>
85 template <data_layout data_layout_mode>
91 seqan3::contrib::sdsl::bit_vector,
92 seqan3::contrib::sdsl::sd_vector<>>;
95 size_t size_in_bits{};
104 13'043'817'825'332'782'213ULL,
105 10'650'232'656'628'343'401ULL,
106 16'499'269'484'942'379'435ULL,
107 4'893'150'838'803'335'377ULL};
116 inline constexpr size_t hash_and_fit(
size_t h,
size_t const seed)
const
119 h ^= h >> hash_shift;
120 h *= 11'400'714'819'323'198'485ULL;
122#ifdef __SIZEOF_INT128__
123 h =
static_cast<uint64_t
>((
static_cast<__uint128_t
>(h) *
static_cast<__uint128_t
>(size_in_bits)) >> 64);
159 size_in_bits = size.get();
160 hash_funs = funs.get();
162 if (hash_funs == 0 || hash_funs > 5)
163 throw std::logic_error{
"The number of hash functions must be > 0 and <= 5."};
164 if (size_in_bits == 0)
168 data = seqan3::contrib::sdsl::bit_vector(size_in_bits);
185 std::tie(size_in_bits, hash_shift, hash_funs) =
std::tie(bf.size_in_bits, bf.hash_shift, bf.hash_funs);
187 data = seqan3::contrib::sdsl::sd_vector<>{bf.data};
208 for (
size_t i = 0; i < hash_funs; ++i)
210 size_t idx = hash_and_fit(value, hash_seeds[i]);
211 assert(idx < data.size());
231 seqan3::contrib::sdsl::util::_set_zero_bits(data);
251 for (
size_t i = 0; i < hash_funs; i++)
253 size_t idx = hash_and_fit(value, hash_seeds[i]);
254 assert(idx < data.size());
280 template <std::ranges::range value_range_t>
281 size_t count(value_range_t && values)
const noexcept
283 static_assert(std::ranges::input_range<value_range_t>,
"The values must model input_range.");
284 static_assert(std::unsigned_integral<std::ranges::range_value_t<value_range_t>>,
285 "An individual value must be an unsigned integral.");
289 for (
auto && value : values)
281 size_t count(value_range_t && values)
const noexcept {
…}
326 return std::tie(lhs.size_in_bits, lhs.hash_shift, lhs.hash_funs, lhs.data)
327 ==
std::tie(rhs.size_in_bits, rhs.hash_shift, rhs.hash_funs, rhs.data);
337 return !(lhs == rhs);
370 template <cereal_archive archive_t>
371 void CEREAL_SERIALIZE_FUNCTION_NAME(archive_t & archive)
373 archive(size_in_bits);
The Bloom Filter. A data structure that efficiently answers set-membership queries.
Definition bloom_filter.hpp:82
friend bool operator!=(bloom_filter const &lhs, bloom_filter const &rhs) noexcept
Test for inequality.
Definition bloom_filter.hpp:335
constexpr data_type & raw_data() noexcept
Provides direct, unsafe access to the underlying data structure.
Definition bloom_filter.hpp:351
bloom_filter(bloom_filter &&)=default
Defaulted.
static constexpr data_layout data_layout_mode
Indicates whether the Bloom Filter is compressed.
Definition bloom_filter.hpp:132
constexpr data_type const & raw_data() const noexcept
Provides direct, unsafe access to the underlying data structure.
Definition bloom_filter.hpp:357
bloom_filter & operator=(bloom_filter const &)=default
Defaulted.
bool contains(size_t const value) const noexcept
Check whether a value is present in the Bloom Filter.
Definition bloom_filter.hpp:249
bloom_filter(bloom_filter< data_layout::uncompressed > const &bf)
Construct a compressed Bloom Filter.
Definition bloom_filter.hpp:182
friend bool operator==(bloom_filter const &lhs, bloom_filter const &rhs) noexcept
Test for equality.
Definition bloom_filter.hpp:324
void reset() noexcept
Remove all values from the Bloom Filter by setting all bits to 0.
Definition bloom_filter.hpp:228
bloom_filter(bloom_filter const &)=default
Defaulted.
size_t hash_function_count() const noexcept
Returns the number of hash functions used in the Bloom Filter.
Definition bloom_filter.hpp:302
size_t count(value_range_t &&values) const noexcept
Counts the occurrences for all values in a range.
Definition bloom_filter.hpp:281
bloom_filter()=default
Defaulted.
size_t bit_size() const noexcept
Returns the size of the underlying bitvector.
Definition bloom_filter.hpp:310
bloom_filter & operator=(bloom_filter &&)=default
Defaulted.
void emplace(size_t const value) noexcept
Inserts a value into the Bloom Filter.
Definition bloom_filter.hpp:205
~bloom_filter()=default
Defaulted.
bloom_filter(seqan3::bin_size size, seqan3::hash_function_count funs=seqan3::hash_function_count{2u})
Construct an uncompressed Bloom Filter.
Definition bloom_filter.hpp:156
data_layout
Determines if the Interleaved Bloom Filter is compressed.
Definition interleaved_bloom_filter.hpp:24
@ uncompressed
The Interleaved Bloom Filter is uncompressed.
Definition interleaved_bloom_filter.hpp:25
@ compressed
The Interleaved Bloom Filter is compressed.
Definition interleaved_bloom_filter.hpp:26
Provides seqan3::interleaved_bloom_filter.
The main SeqAn3 namespace.
Definition aligned_sequence_concept.hpp:26
A strong type that represents the number of bits for each bin in the seqan3::interleaved_bloom_filter.
Definition interleaved_bloom_filter.hpp:39
A strong type that represents the number of hash functions for the seqan3::interleaved_bloom_filter.
Definition interleaved_bloom_filter.hpp:46
strong_type for seed.
Definition minimiser_hash.hpp:22