 |
SeqAn3
3.0.2
The Modern C++ library for sequence analysis.
|
|
Go to the documentation of this file.
15 #include <sdsl/bit_vectors.hpp>
36 struct bin_count :
public detail::strong_type<size_t, bin_count, detail::strong_type_skill::convert>
38 using detail::strong_type<size_t, bin_count, detail::strong_type_skill::convert>::strong_type;
42 struct bin_size :
public detail::strong_type<size_t, bin_size, detail::strong_type_skill::convert>
44 using detail::strong_type<size_t, bin_size, detail::strong_type_skill::convert>::strong_type;
48 struct hash_function_count :
public detail::strong_type<size_t, hash_function_count, detail::strong_type_skill::convert>
50 using detail::strong_type<size_t, hash_function_count, detail::strong_type_skill::convert>::strong_type;
54 struct bin_index :
public detail::strong_type<size_t, bin_index, detail::strong_type_skill::convert>
56 using detail::strong_type<size_t, bin_index, detail::strong_type_skill::convert>::strong_type;
125 template <data_layout data_layout_mode_ = data_layout::uncompressed>
130 template <data_layout data_layout_mode>
144 size_t technical_bins{};
157 13043817825332782213ULL,
158 10650232656628343401ULL,
159 16499269484942379435ULL,
160 4893150838803335377ULL};
169 inline constexpr
size_t hash_and_fit(
size_t h,
size_t const seed)
const
172 assert(hash_shift < 64);
173 h ^= h >> hash_shift;
174 h *= 11400714819323198485ULL;
176 #ifdef __SIZEOF_INT128__
177 h =
static_cast<uint64_t
>((
static_cast<__uint128_t
>(h) *
static_cast<__uint128_t
>(bin_size_)) >> 64);
220 bin_size_ =
size.get();
221 hash_funs = funs.get();
225 if (hash_funs == 0 || hash_funs > 5)
226 throw std::logic_error{
"The number of hash functions must be > 0 and <= 5."};
230 hash_shift = detail::count_leading_zeros(bin_size_);
231 bin_words = (bins + 63) >> 6;
232 technical_bins = bin_words << 6;
233 data = sdsl::bit_vector(technical_bins * bin_size_);
252 std::tie(bins, technical_bins, bin_size_, hash_shift, bin_words, hash_funs) =
253 std::tie(ibf.bins, ibf.technical_bins, ibf.bin_size_, ibf.hash_shift, ibf.bin_words, ibf.hash_funs);
255 data = sdsl::sd_vector<>{ibf.data};
279 assert(bin.get() < bins);
280 for (
size_t i = 0; i < hash_funs; ++i)
282 size_t idx = hash_and_fit(value, hash_seeds[i]);
284 assert(idx < data.size());
317 size_t new_bins = new_bins_.get();
323 size_t new_bin_words = (new_bins + 63) >> 6;
327 if (new_bin_words == bin_words)
330 size_t new_technical_bins = new_bin_words << 6;
331 size_t new_bits = bin_size_ * new_technical_bins;
333 size_t idx_{new_bits}, idx{data.size()};
334 size_t delta = new_technical_bins - technical_bins + 64;
336 data.resize(new_bits);
338 for (
size_t i = idx_, j = idx; j > 0; i -= new_technical_bins, j -= technical_bins)
340 size_t stop = i - new_technical_bins;
342 for (
size_t ii = i - delta, jj = j - 64; stop && ii >= stop; ii -= 64, jj -= 64)
344 uint64_t old = data.get_int(jj);
346 data.set_int(ii, old);
350 bin_words = new_bin_words;
351 technical_bins = new_technical_bins;
421 return std::tie(lhs.bins, lhs.technical_bins, lhs.bin_size_, lhs.hash_shift, lhs.bin_words, lhs.hash_funs,
423 std::tie(rhs.bins, rhs.technical_bins, rhs.bin_size_, rhs.hash_shift, rhs.bin_words, rhs.hash_funs,
434 return !(lhs == rhs);
445 template <cereal_archive archive_t>
446 void CEREAL_SERIALIZE_FUNCTION_NAME(archive_t & archive)
449 archive(technical_bins);
469 template <data_layout data_layout_mode>
477 ibf_t const * ibf_ptr{
nullptr};
498 result_buffer.resize(ibf_ptr->bin_count());
527 assert(ibf_ptr !=
nullptr);
528 assert(result_buffer.
size() == ibf_ptr->bin_count());
531 std::memcpy(&bloom_filter_indices, &ibf_ptr->hash_seeds,
sizeof(
size_t) * ibf_ptr->hash_funs);
533 for (
size_t i = 0; i < ibf_ptr->hash_funs; ++i)
534 bloom_filter_indices[i] = ibf_ptr->hash_and_fit(value, bloom_filter_indices[i]);
536 for (
size_t batch = 0; batch < ibf_ptr->bin_words; ++batch)
539 for (
size_t i = 0; i < ibf_ptr->hash_funs; ++i)
541 assert(bloom_filter_indices[i] < ibf_ptr->
data.size());
542 tmp &= ibf_ptr->data.get_int(bloom_filter_indices[i]);
543 bloom_filter_indices[i] += 64;
546 result_buffer.set_int(batch << 6, tmp);
549 return result_buffer;
554 [[nodiscard]] binning_bitvector
const & bulk_contains(
size_t const value) && noexcept =
delete;
560 template <data_layout data_layout_mode>
575 using sdsl::bit_vector::begin;
576 using sdsl::bit_vector::end;
577 using sdsl::bit_vector::operator==;
578 using sdsl::bit_vector::operator[];
581 #if SEQAN3_DOXYGEN_ONLY(1)0
593 bool operator==(bit_vector const & other) const noexcept;
604 using sdsl::bit_vector::resize;
605 using sdsl::bit_vector::set_int;
606 template <std::
integral value_t>
632 template <std::
integral value_t>
649 using base_t::base_t;
664 template <
typename rhs_t>
670 assert(this->
size() >= rhs.size());
673 for (
size_t batch = 0, bin = 0; batch < ((rhs.size() + 63) >> 6); bin = 64 * ++batch)
675 size_t tmp = rhs.get_int(batch * 64);
676 if (tmp ^ (1ULL<<63))
681 uint8_t step = detail::count_trailing_zeros(tmp);
membership_agent membership_agent() const
Returns seqan3::interleaved_bloom_filter::membership_agent to be used for lookup.
Definition: interleaved_bloom_filter.hpp:369
binning_bitvector & operator=(binning_bitvector const &)=default
Defaulted.
IMPLEMENTATION_DEFINED const_reference_t
The const_reference type of the binning_bitvector;.
Definition: interleaved_bloom_filter.hpp:587
data_layout
Determines if the Interleaved Bloom Filter is compressed.
Definition: interleaved_bloom_filter.hpp:30
binning_bitvector result_buffer
Stores the result of bulk_contains().
Definition: interleaved_bloom_filter.hpp:499
Provides utility functions for bit twiddling.
membership_agent & operator=(membership_agent &&)=default
Defaulted.
size_t hash_function_count() const noexcept
Returns the number of hash functions used in the Interleaved Bloom Filter.
Definition: interleaved_bloom_filter.hpp:381
binning_bitvector & operator=(binning_bitvector &&)=default
Defaulted.
interleaved_bloom_filter & operator=(interleaved_bloom_filter &&)=default
Defaulted.
@ compressed
The Interleaved Bloom Filter is compressed.
Definition: interleaved_bloom_filter.hpp:32
A data structure that behaves like a std::vector and can be used to consolidate the results of multip...
Definition: interleaved_bloom_filter.hpp:634
A strong type that represents the number of hash functions for the seqan3::interleaved_bloom_filter.
Definition: interleaved_bloom_filter.hpp:49
Provides basic data structure for strong types.
The IBF binning directory. A data structure that efficiently answers set-membership queries for multi...
Definition: interleaved_bloom_filter.hpp:127
membership_agent(membership_agent const &)=default
Defaulted.
binning_bitvector()=default
Defaulted.
membership_agent(membership_agent &&)=default
Defaulted.
Manages membership queries for the seqan3::interleaved_bloom_filter.
Definition: interleaved_bloom_filter.hpp:471
counting_vector(counting_vector const &)=default
Defaulted.
strong_type for seed.
Definition: minimiser_hash.hpp:24
interleaved_bloom_filter(interleaved_bloom_filter &&)=default
Defaulted.
Adaptations of algorithms from the Ranges TS.
counting_vector & operator+=(rhs_t const &rhs)
Bin-wise adds the bits of a seqan3::interleaved_bloom_filter::membership_agent::binning_bitvector.
Definition: interleaved_bloom_filter.hpp:668
interleaved_bloom_filter(interleaved_bloom_filter< data_layout::uncompressed > const &ibf)
Construct a compressed Interleaved Bloom Filter.
Definition: interleaved_bloom_filter.hpp:247
~interleaved_bloom_filter()=default
Defaulted.
counting_vector & operator=(counting_vector const &)=default
Defaulted.
counting_vector & operator=(counting_vector &&)=default
Defaulted.
~counting_vector()=default
Defaulted.
A strong type that represents the bin index for the seqan3::interleaved_bloom_filter.
Definition: interleaved_bloom_filter.hpp:55
size_t bit_size() const noexcept
Returns the size of the underlying bitvector.
Definition: interleaved_bloom_filter.hpp:405
friend bool operator!=(interleaved_bloom_filter const &lhs, interleaved_bloom_filter const &rhs) noexcept
Test for inequality.
Definition: interleaved_bloom_filter.hpp:432
iterator_t begin() noexcept
Returns an iterator to the begin of the bitvector.
The main SeqAn3 namespace.
Definition: aligned_sequence_concept.hpp:29
A strong type that represents the number of bins for the seqan3::interleaved_bloom_filter.
Definition: interleaved_bloom_filter.hpp:37
A strong type that represents the number of bits for each bin in the seqan3::interleaved_bloom_filter...
Definition: interleaved_bloom_filter.hpp:43
size_t bin_count() const noexcept
Returns the number of bins that the Interleaved Bloom Filter manages.
Definition: interleaved_bloom_filter.hpp:389
size_t bin_size() const noexcept
Returns the size of a single bin that the Interleaved Bloom Filter manages.
Definition: interleaved_bloom_filter.hpp:397
interleaved_bloom_filter & operator=(interleaved_bloom_filter const &)=default
Defaulted.
size_t size() noexcept
Returns the size of the bitvector.
constexpr size_t size
The size of a type pack.
Definition: traits.hpp:116
void increase_bin_number_to(bin_count const new_bins_)
Increases the number of bins stored in the Interleaved Bloom Filter.
Definition: interleaved_bloom_filter.hpp:312
membership_agent()=default
Defaulted.
SeqAn specific customisations in the standard namespace.
interleaved_bloom_filter()=default
Defaulted.
A bitvector representing the result of a call to bulk_contains of the seqan3::interleaved_bloom_filte...
Definition: interleaved_bloom_filter.hpp:562
interleaved_bloom_filter(interleaved_bloom_filter const &)=default
Defaulted.
counting_vector()=default
Defaulted.
IMPLEMENTATION_DEFINED reference_t
The reference type of the binning_bitvector;.
Definition: interleaved_bloom_filter.hpp:585
binning_bitvector(binning_bitvector &&)=default
Defaulted.
friend bool operator==(interleaved_bloom_filter const &lhs, interleaved_bloom_filter const &rhs) noexcept
Test for equality.
Definition: interleaved_bloom_filter.hpp:419
Adaptions of concepts from the Cereal library.
~binning_bitvector()=default
binning_bitvector(binning_bitvector const &)=default
Defaulted.
~membership_agent()=default
Defaulted.
IMPLEMENTATION_DEFINED iterator_t
The iterator type of the binning_bitvector;.
Definition: interleaved_bloom_filter.hpp:583
interleaved_bloom_filter(seqan3::bin_count bins_, seqan3::bin_size size, seqan3::hash_function_count funs=seqan3::hash_function_count{2u})
Construct an uncompressed Interleaved Bloom Filter.
Definition: interleaved_bloom_filter.hpp:212
@ uncompressed
The Interleaved Bloom Filter is uncompressed.
Definition: interleaved_bloom_filter.hpp:31
static constexpr data_layout data_layout_mode
Indicates whether the Interleaved Bloom Filter is compressed.
Definition: interleaved_bloom_filter.hpp:187
membership_agent & operator=(membership_agent const &)=default
Defaulted.
void emplace(size_t const value, bin_index const bin)
Inserts a value into a specific bin.
Definition: interleaved_bloom_filter.hpp:274
binning_bitvector const & bulk_contains(size_t const value) &noexcept
Determines set membership of a given value.
Definition: interleaved_bloom_filter.hpp:525
counting_vector(counting_vector &&)=default
Defaulted.