HIBF 1.0.0-rc.1
All Classes Namespaces Files Functions Variables Typedefs Friends Macros Modules Pages Concepts
minhashes.hpp
Go to the documentation of this file.
1// SPDX-FileCopyrightText: 2006-2025, Knut Reinert & Freie Universität Berlin
2// SPDX-FileCopyrightText: 2016-2025, Knut Reinert & MPI für molekulare Genetik
3// SPDX-License-Identifier: BSD-3-Clause
4
10#pragma once
11
12#include <cstddef> // for size_t
13#include <cstdint> // for uint64_t, uint32_t
14#include <span> // for span
15#include <vector> // for vector
16
17#include <cereal/access.hpp> // for access
18#include <cereal/cereal.hpp> // for make_nvp, CEREAL_NVP
19
20#include <hibf/platform.hpp>
21
22namespace seqan::hibf::sketch
23{
24
// MinHash sketches designed to be used for locality-sensitive hashing (LSH).
// NOTE(review): the declaration line that names this type is not part of this
// listing (numbering jumps from 24 to 36); the members below refer to it as
// `minhashes`.
36{
   // Bit mask ...00001111 (decimal 15), i.e. the four lowest bits set.
   // NOTE(review): presumably used to extract a register/sketch id from a hash
   // value - confirm against the implementation file.
37 static constexpr uint64_t register_id_mask{15};
   // Compile-time constant 16.
   // NOTE(review): by its name, the number of sketches held in `table` - confirm.
38 static constexpr size_t num_sketches{16};
   // Compile-time constant 40.
   // NOTE(review): by its name, the number of values per sketch - confirm.
39 static constexpr size_t sketch_size{40};
40
   // A table of sketches. For LSH we need multiple sketches, stored in a table.
42 std::vector<std::vector<uint64_t>> table{}; // Each element (vector<uint64_t>) is a minhash.
43
   // Constructors, destructor and assignment: all special members are defaulted.
47 minhashes() = default;
48 minhashes(minhashes const &) = default;
49 minhashes & operator=(minhashes const &) = default;
50 minhashes(minhashes &&) = default;
51 minhashes & operator=(minhashes &&) = default;
52 ~minhashes() = default;
   // Constructs from a vector of the smallest values in a set (sorted ascending).
   // Declared only; the definition is not part of this header.
54 minhashes(std::vector<uint64_t> const & smallest_values);
56
   // Checks whether the MinHash table is completely filled.
58 bool is_valid() const;
59
   // Adds more minhash values to an existing but incomplete table.
61 void fill_incomplete_sketches(std::span<uint64_t> const & more_smallest_values);
62
   // Pushes `value` to `heap` if it is smaller than the current largest element.
   // Static: operates only on the heap passed in, not on a minhashes instance.
64 static void push_to_heap_if_smaller(uint64_t const value, std::vector<uint64_t> & heap);
65
66private:
   // Grants cereal access to the private serialize() below.
67 friend class cereal::access;
68
   // Serialisation hook for cereal: archives a version number (1) followed by
   // `table`. The version value is only passed to the archive; it is not
   // inspected or validated in this function.
69 template <typename archive_t>
70 void serialize(archive_t & archive)
71 {
72 uint32_t version{1};
73 archive(CEREAL_NVP(version));
74
75 // other members are const static currently
   // (i.e. only `table` carries per-object state that needs to be archived)
76 archive(CEREAL_NVP(table));
77 }
78};
79
80} // namespace seqan::hibf::sketch
Provides platform and dependency checks.
MinHash sketches designed to be used for locality-sensitive hashing.
Definition minhashes.hpp:36
minhashes(minhashes const &)=default
Defaulted.
minhashes(std::vector< uint64_t > const &smallest_values)
Constructs from a vector of the smallest values in a set (sorted ascending).
minhashes()=default
Defaulted.
bool is_valid() const
Checks whether the minHash table is completely filled.
static void push_to_heap_if_smaller(uint64_t const value, std::vector< uint64_t > &heap)
Pushes value to the heap if it is smaller than the current largest element.
static constexpr size_t num_sketches
...00001111 (this bit pattern equals 15, the value of register_id_mask; num_sketches itself is 16)
Definition minhashes.hpp:38
minhashes(minhashes &&)=default
Defaulted.
minhashes & operator=(minhashes const &)=default
Defaulted.
minhashes & operator=(minhashes &&)=default
Defaulted.
std::vector< std::vector< uint64_t > > table
A table of sketches. For LSH we need multiple sketches, stored in a table.
Definition minhashes.hpp:42
void fill_incomplete_sketches(std::span< uint64_t > const &more_smallest_values)
Adds more minhash values to an existing but incomplete table.