HIBF 1.0.0-rc.1
All Classes Namespaces Files Functions Variables Typedefs Friends Macros Modules Pages Concepts
hyperloglog.hpp
Go to the documentation of this file.
1// SPDX-FileCopyrightText: 2006-2025, Knut Reinert & Freie Universität Berlin
2// SPDX-FileCopyrightText: 2016-2025, Knut Reinert & MPI für molekulare Genetik
3// SPDX-FileCopyrightText: 2013 Hideaki Ohno <hide.o.j55{at}gmail.com>
4// SPDX-License-Identifier: BSD-3-Clause AND MIT
5
12#pragma once
13
14#include <array> // for array
15#include <cstddef> // for size_t
16#include <cstdint> // for uint64_t, uint8_t, uint32_t
17#include <iosfwd> // for istream, ostream
18#include <vector> // for vector
19
20#include <cereal/access.hpp> // for access
21#include <cereal/cereal.hpp> // for make_nvp, CEREAL_NVP
22
23#include <hibf/contrib/aligned_allocator.hpp> // for aligned_allocator
24#include <hibf/platform.hpp>
25
26namespace seqan::hibf::sketch
27{
28
36{
37public:
// Default constructor. `num_bits` defaults to 5 when omitted.
48 hyperloglog(uint8_t const num_bits = 5u);
// Copy constructor: defaulted.
49 hyperloglog(hyperloglog const &) = default;
// Copy assignment: defaulted.
50 hyperloglog & operator=(hyperloglog const &) = default;
// Move constructor: defaulted.
51 hyperloglog(hyperloglog &&) = default;
// NOTE(review): original line 52 is missing from this listing; the member index lists a defaulted
// move assignment `hyperloglog & operator=(hyperloglog &&)`, which presumably sits there.
// Destructor: defaulted.
53 ~hyperloglog() = default;
54
56
// Adds a value.
60 void add(uint64_t const value);
61
// Estimates the cardinality. Const: does not modify the sketch.
65 double estimate() const;
66
// Merges another hyperloglog into this object.
74 void merge(hyperloglog const & other);
75
// Merges another hyperloglog and returns the new estimate.
// Non-const, unlike estimate(): this object is modified by the merge.
83 double merge_and_estimate(hyperloglog const & other);
84
// Clears added values. The size is unaffected.
88 void reset();
89
94 uint64_t data_size() const
95 {
96 return size;
97 }
98
// Writes the hyperloglog to the output stream `os`. Const: the object itself is not modified.
103 void store(std::ostream & os) const;
104
// Loads the hyperloglog from the input stream `is`.
109 void load(std::istream & is);
110
111private:
// Compile-time lookup table: entry i holds 2^-i as a float, for i in [0, 60].
// Built by repeated halving, which is exact for every power of two in this range.
static constexpr std::array<float, 61> expectation_values = []() constexpr
{
    std::array<float, 61> table{};
    float power = 1.0f;
    for (float & entry : table)
    {
        entry = power;
        power *= 0.5f;
    }
    return table;
}();
120
// State members; all are value-initialised and archived by serialize() in this order.
// NOTE(review): presumably derived from the constructor's `num_bits` — confirm in the implementation.
122 uint8_t bits{};
// Value returned by data_size(), i.e. the size of the internal data.
124 uint64_t size{};
// NOTE(review): the name suggests a bit mask used when computing ranks — confirm in the implementation.
126 uint64_t rank_mask{};
// NOTE(review): presumably the scaling constant applied by estimate() — confirm in the implementation.
128 double normalization_factor{};
// NOTE(review): serialize() also archives a member named `data`; its declaration is not visible here
// and presumably lives on the omitted original lines 129-130.
131
132 friend class cereal::access;
133
134 template <typename archive_t>
135 void serialize(archive_t & archive)
136 {
137 uint32_t version{1};
138 archive(CEREAL_NVP(version));
139
140 archive(CEREAL_NVP(bits));
141 archive(CEREAL_NVP(size));
142 archive(CEREAL_NVP(rank_mask));
143 archive(CEREAL_NVP(normalization_factor));
144 archive(CEREAL_NVP(data));
145 }
146};
147
148} // namespace seqan::hibf::sketch
HyperLogLog estimates.
Definition hyperloglog.hpp:36
double merge_and_estimate(hyperloglog const &other)
Merges another hyperloglog and returns the new estimate.
hyperloglog & operator=(hyperloglog const &)=default
Defaulted.
uint64_t data_size() const
Returns size of the internal data.
Definition hyperloglog.hpp:94
hyperloglog(hyperloglog &&)=default
Defaulted.
double estimate() const
Estimates the cardinality.
hyperloglog(hyperloglog const &)=default
Defaulted.
hyperloglog(uint8_t const num_bits=5u)
Default constructor.
~hyperloglog()=default
Defaulted.
void add(uint64_t const value)
Adds a value.
void reset()
Clears added values. The size is unaffected.
void load(std::istream &is)
Loads the hyperloglog from a stream.
hyperloglog & operator=(hyperloglog &&)=default
Defaulted.
void store(std::ostream &os) const
Writes the hyperloglog to a stream.
void merge(hyperloglog const &other)
Merges another hyperloglog into this object.
Provides platform and dependency checks.