SeqAn3 3.4.0-rc.1
The Modern C++ library for sequence analysis.
Loading...
Searching...
No Matches
fast_istreambuf_iterator.hpp
Go to the documentation of this file.
1// SPDX-FileCopyrightText: 2006-2024 Knut Reinert & Freie Universität Berlin
2// SPDX-FileCopyrightText: 2016-2024 Knut Reinert & MPI für molekulare Genetik
3// SPDX-License-Identifier: BSD-3-Clause
4
11#pragma once
12
#include <algorithm>
#include <cassert>
#include <iterator>
#include <stdexcept>
#include <string>
#include <vector>
18
20
21namespace seqan3::detail
22{
35template <typename char_t, typename traits_t = std::char_traits<char_t>>
36class fast_istreambuf_iterator
37{
38private:
40 stream_buffer_exposer<char_t, traits_t> * stream_buf = nullptr;
41
43 std::string overflow_buffer{};
44
45public:
49 using difference_type = ptrdiff_t;
50 using value_type = char_t;
51 using reference = char_t;
52 using pointer = void;
53 using iterator_category = std::input_iterator_tag;
55
59 fast_istreambuf_iterator() noexcept = default;
60 fast_istreambuf_iterator(fast_istreambuf_iterator const &) noexcept = default;
61 fast_istreambuf_iterator(fast_istreambuf_iterator &&) noexcept = default;
62 fast_istreambuf_iterator & operator=(fast_istreambuf_iterator const &) noexcept = default;
63 fast_istreambuf_iterator & operator=(fast_istreambuf_iterator &&) noexcept = default;
64 ~fast_istreambuf_iterator() noexcept = default;
65
67 explicit fast_istreambuf_iterator(std::basic_streambuf<char_t, traits_t> & ibuf) :
68 stream_buf{reinterpret_cast<stream_buffer_exposer<char_t, traits_t> *>(&ibuf)}
69 {
70 assert(stream_buf != nullptr);
71
72 if (stream_buf->gptr() == stream_buf->egptr()) // If current get area is empty,
73 stream_buf->underflow(); // ensure the stream buffer has content on construction.
74 }
76
78 template <typename record_type>
79 requires std::same_as<std::ranges::range_value_t<record_type>, std::string_view>
80 void cache_record_into(char const record_end, char const field_sep, record_type & raw_record)
81 {
82 bool has_overflowed = false;
83 size_t old_count = 0;
84 char * data_begin = stream_buf->gptr(); // point into stream buffer by default
85 size_t const number_of_fields = raw_record.size();
86 size_t number_of_seen_fields = 0;
87 std::vector<size_t> field_positions(number_of_fields, 0u);
88
89 char const * ptr = stream_buf->gptr();
90
91 auto overflow_into_buffer = [&]()
92 {
93 size_t count = stream_buf->egptr() - stream_buf->gptr();
94 has_overflowed = true;
95 overflow_buffer.resize(old_count + count);
96 std::ranges::copy(stream_buf->gptr(), stream_buf->egptr(), overflow_buffer.data() + old_count);
97
98 old_count += count;
99 stream_buf->gbump(count);
100 stream_buf->underflow();
101 };
102
103 while (number_of_seen_fields < number_of_fields - 1)
104 {
105 ptr = std::find(ptr, static_cast<char const *>(stream_buf->egptr()), field_sep);
106
107 if (ptr != stream_buf->egptr()) // found an end of field
108 {
109 field_positions[number_of_seen_fields] = ptr - stream_buf->gptr() + old_count;
110 ++ptr;
111 ++number_of_seen_fields;
112 }
113 else
114 {
115 overflow_into_buffer();
116 assert(stream_buf->gptr() != stream_buf->egptr()); // stream is not at end after overflow
117 ptr = stream_buf->gptr();
118 }
119 }
120
121 size_t count = 0;
122
123 while (true) // Note: Might run idefinitely in release mode if no record_end is in input.
124 {
125 ptr = std::find(ptr, static_cast<char const *>(stream_buf->egptr()), record_end);
126
127 if (ptr == stream_buf->egptr()) // stop_chr could not be found in current buffer
128 {
129 overflow_into_buffer();
130 assert(stream_buf->gptr() != stream_buf->egptr()); // stream is not at end after overflow
131 ptr = stream_buf->gptr();
132 }
133 else
134 {
135 count = ptr - stream_buf->gptr(); // processed characters until stop_chr has been found
136 break;
137 }
138 }
139
140 if (has_overflowed)
141 {
142 // need to copy last data
143 overflow_buffer.resize(old_count + count);
144 std::ranges::copy(stream_buf->gptr(), stream_buf->gptr() + count, overflow_buffer.data() + old_count);
145
146 // make data pointer point into overflow
147 data_begin = overflow_buffer.data();
148 }
149
150 stream_buf->gbump(count);
151
152 // instantiate string_views in raw_record
153 field_positions.back() = old_count + count;
154 raw_record[0] = std::string_view{data_begin, field_positions[0]};
155 for (size_t i = 1; i < number_of_fields; ++i)
156 raw_record[i] = std::string_view{data_begin + field_positions[i - 1] + 1, data_begin + field_positions[i]};
157 }
158
160 std::string_view cache_bytes(int32_t const size)
161 {
162 std::string_view result;
163
164 if (stream_buf->egptr() - stream_buf->gptr() >= size)
165 {
166 result = std::string_view{stream_buf->gptr(), stream_buf->gptr() + size};
167 stream_buf->gbump(size);
168 }
169 else
170 {
171 overflow_buffer.resize(size);
172
173 int32_t remaining_bytes{size};
174
175 while (stream_buf->egptr() - stream_buf->gptr() < remaining_bytes) // still not fully in buffer
176 {
177 std::ranges::copy(stream_buf->gptr(),
178 stream_buf->egptr(),
179 overflow_buffer.data() + size - remaining_bytes);
180 size_t const number_of_copied_bytes = stream_buf->egptr() - stream_buf->gptr();
181 remaining_bytes -= number_of_copied_bytes;
182 stream_buf->gbump(number_of_copied_bytes);
183 stream_buf->underflow();
184 assert((remaining_bytes == 0 || stream_buf->egptr() != stream_buf->gptr())
185 && "I still need to read characters but my stream is at end.");
186 }
187
188 if (remaining_bytes != 0) // In stream_buf but not yet copied to overflow_buffer
189 {
190 std::ranges::copy(stream_buf->gptr(),
191 stream_buf->gptr() + remaining_bytes,
192 overflow_buffer.data() + size - remaining_bytes);
193
194 stream_buf->gbump(remaining_bytes);
195 }
196
197 result = {overflow_buffer.begin(), overflow_buffer.end()};
198 }
199
200 return result;
201 }
202
207 fast_istreambuf_iterator & operator++()
208 {
209 assert(stream_buf != nullptr);
210
211 if ((stream_buf->gptr() + 1) == stream_buf->egptr())
212 stream_buf->snextc(); // move right, then underflow()
213 else
214 stream_buf->gbump(1);
215 return *this;
216 }
217
219 void operator++(int)
220 {
221 ++(*this);
222 }
224
226 reference operator*() const
227 {
228 assert(stream_buf != nullptr);
229 assert(stream_buf->gptr() != stream_buf->egptr());
230 return *stream_buf->gptr();
231 }
232
238 friend bool operator==(fast_istreambuf_iterator const & lhs, std::default_sentinel_t const &) noexcept
239 {
240 assert(lhs.stream_buf != nullptr);
241 // compare size of remaining buffer; since ++ always resizes if possible, safe to compare pointers here
242 return (lhs.stream_buf->gptr() == lhs.stream_buf->egptr());
243 }
244
246 friend bool operator!=(fast_istreambuf_iterator const & lhs, std::default_sentinel_t const &) noexcept
247 {
248 return !(lhs == std::default_sentinel);
249 }
250
252 friend bool operator==(std::default_sentinel_t const &, fast_istreambuf_iterator const & rhs) noexcept
253 {
254 return rhs == std::default_sentinel;
255 }
256
258 friend bool operator!=(std::default_sentinel_t const &, fast_istreambuf_iterator const & rhs) noexcept
259 {
260 return !(rhs == std::default_sentinel);
261 }
263};
264
265} // namespace seqan3::detail
T begin(T... args)
T copy(T... args)
T find(T... args)
constexpr ptrdiff_t count
Count the occurrences of a type in a pack.
Definition type_pack/traits.hpp:161
constexpr size_t size
The size of a type pack.
Definition type_pack/traits.hpp:143
SeqAn specific customisations in the standard namespace.
T operator!=(T... args)
T size(T... args)
Provides seqan3::detail::stream_buffer_exposer.
Hide me