SeqAn3 3.3.0
The Modern C++ library for sequence analysis.
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
fast_istreambuf_iterator.hpp
Go to the documentation of this file.
1// -----------------------------------------------------------------------------------------------------
2// Copyright (c) 2006-2023, Knut Reinert & Freie Universität Berlin
3// Copyright (c) 2016-2023, Knut Reinert & MPI für molekulare Genetik
4// This file may be used, modified and/or redistributed under the terms of the 3-clause BSD-License
5// shipped with this file and also available at: https://github.com/seqan/seqan3/blob/master/LICENSE.md
6// -----------------------------------------------------------------------------------------------------
7
14#pragma once
15
#include <algorithm>
#include <cassert>
#include <iterator>
#include <stdexcept>
#include <string>
#include <vector>
21
23
24namespace seqan3::detail
25{
38template <typename char_t, typename traits_t = std::char_traits<char_t>>
39class fast_istreambuf_iterator
40{
41private:
43 stream_buffer_exposer<char_t, traits_t> * stream_buf = nullptr;
44
46 std::string overflow_buffer{};
47
48public:
52 using difference_type = ptrdiff_t;
53 using value_type = char_t;
54 using reference = char_t;
55 using pointer = void;
56 using iterator_category = std::input_iterator_tag;
58
62 fast_istreambuf_iterator() noexcept = default;
63 fast_istreambuf_iterator(fast_istreambuf_iterator const &) noexcept = default;
64 fast_istreambuf_iterator(fast_istreambuf_iterator &&) noexcept = default;
65 fast_istreambuf_iterator & operator=(fast_istreambuf_iterator const &) noexcept = default;
66 fast_istreambuf_iterator & operator=(fast_istreambuf_iterator &&) noexcept = default;
67 ~fast_istreambuf_iterator() noexcept = default;
68
70 explicit fast_istreambuf_iterator(std::basic_streambuf<char_t, traits_t> & ibuf) :
71 stream_buf{reinterpret_cast<stream_buffer_exposer<char_t, traits_t> *>(&ibuf)}
72 {
73 assert(stream_buf != nullptr);
74
75 if (stream_buf->gptr() == stream_buf->egptr()) // If current get area is empty,
76 stream_buf->underflow(); // ensure the stream buffer has content on construction.
77 }
79
81 template <typename record_type>
82 requires std::same_as<std::ranges::range_value_t<record_type>, std::string_view>
83 void cache_record_into(char const record_end, char const field_sep, record_type & raw_record)
84 {
85 bool has_overflowed = false;
86 size_t old_count = 0;
87 char * data_begin = stream_buf->gptr(); // point into stream buffer by default
88 size_t const number_of_fields = raw_record.size();
89 size_t number_of_seen_fields = 0;
90 std::vector<size_t> field_positions(number_of_fields, 0u);
91
92 char const * ptr = stream_buf->gptr();
93
94 auto overflow_into_buffer = [&]()
95 {
96 size_t count = stream_buf->egptr() - stream_buf->gptr();
97 has_overflowed = true;
98 overflow_buffer.resize(old_count + count);
99 std::ranges::copy(stream_buf->gptr(), stream_buf->egptr(), overflow_buffer.data() + old_count);
100
101 old_count += count;
102 stream_buf->gbump(count);
103 stream_buf->underflow();
104 };
105
106 while (number_of_seen_fields < number_of_fields - 1)
107 {
108 ptr = std::find(ptr, static_cast<char const *>(stream_buf->egptr()), field_sep);
109
110 if (ptr != stream_buf->egptr()) // found an end of field
111 {
112 field_positions[number_of_seen_fields] = ptr - stream_buf->gptr() + old_count;
113 ++ptr;
114 ++number_of_seen_fields;
115 }
116 else
117 {
118 overflow_into_buffer();
119 assert(stream_buf->gptr() != stream_buf->egptr()); // stream is not at end after overflow
120 ptr = stream_buf->gptr();
121 }
122 }
123
124 size_t count = 0;
125
126 while (true) // Note: Might run idefinitely in release mode if no record_end is in input.
127 {
128 ptr = std::find(ptr, static_cast<char const *>(stream_buf->egptr()), record_end);
129
130 if (ptr == stream_buf->egptr()) // stop_chr could not be found in current buffer
131 {
132 overflow_into_buffer();
133 assert(stream_buf->gptr() != stream_buf->egptr()); // stream is not at end after overflow
134 ptr = stream_buf->gptr();
135 }
136 else
137 {
138 count = ptr - stream_buf->gptr(); // processed characters until stop_chr has been found
139 break;
140 }
141 }
142
143 if (has_overflowed)
144 {
145 // need to copy last data
146 overflow_buffer.resize(old_count + count);
147 std::ranges::copy(stream_buf->gptr(), stream_buf->gptr() + count, overflow_buffer.data() + old_count);
148
149 // make data pointer point into overflow
150 data_begin = overflow_buffer.data();
151 }
152
153 stream_buf->gbump(count);
154
155 // instantiate string_views in raw_record
156 field_positions.back() = old_count + count;
157 raw_record[0] = std::string_view{data_begin, field_positions[0]};
158 for (size_t i = 1; i < number_of_fields; ++i)
159 raw_record[i] = std::string_view{data_begin + field_positions[i - 1] + 1, data_begin + field_positions[i]};
160 }
161
163 std::string_view cache_bytes(int32_t const size)
164 {
165 std::string_view result;
166
167 if (stream_buf->egptr() - stream_buf->gptr() >= size)
168 {
169 result = std::string_view{stream_buf->gptr(), stream_buf->gptr() + size};
170 stream_buf->gbump(size);
171 }
172 else
173 {
174 overflow_buffer.resize(size);
175
176 int32_t remaining_bytes{size};
177
178 while (stream_buf->egptr() - stream_buf->gptr() < remaining_bytes) // still not fully in buffer
179 {
180 std::ranges::copy(stream_buf->gptr(),
181 stream_buf->egptr(),
182 overflow_buffer.data() + size - remaining_bytes);
183 size_t const number_of_copied_bytes = stream_buf->egptr() - stream_buf->gptr();
184 remaining_bytes -= number_of_copied_bytes;
185 stream_buf->gbump(number_of_copied_bytes);
186 stream_buf->underflow();
187 assert((remaining_bytes == 0 || stream_buf->egptr() != stream_buf->gptr())
188 && "I still need to read characters but my stream is at end.");
189 }
190
191 if (remaining_bytes != 0) // In stream_buf but not yet copied to overflow_buffer
192 {
193 std::ranges::copy(stream_buf->gptr(),
194 stream_buf->gptr() + remaining_bytes,
195 overflow_buffer.data() + size - remaining_bytes);
196
197 stream_buf->gbump(remaining_bytes);
198 }
199
200 result = {overflow_buffer.begin(), overflow_buffer.end()};
201 }
202
203 return result;
204 }
205
210 fast_istreambuf_iterator & operator++()
211 {
212 assert(stream_buf != nullptr);
213
214 if ((stream_buf->gptr() + 1) == stream_buf->egptr())
215 stream_buf->snextc(); // move right, then underflow()
216 else
217 stream_buf->gbump(1);
218 return *this;
219 }
220
222 void operator++(int)
223 {
224 ++(*this);
225 }
227
229 reference operator*() const
230 {
231 assert(stream_buf != nullptr);
232 return *stream_buf->gptr();
233 }
234
240 friend bool operator==(fast_istreambuf_iterator const & lhs, std::default_sentinel_t const &) noexcept
241 {
242 assert(lhs.stream_buf != nullptr);
243 // compare size of remaining buffer; since ++ always resizes if possible, safe to compare pointers here
244 return (lhs.stream_buf->gptr() == lhs.stream_buf->egptr());
245 }
246
248 friend bool operator!=(fast_istreambuf_iterator const & lhs, std::default_sentinel_t const &) noexcept
249 {
250 return !(lhs == std::default_sentinel);
251 }
252
254 friend bool operator==(std::default_sentinel_t const &, fast_istreambuf_iterator const & rhs) noexcept
255 {
256 return rhs == std::default_sentinel;
257 }
258
260 friend bool operator!=(std::default_sentinel_t const &, fast_istreambuf_iterator const & rhs) noexcept
261 {
262 return !(rhs == std::default_sentinel);
263 }
265};
266
267} // namespace seqan3::detail
T begin(T... args)
T copy(T... args)
T find(T... args)
constexpr ptrdiff_t count
Count the occurrences of a type in a pack.
Definition: type_pack/traits.hpp:164
constexpr size_t size
The size of a type pack.
Definition: type_pack/traits.hpp:146
SeqAn specific customisations in the standard namespace.
T operator!=(T... args)
T size(T... args)
Provides seqan3::detail::stream_buffer_exposer.