SeqAn3 3.4.0-rc.1
The Modern C++ library for sequence analysis.
Loading...
Searching...
No Matches
gz_istream.hpp
1// SPDX-FileCopyrightText: 2003 Jonathan de Halleux
2// SPDX-License-Identifier: Zlib
3
4// zipstream Library License:
5// --------------------------
6//
7// The zlib/libpng License Copyright (c) 2003 Jonathan de Halleux.
8//
9// This software is provided 'as-is', without any express or implied warranty. In no event will the authors be held liable for any damages arising from the use of this software.
10//
11// Permission is granted to anyone to use this software for any purpose, including commercial applications, and to alter it and redistribute it freely, subject to the following restrictions:
12//
13// 1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
14//
15// 2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
16//
17// 3. This notice may not be removed or altered from any source distribution
18//
19// Altered zipstream library header
20// Author: Jonathan de Halleux, dehalleux@pelikhan.com, 2003
21// Author: David Weese <david.weese@fu-berlin.de>
22// Author: Enrico Siragusa <enrico.siragusa@fu-berlin.de>
23// Author: Hannes Hauswedell <hannes.hauswedell@fu-berlin.de>
24
25#pragma once
26
27#include <iostream>
28#include <cstring>
29#include <vector>
30
31#if !defined(SEQAN3_HAS_ZLIB) && !defined(SEQAN3_HEADER_TEST)
32#error "This file cannot be used when building without ZLIB-support."
33#endif // !defined(SEQAN3_HAS_ZLIB) && !defined(SEQAN3_HEADER_TEST)
34
35#if defined(SEQAN3_HAS_ZLIB)
36
37#include <zlib.h>
38
39namespace seqan3::contrib
40{
41
// Default gzip buffer size (900 * 1024 buffer elements), change this to suit your needs.
// Declared `inline constexpr` so that every translation unit including this header
// refers to one and the same object.
inline constexpr size_t GZ_INPUT_DEFAULT_BUFFER_SIZE = 921600;
44
45// --------------------------------------------------------------------------
46// Class basic_gz_istreambuf
47// --------------------------------------------------------------------------
48// A stream decorator that takes compressed input and unzips it to a istream.
49// The class wraps up the inflate (decompression) method of the zlib library 1.1.4 https://www.zlib.net
50
51template <typename Elem,
52 typename Tr = std::char_traits<Elem>,
53 typename ElemA = std::allocator<Elem>,
54 typename ByteT = unsigned char,
55 typename ByteAT = std::allocator<ByteT>
56 >
57class basic_gz_istreambuf :
58 public std::basic_streambuf<Elem, Tr>
59{
60public:
61 typedef std::basic_istream<Elem, Tr> & istream_reference;
62 typedef ElemA char_allocator_type;
63 typedef ByteT byte_type;
64 typedef ByteAT byte_allocator_type;
65 typedef byte_type * byte_buffer_type;
66 typedef Tr traits_type;
67 typedef typename Tr::char_type char_type;
68 typedef typename Tr::int_type int_type;
69 typedef std::vector<byte_type, byte_allocator_type> byte_vector_type;
70 typedef std::vector<char_type, char_allocator_type> char_vector_type;
71
72 // Construct a unzip stream
73 // More info on the following parameters can be found in the zlib documentation.
74 basic_gz_istreambuf(istream_reference istream_,
75 size_t window_size_,
76 size_t read_buffer_size_,
77 size_t input_buffer_size_);
78
79 ~basic_gz_istreambuf();
80
81 int_type underflow();
82
83 // returns the compressed input istream
84 istream_reference get_istream() { return m_istream; }
85 // returns the zlib stream structure
86 z_stream & get_zip_stream() { return m_zip_stream; }
87
88private:
89 void put_back_from_zip_stream();
90 std::streamsize unzip_from_stream(char_type *, std::streamsize);
91 size_t fill_input_buffer();
92
93 istream_reference m_istream;
94 z_stream m_zip_stream;
95 int m_err;
96 byte_vector_type m_input_buffer;
97 char_vector_type m_buffer;
98};
99
100// --------------------------------------------------------------------------
101// Class basic_gz_istreambuf implementation
102// --------------------------------------------------------------------------
103
104template <typename Elem,
105 typename Tr,
106 typename ElemA,
107 typename ByteT,
108 typename ByteAT>
109basic_gz_istreambuf<Elem, Tr, ElemA, ByteT, ByteAT>::basic_gz_istreambuf(
110 istream_reference istream_,
111 size_t window_size_,
112 size_t read_buffer_size_,
113 size_t input_buffer_size_
114 ) :
115 m_istream(istream_),
116 m_input_buffer(input_buffer_size_),
117 m_buffer(read_buffer_size_)
118{
119 // setting zalloc, zfree and opaque
120 m_zip_stream.zalloc = (alloc_func)0;
121 m_zip_stream.zfree = (free_func)0;
122
123 m_zip_stream.next_in = NULL;
124 m_zip_stream.avail_in = 0;
125 m_zip_stream.avail_out = 0;
126 m_zip_stream.next_out = NULL;
127
128 m_err = inflateInit2(&m_zip_stream, static_cast<int>(window_size_));
129
130 this->setg(&(m_buffer[0]) + 4, // beginning of putback area
131 &(m_buffer[0]) + 4, // read position
132 &(m_buffer[0]) + 4); // end position
133}
134
135template <typename Elem,
136 typename Tr,
137 typename ElemA,
138 typename ByteT,
139 typename ByteAT>
140basic_gz_istreambuf<Elem, Tr, ElemA, ByteT, ByteAT>::~basic_gz_istreambuf()
141{
142 inflateEnd(&m_zip_stream);
143}
144
145template <typename Elem,
146 typename Tr,
147 typename ElemA,
148 typename ByteT,
149 typename ByteAT>
150typename basic_gz_istreambuf<Elem, Tr, ElemA, ByteT, ByteAT>::int_type
151basic_gz_istreambuf<Elem, Tr, ElemA, ByteT, ByteAT>::underflow()
152{
153 if (this->gptr() && (this->gptr() < this->egptr()))
154 return *reinterpret_cast<unsigned char *>(this->gptr());
155
156 int n_putback = static_cast<int>(this->gptr() - this->eback());
157 if (n_putback > 4)
158 n_putback = 4;
159
160 std::memmove(&(m_buffer[0]) + (4 - n_putback), this->gptr() - n_putback, n_putback * sizeof(char_type));
161
162 int num = unzip_from_stream(&(m_buffer[0]) + 4,
163 static_cast<std::streamsize>((m_buffer.size() - 4) * sizeof(char_type)));
164
165 if (num <= 0) // ERROR or EOF
166 return traits_type::eof();
167
168 // reset buffer pointers
169 this->setg(&(m_buffer[0]) + (4 - n_putback), // beginning of putback area
170 &(m_buffer[0]) + 4, // read position
171 &(m_buffer[0]) + 4 + num); // end of buffer
172
173 // return next character
174 return *reinterpret_cast<unsigned char *>(this->gptr());
175}
176
177template <typename Elem,
178 typename Tr,
179 typename ElemA,
180 typename ByteT,
181 typename ByteAT>
182std::streamsize basic_gz_istreambuf<Elem, Tr, ElemA, ByteT, ByteAT>::unzip_from_stream(
183 char_type * buffer_,
184 std::streamsize buffer_size_)
185{
186 m_zip_stream.next_out = (byte_buffer_type)buffer_;
187 m_zip_stream.avail_out = static_cast<uInt>(buffer_size_ * sizeof(char_type));
188 size_t count = m_zip_stream.avail_in;
189
190 do
191 {
192 if (m_zip_stream.avail_in == 0)
193 count = fill_input_buffer();
194
195 if (m_zip_stream.avail_in)
196 m_err = inflate(&m_zip_stream, Z_SYNC_FLUSH);
197
198 if (m_err == Z_STREAM_END)
199 inflateReset(&m_zip_stream);
200 else if (m_err < 0)
201 break;
202 }
203 while (m_zip_stream.avail_out > 0 && count > 0);
204
205 std::streamsize n_read = buffer_size_ - m_zip_stream.avail_out / sizeof(char_type);
206
207 // check if it is the end
208 if (m_zip_stream.avail_out > 0 && m_err == Z_STREAM_END)
209 put_back_from_zip_stream();
210
211 return n_read;
212}
213
214template <typename Elem,
215 typename Tr,
216 typename ElemA,
217 typename ByteT,
218 typename ByteAT>
219size_t basic_gz_istreambuf<Elem, Tr, ElemA, ByteT, ByteAT>::fill_input_buffer()
220{
221 m_zip_stream.next_in = &(m_input_buffer[0]);
222 m_istream.read((char_type *)(&(m_input_buffer[0])),
223 static_cast<std::streamsize>(m_input_buffer.size() / sizeof(char_type)));
224 return m_zip_stream.avail_in = m_istream.gcount() * sizeof(char_type);
225}
226
227template <typename Elem,
228 typename Tr,
229 typename ElemA,
230 typename ByteT,
231 typename ByteAT>
232void basic_gz_istreambuf<Elem, Tr, ElemA, ByteT, ByteAT>::put_back_from_zip_stream()
233{
234 if (m_zip_stream.avail_in == 0)
235 return;
236
237 m_istream.clear(std::ios::goodbit);
238 m_istream.seekg(-static_cast<int>(m_zip_stream.avail_in), std::ios_base::cur);
239
240 m_zip_stream.avail_in = 0;
241}
242
243// --------------------------------------------------------------------------
244// Class basic_gz_istreambase
245// --------------------------------------------------------------------------
246// Base class for unzip istreams
247// Contains a basic_gz_istreambuf.
248
249template <typename Elem,
250 typename Tr = std::char_traits<Elem>,
251 typename ElemA = std::allocator<Elem>,
252 typename ByteT = unsigned char,
253 typename ByteAT = std::allocator<ByteT>
254 >
255class basic_gz_istreambase :
256 virtual public std::basic_ios<Elem, Tr>
257{
258public:
259 typedef std::basic_istream<Elem, Tr> & istream_reference;
260 typedef basic_gz_istreambuf<Elem, Tr, ElemA, ByteT, ByteAT> unzip_streambuf_type;
261
262 basic_gz_istreambase(istream_reference ostream_,
263 size_t window_size_,
264 size_t read_buffer_size_,
265 size_t input_buffer_size_) :
266 m_buf(ostream_, window_size_, read_buffer_size_, input_buffer_size_)
267 {
268 this->init(&m_buf);
269 }
270
271 // returns the underlying unzip istream object
272 unzip_streambuf_type * rdbuf() { return &m_buf; }
273
274private:
275 unzip_streambuf_type m_buf;
276};
277
278// --------------------------------------------------------------------------
279// Class basic_gz_istream
280// --------------------------------------------------------------------------
281// A zipper istream
282//
283// This class is an istream decorator that behaves 'almost' like any other istream.
284// At construction, it takes any istream that shall be used as the source of the compressed data.
285//
286// Simple example:
287//
288// // create a stream on zip string
289// istringstream istringstream_( ostringstream_.str());
290// // create unzipper istream
291// zip_istream unzipper( istringstream_);
292// // read and unzip
293// unzipper>>f_r>>d_r>>ui_r>>ul_r>>us_r>>c_r>>dum_r;
294
295template <typename Elem,
296 typename Tr = std::char_traits<Elem>,
297 typename ElemA = std::allocator<Elem>,
298 typename ByteT = unsigned char,
299 typename ByteAT = std::allocator<ByteT>
300 >
301class basic_gz_istream :
302 public basic_gz_istreambase<Elem, Tr, ElemA, ByteT, ByteAT>,
303 public std::basic_istream<Elem, Tr>
304{
305public:
306 typedef basic_gz_istreambase<Elem, Tr, ElemA, ByteT, ByteAT> zip_istreambase_type;
307 typedef std::basic_istream<Elem, Tr> istream_type;
308 typedef istream_type & istream_reference;
309 typedef ByteT byte_type;
310 typedef Tr traits_type;
311
312 // Construct a unzipper stream
313 //
314 // istream_ input buffer
315 // window_size_
316 // read_buffer_size_
317 // input_buffer_size_
318
319 basic_gz_istream(istream_reference istream_,
320 size_t window_size_ = 31, // 15 (size) + 16 (gzip header)
321 size_t read_buffer_size_ = GZ_INPUT_DEFAULT_BUFFER_SIZE,
322 size_t input_buffer_size_ = GZ_INPUT_DEFAULT_BUFFER_SIZE) :
323 zip_istreambase_type(istream_, window_size_, read_buffer_size_, input_buffer_size_),
324 istream_type(this->rdbuf())
325 {}
326
327#ifdef _WIN32
328private:
329 void _Add_vtordisp1() {} // Required to avoid VC++ warning C4250
330 void _Add_vtordisp2() {} // Required to avoid VC++ warning C4250
331#endif
332};
333
334// ===========================================================================
335// Typedefs
336// ===========================================================================
337
338// A typedef for basic_gz_istream<char>
339typedef basic_gz_istream<char> gz_istream;
340// A typedef for basic_gz_istream<wchart>
341typedef basic_gz_istream<wchar_t> gz_wistream;
342
343} // namespace seqan3::contrib
344
345#endif // defined(SEQAN3_HAS_ZLIB)
constexpr ptrdiff_t count
Count the occurrences of a type in a pack.
Definition type_pack/traits.hpp:161
T init(T... args)
T memmove(T... args)
T rdbuf(T... args)
Hide me