SeqAn3  3.0.2
The Modern C++ library for sequence analysis.
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
gz_istream.hpp
1 // zipstream Library License:
2 // --------------------------
3 //
4 // The zlib/libpng License Copyright (c) 2003 Jonathan de Halleux.
5 //
6 // This software is provided 'as-is', without any express or implied warranty. In no event will the authors be held liable for any damages arising from the use of this software.
7 //
8 // Permission is granted to anyone to use this software for any purpose, including commercial applications, and to alter it and redistribute it freely, subject to the following restrictions:
9 //
10 // 1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
11 //
12 // 2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
13 //
14 // 3. This notice may not be removed or altered from any source distribution
15 //
16 // Altered zipstream library header
17 // Author: Jonathan de Halleux, dehalleux@pelikhan.com, 2003
18 // Author: David Weese <david.weese@fu-berlin.de>
19 // Author: Enrico Siragusa <enrico.siragusa@fu-berlin.de>
20 // Author: Hannes Hauswedell <hannes.hauswedell@fu-berlin.de>
21 
22 #pragma once
23 
24 #include <iostream>
25 #include <cstring>
26 #include <vector>
27 
28 #ifndef SEQAN3_HAS_ZLIB
29 #error "This file cannot be used when building without ZLIB-support."
30 #endif
31 
32 #include <zlib.h>
33 
34 namespace seqan3::contrib
35 {
36 
// Default gzip buffer size (900 * 1024); change this to suit your needs.
constexpr size_t GZ_INPUT_DEFAULT_BUFFER_SIZE = 900 * 1024;
39 
40 // --------------------------------------------------------------------------
41 // Class basic_gz_istreambuf
42 // --------------------------------------------------------------------------
43 // A stream decorator that takes compressed input and unzips it to a istream.
44 // The class wraps up the deflate method of the zlib library 1.1.4 https://www.zlib.net
45 
46 template <typename Elem,
47  typename Tr = std::char_traits<Elem>,
48  typename ElemA = std::allocator<Elem>,
49  typename ByteT = unsigned char,
50  typename ByteAT = std::allocator<ByteT>
51  >
52 class basic_gz_istreambuf :
53  public std::basic_streambuf<Elem, Tr>
54 {
55 public:
56  typedef std::basic_istream<Elem, Tr> & istream_reference;
57  typedef ElemA char_allocator_type;
58  typedef ByteT byte_type;
59  typedef ByteAT byte_allocator_type;
60  typedef byte_type * byte_buffer_type;
61  typedef Tr traits_type;
62  typedef typename Tr::char_type char_type;
63  typedef typename Tr::int_type int_type;
64  typedef std::vector<byte_type, byte_allocator_type> byte_vector_type;
65  typedef std::vector<char_type, char_allocator_type> char_vector_type;
66 
67  // Construct a unzip stream
68  // More info on the following parameters can be found in the zlib documentation.
69  basic_gz_istreambuf(istream_reference istream_,
70  size_t window_size_,
71  size_t read_buffer_size_,
72  size_t input_buffer_size_);
73 
74  ~basic_gz_istreambuf();
75 
76  int_type underflow();
77 
78  // returns the compressed input istream
79  istream_reference get_istream() { return m_istream; }
80  // returns the zlib stream structure
81  z_stream & get_zip_stream() { return m_zip_stream; }
82 
83 private:
84  void put_back_from_zip_stream();
85  std::streamsize unzip_from_stream(char_type *, std::streamsize);
86  size_t fill_input_buffer();
87 
88  istream_reference m_istream;
89  z_stream m_zip_stream;
90  int m_err;
91  byte_vector_type m_input_buffer;
92  char_vector_type m_buffer;
93 };
94 
95 // --------------------------------------------------------------------------
96 // Class basic_gz_istreambuf implementation
97 // --------------------------------------------------------------------------
98 
99 template <typename Elem,
100  typename Tr,
101  typename ElemA,
102  typename ByteT,
103  typename ByteAT>
104 basic_gz_istreambuf<Elem, Tr, ElemA, ByteT, ByteAT>::basic_gz_istreambuf(
105  istream_reference istream_,
106  size_t window_size_,
107  size_t read_buffer_size_,
108  size_t input_buffer_size_
109  ) :
110  m_istream(istream_),
111  m_input_buffer(input_buffer_size_),
112  m_buffer(read_buffer_size_)
113 {
114  // setting zalloc, zfree and opaque
115  m_zip_stream.zalloc = (alloc_func)0;
116  m_zip_stream.zfree = (free_func)0;
117 
118  m_zip_stream.next_in = NULL;
119  m_zip_stream.avail_in = 0;
120  m_zip_stream.avail_out = 0;
121  m_zip_stream.next_out = NULL;
122 
123  m_err = inflateInit2(&m_zip_stream, static_cast<int>(window_size_));
124 
125  this->setg(&(m_buffer[0]) + 4, // beginning of putback area
126  &(m_buffer[0]) + 4, // read position
127  &(m_buffer[0]) + 4); // end position
128 }
129 
130 template <typename Elem,
131  typename Tr,
132  typename ElemA,
133  typename ByteT,
134  typename ByteAT>
135 basic_gz_istreambuf<Elem, Tr, ElemA, ByteT, ByteAT>::~basic_gz_istreambuf()
136 {
137  inflateEnd(&m_zip_stream);
138 }
139 
140 template <typename Elem,
141  typename Tr,
142  typename ElemA,
143  typename ByteT,
144  typename ByteAT>
145 typename basic_gz_istreambuf<Elem, Tr, ElemA, ByteT, ByteAT>::int_type
146 basic_gz_istreambuf<Elem, Tr, ElemA, ByteT, ByteAT>::underflow()
147 {
148  if (this->gptr() && (this->gptr() < this->egptr()))
149  return *reinterpret_cast<unsigned char *>(this->gptr());
150 
151  int n_putback = static_cast<int>(this->gptr() - this->eback());
152  if (n_putback > 4)
153  n_putback = 4;
154 
155  std::memmove(&(m_buffer[0]) + (4 - n_putback), this->gptr() - n_putback, n_putback * sizeof(char_type));
156 
157  int num = unzip_from_stream(&(m_buffer[0]) + 4,
158  static_cast<std::streamsize>((m_buffer.size() - 4) * sizeof(char_type)));
159 
160  if (num <= 0) // ERROR or EOF
161  return traits_type::eof();
162 
163  // reset buffer pointers
164  this->setg(&(m_buffer[0]) + (4 - n_putback), // beginning of putback area
165  &(m_buffer[0]) + 4, // read position
166  &(m_buffer[0]) + 4 + num); // end of buffer
167 
168  // return next character
169  return *reinterpret_cast<unsigned char *>(this->gptr());
170 }
171 
172 template <typename Elem,
173  typename Tr,
174  typename ElemA,
175  typename ByteT,
176  typename ByteAT>
177 std::streamsize basic_gz_istreambuf<Elem, Tr, ElemA, ByteT, ByteAT>::unzip_from_stream(
178  char_type * buffer_,
179  std::streamsize buffer_size_)
180 {
181  m_zip_stream.next_out = (byte_buffer_type)buffer_;
182  m_zip_stream.avail_out = static_cast<uInt>(buffer_size_ * sizeof(char_type));
183  size_t count = m_zip_stream.avail_in;
184 
185  do
186  {
187  if (m_zip_stream.avail_in == 0)
188  count = fill_input_buffer();
189 
190  if (m_zip_stream.avail_in)
191  m_err = inflate(&m_zip_stream, Z_SYNC_FLUSH);
192 
193  if (m_err == Z_STREAM_END)
194  inflateReset(&m_zip_stream);
195  else if (m_err < 0)
196  break;
197  }
198  while (m_zip_stream.avail_out > 0 && count > 0);
199 
200  std::streamsize n_read = buffer_size_ - m_zip_stream.avail_out / sizeof(char_type);
201 
202  // check if it is the end
203  if (m_zip_stream.avail_out > 0 && m_err == Z_STREAM_END)
204  put_back_from_zip_stream();
205 
206  return n_read;
207 }
208 
209 template <typename Elem,
210  typename Tr,
211  typename ElemA,
212  typename ByteT,
213  typename ByteAT>
214 size_t basic_gz_istreambuf<Elem, Tr, ElemA, ByteT, ByteAT>::fill_input_buffer()
215 {
216  m_zip_stream.next_in = &(m_input_buffer[0]);
217  m_istream.read((char_type *)(&(m_input_buffer[0])),
218  static_cast<std::streamsize>(m_input_buffer.size() / sizeof(char_type)));
219  return m_zip_stream.avail_in = m_istream.gcount() * sizeof(char_type);
220 }
221 
222 template <typename Elem,
223  typename Tr,
224  typename ElemA,
225  typename ByteT,
226  typename ByteAT>
227 void basic_gz_istreambuf<Elem, Tr, ElemA, ByteT, ByteAT>::put_back_from_zip_stream()
228 {
229  if (m_zip_stream.avail_in == 0)
230  return;
231 
232  m_istream.clear(std::ios::goodbit);
233  m_istream.seekg(-static_cast<int>(m_zip_stream.avail_in), std::ios_base::cur);
234 
235  m_zip_stream.avail_in = 0;
236 }
237 
238 // --------------------------------------------------------------------------
239 // Class basic_gz_istreambase
240 // --------------------------------------------------------------------------
241 // Base class for unzip istreams
242 // Contains a basic_gz_istreambuf.
243 
244 template <typename Elem,
245  typename Tr = std::char_traits<Elem>,
246  typename ElemA = std::allocator<Elem>,
247  typename ByteT = unsigned char,
248  typename ByteAT = std::allocator<ByteT>
249  >
250 class basic_gz_istreambase :
251  virtual public std::basic_ios<Elem, Tr>
252 {
253 public:
254  typedef std::basic_istream<Elem, Tr> & istream_reference;
255  typedef basic_gz_istreambuf<Elem, Tr, ElemA, ByteT, ByteAT> unzip_streambuf_type;
256 
257  basic_gz_istreambase(istream_reference ostream_,
258  size_t window_size_,
259  size_t read_buffer_size_,
260  size_t input_buffer_size_) :
261  m_buf(ostream_, window_size_, read_buffer_size_, input_buffer_size_)
262  {
263  this->init(&m_buf);
264  }
265 
266  // returns the underlying unzip istream object
267  unzip_streambuf_type * rdbuf() { return &m_buf; }
268 
269 private:
270  unzip_streambuf_type m_buf;
271 };
272 
273 // --------------------------------------------------------------------------
274 // Class basic_gz_istream
275 // --------------------------------------------------------------------------
276 // A zipper istream
277 //
278 // This class is a istream decorator that behaves 'almost' like any other ostream.
279 // At construction, it takes any istream that shall be used to input of the compressed data.
280 //
281 // Simlpe example:
282 //
283 // // create a stream on zip string
284 // istringstream istringstream_( ostringstream_.str());
285 // // create unzipper istream
286 // zip_istream unzipper( istringstream_);
287 // // read and unzip
288 // unzipper>>f_r>>d_r>>ui_r>>ul_r>>us_r>>c_r>>dum_r;
289 
290 template <typename Elem,
291  typename Tr = std::char_traits<Elem>,
292  typename ElemA = std::allocator<Elem>,
293  typename ByteT = unsigned char,
294  typename ByteAT = std::allocator<ByteT>
295  >
296 class basic_gz_istream :
297  public basic_gz_istreambase<Elem, Tr, ElemA, ByteT, ByteAT>,
298  public std::basic_istream<Elem, Tr>
299 {
300 public:
301  typedef basic_gz_istreambase<Elem, Tr, ElemA, ByteT, ByteAT> zip_istreambase_type;
302  typedef std::basic_istream<Elem, Tr> istream_type;
303  typedef istream_type & istream_reference;
304  typedef ByteT byte_type;
305  typedef Tr traits_type;
306 
307  // Construct a unzipper stream
308  //
309  // istream_ input buffer
310  // window_size_
311  // read_buffer_size_
312  // input_buffer_size_
313 
314  basic_gz_istream(istream_reference istream_,
315  size_t window_size_ = 31, // 15 (size) + 16 (gzip header)
316  size_t read_buffer_size_ = GZ_INPUT_DEFAULT_BUFFER_SIZE,
317  size_t input_buffer_size_ = GZ_INPUT_DEFAULT_BUFFER_SIZE) :
318  zip_istreambase_type(istream_, window_size_, read_buffer_size_, input_buffer_size_),
319  istream_type(this->rdbuf())
320  {}
321 
322 #ifdef _WIN32
323 private:
324  void _Add_vtordisp1() {} // Required to avoid VC++ warning C4250
325  void _Add_vtordisp2() {} // Required to avoid VC++ warning C4250
326 #endif
327 };
328 
329 // ===========================================================================
330 // Typedefs
331 // ===========================================================================
332 
333 // A typedef for basic_gz_istream<char>
334 typedef basic_gz_istream<char> gz_istream;
335 // A typedef for basic_gz_istream<wchart>
336 typedef basic_gz_istream<wchar_t> gz_wistream;
337 
338 } // namespace seqan3::contrib
std::basic_ios::rdbuf
T rdbuf(T... args)
cstring
output_stream_over::char_type
typename stream::char_type char_type
Declares the associated char type.
vector
std::basic_streambuf
iostream
std::basic_streambuf< Elem, std::char_traits< Elem > >::underflow
T underflow(T... args)
std::char_traits
std::streamsize
std::basic_ios::init
T init(T... args)
std::allocator
std::memmove
T memmove(T... args)
std::basic_istream
seqan3::pack_traits::count
constexpr ptrdiff_t count
Count the occurrences of a type in a pack.
Definition: traits.hpp:134
std::basic_ios