// Build guard: emits a hard compile error when SEQAN3_HAS_ZLIB is not defined.
// The check is bypassed when SEQAN3_HEADER_TEST is defined.
28#if !defined(SEQAN3_HAS_ZLIB) && !defined(SEQAN3_HEADER_TEST)
29# error "This file cannot be used when building without GZip-support."
32#if defined(SEQAN3_HAS_ZLIB)
36namespace seqan3::contrib
// Mutable namespace-scope setting, initialised to 4. Declared `inline`, so every
// translation unit including this header shares one object; [[maybe_unused]] suppresses
// unused-variable warnings in translation units that never read it.
// NOTE(review): presumably the number of threads used by the BGZF streams — the
// consumers are not part of this excerpt, confirm against them.
41[[maybe_unused]]
inline uint64_t bgzf_thread_count = 4;
54 '\x00',
'\x00',
'\x00',
'\x00',
55 '\x00',
'\xff',
'\x06',
'\x00',
56 '\x42',
'\x43',
'\x02',
'\x00',
57 '\x1b',
'\x00',
'\x03',
'\x00',
58 '\x00',
'\x00',
'\x00',
'\x00',
59 '\x00',
'\x00',
'\x00',
'\x00'}};
// Primary template of the per-algorithm compression state, selected by the tag type
// TAlgTag. The generic version is intentionally empty; specialisations for
// detail::gz_compression and detail::bgzf_compression follow in this file.
61template <
typename TAlgTag>
62struct CompressionContext {};
// Forward declaration only: there is no generic definition, so DefaultPageSize can be
// used solely with tags that provide a specialisation (detail::bgzf_compression below).
64template <
typename TAlgTag>
65struct DefaultPageSize;
// Specialisation for plain gz compression. Its body is not part of this excerpt; the
// functions below access a `strm` member through it and hand `&ctx.strm` to the zlib
// calls deflateInit2()/inflateInit2()/deflate()/inflate().
68struct CompressionContext<detail::gz_compression>
// Specialisation for BGZF. Derives from the gz context, so it carries the same state
// and can be passed to the gz overloads via a static_cast to the base (see compressInit /
// decompressInit for the BGZF tag).
79struct CompressionContext<detail::bgzf_compression>:
80 CompressionContext<detail::gz_compression>
// Length of a BGZF block header, taken from the size of the BGZF magic header array.
82 static constexpr size_t BLOCK_HEADER_LENGTH = detail::bgzf_compression::magic_header.size();
// NOTE(review): not read or written anywhere in this excerpt; presumably a cursor into
// the header while it is being parsed — confirm against the stream classes.
83 unsigned char headerPos;
// Size constants for BGZF blocks. VALUE is what remains of one maximal block
// (64 KiB) after subtracting header, footer and ZLIB_BLOCK_OVERHEAD.
87struct DefaultPageSize<detail::bgzf_compression>
// Upper bound for one complete block: 64 * 1024 = 65536 bytes.
89 static const unsigned MAX_BLOCK_SIZE = 64 * 1024;
// The footer is 8 bytes: _compressBlock writes a 32-bit CRC followed by a 32-bit length.
90 static const unsigned BLOCK_FOOTER_LENGTH = 8;
// NOTE(review): presumably the worst-case deflate framing overhead when data is stored
// without compression — the rationale is not visible here, confirm against RFC 1951.
92 static const unsigned ZLIB_BLOCK_OVERHEAD = 5;
// Re-exports the header length of the BGZF compression context under this struct.
96 enum { BLOCK_HEADER_LENGTH = CompressionContext<detail::bgzf_compression>::BLOCK_HEADER_LENGTH };
// Payload budget per block: block size minus header, footer and overhead.
97 static const unsigned VALUE = MAX_BLOCK_SIZE - BLOCK_HEADER_LENGTH - BLOCK_FOOTER_LENGTH - ZLIB_BLOCK_OVERHEAD;
// Prepares ctx.strm for compression by calling zlib's deflateInit2().
//
// ctx: gz compression context whose `strm` member is initialised in place.
// Throws io_error with the message below.
// NOTE(review): the condition guarding the throw is not part of this excerpt —
// presumably a check of `status` against Z_OK.
109compressInit(CompressionContext<detail::gz_compression> & ctx)
// The zlib manual documents a negative windowBits as "raw deflate": no zlib/gzip
// header or trailer is produced by zlib itself, with a window of 2^15 bytes.
111 const int GZIP_WINDOW_BITS = -15;
112 const int Z_DEFAULT_MEM_LEVEL = 8;
// NULL allocator callbacks make zlib fall back to its default allocation routines.
114 ctx.strm.zalloc = NULL;
115 ctx.strm.zfree = NULL;
// Compression level is fixed to Z_BEST_SPEED with the default strategy.
121 int status = deflateInit2(&ctx.strm, Z_BEST_SPEED, Z_DEFLATED,
122 GZIP_WINDOW_BITS, Z_DEFAULT_MEM_LEVEL, Z_DEFAULT_STRATEGY);
124 throw io_error(
"Calling deflateInit2() failed for gz file.");
// BGZF overload: forwards to the gz overload by viewing the context as its
// CompressionContext<detail::gz_compression> base; no BGZF-specific setup is visible here.
132compressInit(CompressionContext<detail::bgzf_compression> & ctx)
134 compressInit(
static_cast<CompressionContext<detail::gz_compression> &
>(ctx));
// Reads a 16-bit value from `buffer` and returns it after detail::to_little_endian().
// NOTE(review): the lines that declare `tmp` and fill it from `buffer` are missing from
// this excerpt — presumably a byte-wise copy of sizeof(uint16_t) bytes; confirm.
143_bgzfUnpack16(
char const * buffer)
147 return detail::to_little_endian(tmp);
// Reads a 32-bit value from `buffer` and returns it after detail::to_little_endian().
// NOTE(review): the lines that declare `tmp` and fill it from `buffer` are missing from
// this excerpt — presumably a byte-wise copy of sizeof(uint32_t) bytes; confirm.
151_bgzfUnpack32(
char const * buffer)
155 return detail::to_little_endian(tmp);
// Writes a 16-bit `value` to `buffer`.
// The value is first passed through detail::to_little_endian(); the expression below
// is the one-past-the-end pointer of its object representation.
// NOTE(review): the call that consumes this byte range (and its destination argument)
// is not part of this excerpt — presumably a copy into `buffer`; confirm.
163_bgzfPack16(
char * buffer, uint16_t value)
165 value = detail::to_little_endian(value);
167 reinterpret_cast<char *
>(&value) +
sizeof(uint16_t),
// Writes a 32-bit `value` to `buffer`.
// The value is first passed through detail::to_little_endian(); the expression below
// is the one-past-the-end pointer of its object representation.
// NOTE(review): the call that consumes this byte range (and its destination argument)
// is not part of this excerpt — presumably a copy into `buffer`; confirm.
172_bgzfPack32(
char * buffer, uint32_t value)
174 value = detail::to_little_endian(value);
176 reinterpret_cast<char *
>(&value) +
sizeof(uint32_t),
// Compresses one source range into a single BGZF block in the destination buffer.
//
// dstBegin / dstCapacity: destination buffer; must be larger than header plus footer and
//                         must consist of 1-byte elements (both checked by assert only).
// srcBegin / srcLength:   source elements; srcLength * sizeof(TSourceValue) bytes are read.
// ctx:                    BGZF compression context providing the zlib stream.
// Returns the total number of bytes of the finished block (header + deflate data + footer).
// Throws io_error if deflate() does not finish the stream in one call, or with the
// deflateEnd() message below.
// NOTE(review): some original lines are missing from this excerpt (e.g. whatever fills
// the header bytes and the condition guarding the deflateEnd() throw).
184template <
typename TDestValue,
typename TDestCapacity,
typename TSourceValue,
typename TSourceLength>
186_compressBlock(TDestValue *dstBegin, TDestCapacity dstCapacity,
187 TSourceValue *srcBegin, TSourceLength srcLength, CompressionContext<detail::bgzf_compression> & ctx)
189 const size_t BLOCK_HEADER_LENGTH = DefaultPageSize<detail::bgzf_compression>::BLOCK_HEADER_LENGTH;
190 const size_t BLOCK_FOOTER_LENGTH = DefaultPageSize<detail::bgzf_compression>::BLOCK_FOOTER_LENGTH;
// Preconditions; these are runtime asserts and vanish when NDEBUG is defined.
192 assert(dstCapacity > BLOCK_HEADER_LENGTH + BLOCK_FOOTER_LENGTH);
193 assert(
sizeof(TDestValue) == 1u);
194 assert(
sizeof(
unsigned) == 4u);
// Deflate output starts right after the header; the available output space excludes
// both header and footer so the footer can always be appended afterwards.
201 ctx.strm.next_in = (Bytef *)(srcBegin);
202 ctx.strm.next_out = (Bytef *)(dstBegin + BLOCK_HEADER_LENGTH);
203 ctx.strm.avail_in = srcLength *
sizeof(TSourceValue);
204 ctx.strm.avail_out = dstCapacity - BLOCK_HEADER_LENGTH - BLOCK_FOOTER_LENGTH;
// Z_FINISH: the whole input must be compressed in this single call. Any status other
// than Z_STREAM_END is treated as failure; the stream is released before throwing.
206 int status = deflate(&ctx.strm, Z_FINISH);
207 if (status != Z_STREAM_END)
209 deflateEnd(&ctx.strm);
210 throw io_error(
"Deflation failed. Compressed BGZF data is too big.");
213 status = deflateEnd(&ctx.strm);
215 throw io_error(
"BGZF deflateEnd() failed.");
// avail_out started at dstCapacity - header - footer, so this difference equals
// header + footer + number of deflate bytes produced, i.e. the total block length.
222 size_t len = dstCapacity - ctx.strm.avail_out;
// Total block length minus one goes into the 16-bit field at byte offset 16
// (the BSIZE field of the BGZF specification).
223 _bgzfPack16(dstBegin + 16, len - 1);
// Footer: CRC-32 of the uncompressed source bytes, then their byte count (32 bit each).
225 dstBegin += len - BLOCK_FOOTER_LENGTH;
226 _bgzfPack32(dstBegin, crc32(crc32(0u, NULL, 0u), (Bytef *)(srcBegin), srcLength *
sizeof(TSourceValue)));
227 _bgzfPack32(dstBegin + 4, srcLength *
sizeof(TSourceValue));
229 return dstCapacity - ctx.strm.avail_out;
// Prepares ctx.strm for decompression by calling zlib's inflateInit2().
//
// ctx: gz compression context whose `strm` member is initialised in place.
// Throws io_error with the message below.
// NOTE(review): the condition guarding the throw is not part of this excerpt —
// presumably a check of `status` against Z_OK.
237decompressInit(CompressionContext<detail::gz_compression> & ctx)
// The zlib manual documents a negative windowBits as "raw inflate": zlib does not
// look for a zlib/gzip header or trailer in the input.
239 const int GZIP_WINDOW_BITS = -15;
// NULL allocator callbacks make zlib fall back to its default allocation routines.
241 ctx.strm.zalloc = NULL;
242 ctx.strm.zfree = NULL;
243 int status = inflateInit2(&ctx.strm, GZIP_WINDOW_BITS);
245 throw io_error(
"GZip inflateInit2() failed.");
// BGZF overload: forwards to the gz overload by viewing the context as its
// CompressionContext<detail::gz_compression> base; no BGZF-specific setup is visible here.
253decompressInit(CompressionContext<detail::bgzf_compression> & ctx)
255 decompressInit(
static_cast<CompressionContext<detail::gz_compression> &
>(ctx));
// Decompresses exactly one BGZF block into the destination buffer and verifies it.
//
// dstBegin / dstCapacity: destination buffer; dstCapacity * sizeof(TDestValue) bytes
//                         are made available to zlib.
// srcBegin / srcLength:   the complete block (header, deflate data, footer); source
//                         elements must be 1 byte wide (checked by assert only).
// ctx:                    BGZF compression context providing the zlib stream.
// Returns the number of decompressed TDestValue elements.
// Throws io_error when the block is too short, the header is invalid, the length stored
// in the header disagrees with srcLength, inflation does not reach the end of the stream,
// or the footer's CRC-32 / size do not match the decompressed data.
// NOTE(review): some original lines are missing from this excerpt (e.g. the condition
// guarding the inflateEnd() throw).
263template <
typename TDestValue,
typename TDestCapacity,
typename TSourceValue,
typename TSourceLength>
265_decompressBlock(TDestValue *dstBegin, TDestCapacity dstCapacity,
266 TSourceValue *srcBegin, TSourceLength srcLength, CompressionContext<detail::bgzf_compression> & ctx)
268 const size_t BLOCK_HEADER_LENGTH = DefaultPageSize<detail::bgzf_compression>::BLOCK_HEADER_LENGTH;
269 const size_t BLOCK_FOOTER_LENGTH = DefaultPageSize<detail::bgzf_compression>::BLOCK_FOOTER_LENGTH;
// Preconditions; these are runtime asserts and vanish when NDEBUG is defined.
271 assert(
sizeof(TSourceValue) == 1u);
272 assert(
sizeof(
unsigned) == 4u);
// A block must hold at least one byte beyond header and footer.
276 if (srcLength <= BLOCK_HEADER_LENGTH + BLOCK_FOOTER_LENGTH)
277 throw io_error(
"BGZF block too short.");
279 if (!detail::bgzf_compression::validate_header(
std::span{srcBegin, srcLength}))
280 throw io_error(
"Invalid BGZF block header.");
// The 16-bit field at offset 16 stores the total block length minus one (mirrors
// _compressBlock); the caller must pass exactly one whole block.
282 size_t compressedLen = _bgzfUnpack16(srcBegin + 16) + 1u;
283 if (compressedLen != srcLength)
284 throw io_error(
"BGZF compressed size mismatch.");
// Only the bytes between header and footer are fed to inflate().
290 ctx.strm.next_in = (Bytef *)(srcBegin + BLOCK_HEADER_LENGTH);
291 ctx.strm.next_out = (Bytef *)(dstBegin);
292 ctx.strm.avail_in = srcLength - BLOCK_HEADER_LENGTH - BLOCK_FOOTER_LENGTH;
293 ctx.strm.avail_out = dstCapacity *
sizeof(TDestValue);
// Z_FINISH: the block must decompress completely in this single call. Every status
// other than Z_STREAM_END takes this path, not only a too-small destination buffer.
295 int status = inflate(&ctx.strm, Z_FINISH);
296 if (status != Z_STREAM_END)
298 inflateEnd(&ctx.strm);
299 throw io_error(
"Inflation failed. Decompressed BGZF data is too big.");
302 status = inflateEnd(&ctx.strm);
304 throw io_error(
"BGZF inflateEnd() failed.");
// CRC-32 over the bytes just produced.
// NOTE(review): the produced length is computed as dstCapacity - avail_out, although
// avail_out was initialised with dstCapacity * sizeof(TDestValue); the two agree only
// for 1-byte destination elements — confirm that this is always the case.
311 unsigned crc = crc32(crc32(0u, NULL, 0u), (Bytef *)(dstBegin), dstCapacity - ctx.strm.avail_out);
// Footer layout: 32-bit CRC followed by the 32-bit uncompressed size.
313 srcBegin += compressedLen - BLOCK_FOOTER_LENGTH;
314 if (_bgzfUnpack32(srcBegin) != crc)
315 throw io_error(
"BGZF wrong checksum.");
317 if (_bgzfUnpack32(srcBegin + 4) != dstCapacity - ctx.strm.avail_out)
318 throw io_error(
"BGZF size mismatch.");
320 return (dstCapacity - ctx.strm.avail_out) /
sizeof(TDestValue);
Provides various transformation traits used by the range module.
Provides exceptions used in the I/O module.
Provides utility functions for bit twiddling.
T uninitialized_copy(T... args)