24 #error "This file cannot be used when building without GZip-support."
25 #endif // SEQAN3_HAS_ZLIB
34 namespace seqan3::contrib
53 '\x00',
'\x00',
'\x00',
'\x00',
54 '\x00',
'\xff',
'\x06',
'\x00',
55 '\x42',
'\x43',
'\x02',
'\x00',
56 '\x1b',
'\x00',
'\x03',
'\x00',
57 '\x00',
'\x00',
'\x00',
'\x00',
58 '\x00',
'\x00',
'\x00',
'\x00'}};
// Primary template for the per-algorithm compression state, selected by the
// tag type TAlgTag. The generic version has no members; the specializations
// for detail::gz_compression and detail::bgzf_compression carry the data.
60 template <
typename TAlgTag>
61 struct CompressionContext {};
// Trait that holds block-size constants for a compression tag. Only declared
// here (no generic definition); a specialization for
// detail::bgzf_compression is provided further down.
63 template <
typename TAlgTag>
64 struct DefaultPageSize;
// Compression state for the detail::gz_compression tag.
// The free functions below access a `strm` member of this type and pass its
// address to zlib (deflateInit2(), inflateInit2(), deflate(), inflate()).
67 struct CompressionContext<detail::gz_compression>
// Compression state for the detail::bgzf_compression tag. It inherits the
// gz state and adds BGZF-specific members.
78 struct CompressionContext<detail::bgzf_compression>:
79 CompressionContext<detail::gz_compression>
// Length of a BGZF block header: the number of elements in
// detail::bgzf_compression::magic_header.
81 static constexpr
size_t BLOCK_HEADER_LENGTH = detail::bgzf_compression::magic_header.size();
// NOTE(review): presumably a position inside the block header used by the
// stream code; it is not read by the helpers below - confirm against callers.
82 unsigned char headerPos;
// Size constants for BGZF blocks.
86 struct DefaultPageSize<detail::bgzf_compression>
// Upper bound for one block: 64 KiB.
88 static const unsigned MAX_BLOCK_SIZE = 64 * 1024;
// Length of the block trailer. _compressBlock() writes two 32-bit fields
// there: the CRC32 of the input and the input size in bytes.
89 static const unsigned BLOCK_FOOTER_LENGTH = 8;
// NOTE(review): presumably the per-block overhead of a stored (uncompressed)
// deflate block, see RFC 1951 section 3.2.4 - confirm.
91 static const unsigned ZLIB_BLOCK_OVERHEAD = 5;
// Re-exports the header length of the BGZF compression context under this
// trait so that it can be used in the computation of VALUE.
95 enum { BLOCK_HEADER_LENGTH = CompressionContext<detail::bgzf_compression>::BLOCK_HEADER_LENGTH };
// What remains of MAX_BLOCK_SIZE after subtracting header, footer and the
// zlib block overhead.
96 static const unsigned VALUE = MAX_BLOCK_SIZE - BLOCK_HEADER_LENGTH - BLOCK_FOOTER_LENGTH - ZLIB_BLOCK_OVERHEAD;
// Prepares ctx.strm for compression by calling zlib's deflateInit2().
// Throws io_error with the message "Calling deflateInit2() failed for gz file."
// to report an initialization failure.
108 compressInit(CompressionContext<detail::gz_compression> & ctx)
// A negative windowBits value is documented by zlib as requesting raw deflate
// data, i.e. without a zlib header or trailer.
110 const int GZIP_WINDOW_BITS = -15;
// 8 is the memLevel that the zlib manual lists as the default.
111 const int Z_DEFAULT_MEM_LEVEL = 8;
// NULL allocator hooks; per the zlib manual this selects zlib's default
// allocation routines.
113 ctx.strm.zalloc = NULL;
114 ctx.strm.zfree = NULL;
// The compression level is Z_BEST_SPEED, not Z_DEFAULT_COMPRESSION.
120 int status = deflateInit2(&ctx.strm, Z_BEST_SPEED, Z_DEFLATED,
121 GZIP_WINDOW_BITS, Z_DEFAULT_MEM_LEVEL, Z_DEFAULT_STRATEGY);
123 throw io_error(
"Calling deflateInit2() failed for gz file.");
// BGZF overload: initializes the inherited gz part of the context by casting
// ctx to its base class and delegating to the gz overload of compressInit().
131 compressInit(CompressionContext<detail::bgzf_compression> & ctx)
133 compressInit(
static_cast<CompressionContext<detail::gz_compression> &
>(ctx));
// Decodes a 16-bit field that starts at `buffer`. The value is passed through
// detail::to_little_endian() before it is returned.
// NOTE(review): presumably the bytes at `buffer` are in little-endian order
// and the helper yields the host representation - confirm against
// detail::to_little_endian.
142 _bgzfUnpack16(
char const * buffer)
146 return detail::to_little_endian(tmp);
// Decodes a 32-bit field that starts at `buffer`. The value is passed through
// detail::to_little_endian() before it is returned.
// NOTE(review): presumably the bytes at `buffer` are in little-endian order
// and the helper yields the host representation - confirm against
// detail::to_little_endian.
150 _bgzfUnpack32(
char const * buffer)
154 return detail::to_little_endian(tmp);
// Encodes a 16-bit value for storage in `buffer`.
// `value` is first converted with detail::to_little_endian(); afterwards its
// object representation (sizeof(uint16_t) bytes) serves as the source range.
// NOTE(review): presumably that byte range is copied to `buffer` - confirm.
162 _bgzfPack16(
char * buffer, uint16_t value)
164 value = detail::to_little_endian(value);
// End of the source byte range: one past the last byte of `value`.
166 reinterpret_cast<char *
>(&value) +
sizeof(uint16_t),
// Encodes a 32-bit value for storage in `buffer`.
// `value` is first converted with detail::to_little_endian(); afterwards its
// object representation (sizeof(uint32_t) bytes) serves as the source range.
// NOTE(review): presumably that byte range is copied to `buffer` - confirm.
171 _bgzfPack32(
char * buffer, uint32_t value)
173 value = detail::to_little_endian(value);
// End of the source byte range: one past the last byte of `value`.
175 reinterpret_cast<char *
>(&value) +
sizeof(uint32_t),
// Compresses one source range into a single BGZF block written at dstBegin.
// The block consists of the BGZF header, the deflate output and an 8-byte
// footer holding the CRC32 and the byte size of the uncompressed input.
//
// dstBegin/dstCapacity: destination buffer and its capacity; the destination
//                       element type must be one byte wide (asserted below).
// srcBegin/srcLength:   input elements; srcLength * sizeof(TSourceValue)
//                       bytes are compressed.
// ctx:                  BGZF compression context whose `strm` is used.
//
// Returns dstCapacity - ctx.strm.avail_out, i.e. the length of the whole
// block including header and footer.
// Throws io_error if deflate() does not finish with Z_STREAM_END, and with
// "BGZF deflateEnd() failed." for a failure of deflateEnd().
183 template <
typename TDestValue,
typename TDestCapacity,
typename TSourceValue,
typename TSourceLength>
185 _compressBlock(TDestValue *dstBegin, TDestCapacity dstCapacity,
186 TSourceValue *srcBegin, TSourceLength srcLength, CompressionContext<detail::bgzf_compression> & ctx)
188 const size_t BLOCK_HEADER_LENGTH = DefaultPageSize<detail::bgzf_compression>::BLOCK_HEADER_LENGTH;
189 const size_t BLOCK_FOOTER_LENGTH = DefaultPageSize<detail::bgzf_compression>::BLOCK_FOOTER_LENGTH;
// Preconditions (debug builds only): room for more than header plus footer,
// byte-sized destination elements, 32-bit `unsigned`.
191 assert(dstCapacity > BLOCK_HEADER_LENGTH + BLOCK_FOOTER_LENGTH);
192 assert(
sizeof(TDestValue) == 1u);
193 assert(
sizeof(
unsigned) == 4u);
// Start the block with the BGZF magic header; the block-size field inside it
// is filled in after compression.
196 std::ranges::copy(detail::bgzf_compression::magic_header, dstBegin);
// Point zlib at the input bytes and at the output area right behind the
// header, keeping BLOCK_FOOTER_LENGTH bytes in reserve for the footer.
200 ctx.strm.next_in = (Bytef *)(srcBegin);
201 ctx.strm.next_out = (Bytef *)(dstBegin + BLOCK_HEADER_LENGTH);
202 ctx.strm.avail_in = srcLength *
sizeof(TSourceValue);
203 ctx.strm.avail_out = dstCapacity - BLOCK_HEADER_LENGTH - BLOCK_FOOTER_LENGTH;
// Z_FINISH asks zlib to consume all input and flush all output in this call;
// it reports Z_STREAM_END only if the output space sufficed.
205 int status = deflate(&ctx.strm, Z_FINISH);
206 if (status != Z_STREAM_END)
// Release the deflate state before reporting the failure.
208 deflateEnd(&ctx.strm);
209 throw io_error(
"Deflation failed. Compressed BGZF data is too big.");
212 status = deflateEnd(&ctx.strm);
214 throw io_error(
"BGZF deflateEnd() failed.");
// avail_out started at capacity minus header minus footer, so this difference
// is header + compressed data + footer, i.e. the total block length.
221 size_t len = dstCapacity - ctx.strm.avail_out;
// Store (block length - 1) as a 16-bit field at byte offset 16 of the header.
222 _bgzfPack16(dstBegin + 16, len - 1);
// Move to the start of the footer.
224 dstBegin += len - BLOCK_FOOTER_LENGTH;
// Footer field 1: CRC32 over the uncompressed input bytes.
225 _bgzfPack32(dstBegin, crc32(crc32(0u, NULL, 0u), (Bytef *)(srcBegin), srcLength *
sizeof(TSourceValue)));
// Footer field 2: size of the uncompressed input in bytes.
226 _bgzfPack32(dstBegin + 4, srcLength *
sizeof(TSourceValue));
228 return dstCapacity - ctx.strm.avail_out;
// Prepares ctx.strm for decompression by calling zlib's inflateInit2().
// Throws io_error with the message "GZip inflateInit2() failed." to report an
// initialization failure.
236 decompressInit(CompressionContext<detail::gz_compression> & ctx)
// A negative windowBits value is documented by zlib as selecting raw deflate
// input, i.e. no zlib/gzip header is expected by inflate().
238 const int GZIP_WINDOW_BITS = -15;
// NULL allocator hooks; per the zlib manual this selects zlib's default
// allocation routines.
240 ctx.strm.zalloc = NULL;
241 ctx.strm.zfree = NULL;
242 int status = inflateInit2(&ctx.strm, GZIP_WINDOW_BITS);
244 throw io_error(
"GZip inflateInit2() failed.");
// BGZF overload: initializes the inherited gz part of the context by casting
// ctx to its base class and delegating to the gz overload of decompressInit().
252 decompressInit(CompressionContext<detail::bgzf_compression> & ctx)
254 decompressInit(
static_cast<CompressionContext<detail::gz_compression> &
>(ctx));
// Decompresses a single BGZF block located at srcBegin into dstBegin.
// The block is validated first (minimum length, header, stored block size);
// after inflation the footer's CRC32 and size fields are checked against the
// produced data.
//
// dstBegin/dstCapacity: destination buffer and its capacity in elements.
// srcBegin/srcLength:   the complete block (header, data, footer); the source
//                       element type must be one byte wide (asserted below).
// ctx:                  BGZF compression context whose `strm` is used.
//
// Returns the number of produced destination elements.
// Throws io_error for a too short block, an invalid header, a block-size
// mismatch, a failed inflation, a wrong checksum or a size mismatch.
//
// NOTE(review): avail_out is set to dstCapacity * sizeof(TDestValue) (bytes),
// but the produced size is later computed as dstCapacity - avail_out. The two
// only agree when sizeof(TDestValue) == 1 - confirm that callers never use a
// wider destination type.
262 template <
typename TDestValue,
typename TDestCapacity,
typename TSourceValue,
typename TSourceLength>
264 _decompressBlock(TDestValue *dstBegin, TDestCapacity dstCapacity,
265 TSourceValue *srcBegin, TSourceLength srcLength, CompressionContext<detail::bgzf_compression> & ctx)
267 const size_t BLOCK_HEADER_LENGTH = DefaultPageSize<detail::bgzf_compression>::BLOCK_HEADER_LENGTH;
268 const size_t BLOCK_FOOTER_LENGTH = DefaultPageSize<detail::bgzf_compression>::BLOCK_FOOTER_LENGTH;
// Preconditions (debug builds only): byte-sized source elements and a 32-bit
// `unsigned`.
270 assert(
sizeof(TSourceValue) == 1u);
271 assert(
sizeof(
unsigned) == 4u);
// A block must contain more than just header and footer.
275 if (srcLength <= BLOCK_HEADER_LENGTH + BLOCK_FOOTER_LENGTH)
276 throw io_error(
"BGZF block too short.");
// Let detail::bgzf_compression check the header bytes of the block.
278 if (!detail::bgzf_compression::validate_header(
std::span{srcBegin, srcLength}))
279 throw io_error(
"Invalid BGZF block header.");
// The 16-bit field at byte offset 16 stores (block length - 1); it has to
// match the length of the range that was passed in.
281 size_t compressedLen = _bgzfUnpack16(srcBegin + 16) + 1u;
282 if (compressedLen != srcLength)
283 throw io_error(
"BGZF compressed size mismatch.");
// Feed zlib the bytes between header and footer; the output capacity is
// given in bytes.
289 ctx.strm.next_in = (Bytef *)(srcBegin + BLOCK_HEADER_LENGTH);
290 ctx.strm.next_out = (Bytef *)(dstBegin);
291 ctx.strm.avail_in = srcLength - BLOCK_HEADER_LENGTH - BLOCK_FOOTER_LENGTH;
292 ctx.strm.avail_out = dstCapacity *
sizeof(TDestValue);
// Z_FINISH: the whole block is expected to be inflated in this single call;
// any result other than Z_STREAM_END is treated as a failure.
294 int status = inflate(&ctx.strm, Z_FINISH);
295 if (status != Z_STREAM_END)
// Release the inflate state before reporting the failure.
297 inflateEnd(&ctx.strm);
298 throw io_error(
"Inflation failed. Decompressed BGZF data is too big.");
301 status = inflateEnd(&ctx.strm);
303 throw io_error(
"BGZF inflateEnd() failed.");
// CRC32 over the data that was just produced.
310 unsigned crc = crc32(crc32(0u, NULL, 0u), (Bytef *)(dstBegin), dstCapacity - ctx.strm.avail_out);
// Move to the start of the footer and compare its two 32-bit fields
// (checksum, then uncompressed size) with the values computed here.
312 srcBegin += compressedLen - BLOCK_FOOTER_LENGTH;
313 if (_bgzfUnpack32(srcBegin) != crc)
314 throw io_error(
"BGZF wrong checksum.");
316 if (_bgzfUnpack32(srcBegin + 4) != dstCapacity - ctx.strm.avail_out)
317 throw io_error(
"BGZF size mismatch.");
// Convert the produced size into a count of destination elements.
319 return (dstCapacity - ctx.strm.avail_out) /
sizeof(TDestValue);