SeqAn3 3.4.0-rc.4
The Modern C++ library for sequence analysis.
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages Concepts
misc_input.hpp
Go to the documentation of this file.
1// SPDX-FileCopyrightText: 2006-2025 Knut Reinert & Freie Universität Berlin
2// SPDX-FileCopyrightText: 2016-2025 Knut Reinert & MPI für molekulare Genetik
3// SPDX-License-Identifier: BSD-3-Clause
4
10#pragma once
11
#include <algorithm>
#include <concepts>
#include <filesystem>
#include <functional>
#include <iostream>
#include <memory>
#include <ranges>
#include <span>
#include <string>
#include <tuple>
20
21#include <seqan3/contrib/stream/bgzf.hpp>
22#include <seqan3/contrib/stream/bgzf_istream.hpp>
23#include <seqan3/contrib/stream/bgzf_stream_util.hpp>
24#include <seqan3/contrib/stream/bz2_istream.hpp>
25#include <seqan3/contrib/stream/gz_istream.hpp>
29
30namespace seqan3::detail
31{
32
38template <std::ranges::forward_range ref_t, std::ranges::forward_range query_t>
39inline bool starts_with(ref_t && reference, query_t && query)
40 requires std::equality_comparable_with<std::ranges::range_reference_t<ref_t>,
41 std::ranges::range_reference_t<query_t>>
42{
43 auto rit = std::ranges::begin(reference);
44 auto rend = std::ranges::end(reference);
45
46 auto qit = std::ranges::begin(query);
47 auto qend = std::ranges::end(query);
48
49 while (true)
50 {
51 if (qit == qend)
52 return true;
53
54 if (rit == rend)
55 return false;
56
57 if (*qit != *rit)
58 return false;
59
60 ++qit;
61 ++rit;
62 }
63}
64
72template <builtin_character char_t>
73inline auto make_secondary_istream(std::basic_istream<char_t> & primary_stream, std::filesystem::path & filename)
75{
76 assert(primary_stream.good());
77
78 // don't assume ownership
79 constexpr auto stream_deleter_noop = [](std::basic_istream<char_t> *) {};
80 // assume ownership
81 [[maybe_unused]] constexpr auto stream_deleter_default = [](std::basic_istream<char_t> * ptr)
82 {
83 delete ptr;
84 };
85
86 // extract "magic header"
87 std::istreambuf_iterator<char_t> it{primary_stream};
88 std::array<char, bgzf_compression::magic_header.size()> magic_number{}; // Largest magic header from bgzf
89 size_t read_chars = 0;
90 for (; read_chars < magic_number.size(); ++read_chars)
91 {
93 break;
94
95 magic_number[read_chars] = *it;
96 ++it;
97 }
98
99 // unget all read chars.
100 for (size_t i = 0; i < read_chars; ++i)
101 primary_stream.unget(); // If you unget() more characters than are present in the get area, badbit is set.
102
103 assert(primary_stream.good() && "`unget()` was called too many times on primary_stream.");
104
105 std::string extension{};
106 if (filename.has_extension())
107 extension = filename.extension().string().substr(1);
108
109 // tests whether the given extension matches with one of the given compression tags.
110 [[maybe_unused]] auto contains_extension = [](auto compression_tag, auto const & extension) constexpr
111 {
112 return std::ranges::find(decltype(compression_tag)::file_extensions, extension)
113 != std::ranges::end(decltype(compression_tag)::file_extensions);
114 };
115
116 // set return value appropriately
117 if (read_chars == magic_number.size() && bgzf_compression::validate_header(std::span{magic_number})) // BGZF
118 {
119#if SEQAN3_HAS_ZLIB
120 if (contains_extension(gz_compression{}, extension) || contains_extension(bgzf_compression{}, extension))
121 filename.replace_extension();
122
123 return {new contrib::basic_bgzf_istream<char_t>{primary_stream}, stream_deleter_default};
124#else
125 throw file_open_error{"Trying to read from a bgzf file, but no ZLIB available."};
126#endif // SEQAN3_HAS_ZLIB
127 }
128 else if (starts_with(magic_number, gz_compression::magic_header)) // GZIP
129 {
130#if SEQAN3_HAS_ZLIB
131 if (contains_extension(gz_compression{}, extension) || contains_extension(bgzf_compression{}, extension))
132 filename.replace_extension();
133
134 return {new contrib::basic_gz_istream<char_t>{primary_stream}, stream_deleter_default};
135#else
136 throw file_open_error{"Trying to read from a gzipped file, but no ZLIB available."};
137#endif // SEQAN3_HAS_ZLIB
138 }
139 else if (starts_with(magic_number, bz2_compression::magic_header)) // BZip2
140 {
141#if SEQAN3_HAS_BZIP2
142 if (contains_extension(bz2_compression{}, extension))
143 filename.replace_extension();
144
145 return {new contrib::basic_bz2_istream<char_t>{primary_stream}, stream_deleter_default};
146#else
147 throw file_open_error{"Trying to read from a bzipped file, but no libbz2 available."};
148#endif // SEQAN3_HAS_BZIP2
149 }
150 else if (starts_with(magic_number, zstd_compression::magic_header)) // ZStd
151 {
152 throw file_open_error{"Trying to read from a zst'ed file, but SeqAn does not yet support this."};
153 }
154
155 return {&primary_stream, stream_deleter_noop};
156}
157
159template <builtin_character char_t>
160inline auto make_secondary_istream(std::basic_istream<char_t> & primary_stream)
161{
163 return make_secondary_istream(primary_stream, p);
164}
165
166} // namespace seqan3::detail
T begin(T... args)
T find(T... args)
Provides exceptions used in the I/O module.
Provides seqan3::detail::magic_header.
T rend(T... args)
T size(T... args)
Provides concepts that do not have equivalents in C++20.
Hide me