SeqAn3 3.1.0
The Modern C++ library for sequence analysis.
misc_input.hpp
Go to the documentation of this file.
1// -----------------------------------------------------------------------------------------------------
2// Copyright (c) 2006-2021, Knut Reinert & Freie Universität Berlin
3// Copyright (c) 2016-2021, Knut Reinert & MPI für molekulare Genetik
4// This file may be used, modified and/or redistributed under the terms of the 3-clause BSD-License
5// shipped with this file and also available at: https://github.com/seqan/seqan3/blob/master/LICENSE.md
6// -----------------------------------------------------------------------------------------------------
7
13#pragma once
14
#include <seqan3/std/algorithm>
#include <seqan3/std/concepts>
#include <seqan3/std/filesystem>
#include <functional>
#include <iostream>
#include <memory>
#include <seqan3/std/ranges>
#include <seqan3/std/span>
#include <string>
#include <tuple>
23
24#if defined(SEQAN3_HAS_BZIP2)
25 #include <seqan3/contrib/stream/bz2_istream.hpp>
26#endif
27#if defined(SEQAN3_HAS_ZLIB)
28 #include <seqan3/contrib/stream/bgzf_istream.hpp>
30 #include <seqan3/contrib/stream/gz_istream.hpp>
31#endif
35
36namespace seqan3::detail
37{
38
44template <std::ranges::forward_range ref_t, std::ranges::forward_range query_t>
45inline bool starts_with(ref_t && reference, query_t && query)
47 requires std::equality_comparable_with<std::ranges::range_reference_t<ref_t>,
48 std::ranges::range_reference_t<query_t>>
50{
51 auto rit = std::ranges::begin(reference);
52 auto rend = std::ranges::end(reference);
53
54 auto qit = std::ranges::begin(query);
55 auto qend = std::ranges::end(query);
56
57 while (true)
58 {
59 if (qit == qend)
60 return true;
61
62 if (rit == rend)
63 return false;
64
65 if (*qit != *rit)
66 return false;
67
68 ++qit;
69 ++rit;
70 }
71}
72
80template <builtin_character char_t>
81inline auto make_secondary_istream(std::basic_istream<char_t> & primary_stream, std::filesystem::path & filename)
83{
84 assert(primary_stream.good());
85
86 // don't assume ownership
87 constexpr auto stream_deleter_noop = [] (std::basic_istream<char_t> *) {};
88 // assume ownership
89 [[maybe_unused]] constexpr auto stream_deleter_default = [] (std::basic_istream<char_t> * ptr) { delete ptr; };
90
91 // extract "magic header"
92 std::istreambuf_iterator<char_t> it{primary_stream};
93 std::array<char, bgzf_compression::magic_header.size()> magic_number{}; // Largest magic header from bgzf
94 size_t read_chars = 0;
95 for (; read_chars < magic_number.size(); ++read_chars)
96 {
98 break;
99
100 magic_number[read_chars] = *it;
101 ++it;
102 }
103
104 // unget all read chars.
105 for (size_t i = 0 ; i < read_chars; ++i)
106 primary_stream.unget();
107
108 std::string extension{};
109 if (filename.has_extension())
110 extension = filename.extension().string().substr(1);
111
112 // tests whether the given extension matches with one of the given compression tags.
113 [[maybe_unused]] auto contains_extension = [] (auto compression_tag, auto const & extension) constexpr
114 {
115 return std::ranges::find(decltype(compression_tag)::file_extensions, extension) !=
116 std::ranges::end(decltype(compression_tag)::file_extensions);
117 };
118
119 // set return value appropriately
120 if (read_chars == magic_number.size() && bgzf_compression::validate_header(std::span{magic_number})) // BGZF
121 {
122 #if defined(SEQAN3_HAS_ZLIB)
123 if (contains_extension(gz_compression{}, extension) || contains_extension(bgzf_compression{}, extension))
124 filename.replace_extension();
125
126 return {new contrib::basic_bgzf_istream<char_t>{primary_stream},
127 stream_deleter_default};
128 #else
129 throw file_open_error{"Trying to read from a bgzf file, but no ZLIB available."};
130 #endif
131 }
132 else if (starts_with(magic_number, gz_compression::magic_header)) // GZIP
133 {
134 #if defined(SEQAN3_HAS_ZLIB)
135 if (contains_extension(gz_compression{}, extension) || contains_extension(bgzf_compression{}, extension))
136 filename.replace_extension();
137
138 return {new contrib::basic_gz_istream<char_t>{primary_stream}, stream_deleter_default};
139 #else
140 throw file_open_error{"Trying to read from a gzipped file, but no ZLIB available."};
141 #endif
142 }
143 else if (starts_with(magic_number, bz2_compression::magic_header)) // BZip2
144 {
145 #if defined(SEQAN3_HAS_BZIP2)
146 if (contains_extension(bz2_compression{}, extension))
147 filename.replace_extension();
148
149 return {new contrib::basic_bz2_istream<char_t>{primary_stream}, stream_deleter_default};
150 #else
151 throw file_open_error{"Trying to read from a bzipped file, but no libbz2 available."};
152 #endif
153 }
154 else if (starts_with(magic_number, zstd_compression::magic_header)) // ZStd
155 {
156 throw file_open_error{"Trying to read from a zst'ed file, but SeqAn does not yet support this."};
157 }
158
159 return {&primary_stream, stream_deleter_noop};
160}
161
163template <builtin_character char_t>
164inline auto make_secondary_istream(std::basic_istream<char_t> & primary_stream)
165{
167 return make_secondary_istream(primary_stream, p);
168}
169
170} // namespace seqan3::detail
The <algorithm> header from C++20's standard library.
T begin(T... args)
Provides stream compression utilities.
The <concepts> header from C++20's standard library.
Provides concepts for core language types and relations that don't have concepts in C++20 (yet).
The <filesystem> header from C++17's standard library.
constexpr ptrdiff_t find
Get the index of the first occurrence of a type in a pack.
Definition: traits.hpp:187
Provides exceptions used in the I/O module.
Provides seqan3::detail::magic_header.
The <ranges> header from C++20's standard library.
T rend(T... args)
T size(T... args)
Provides std::span from the C++20 standard library.