SeqAn3  3.0.1
The Modern C++ library for sequence analysis.
misc_input.hpp
Go to the documentation of this file.
1 // -----------------------------------------------------------------------------------------------------
2 // Copyright (c) 2006-2020, Knut Reinert & Freie Universität Berlin
3 // Copyright (c) 2016-2020, Knut Reinert & MPI für molekulare Genetik
4 // This file may be used, modified and/or redistributed under the terms of the 3-clause BSD-License
5 // shipped with this file and also available at: https://github.com/seqan/seqan3/blob/master/LICENSE.md
6 // -----------------------------------------------------------------------------------------------------
7 
13 #pragma once
14 
#include <array>
#include <cassert>
#include <functional>
#include <iostream>
#include <iterator>
#include <memory>
#include <string>
#include <tuple>
18 
20 #ifdef SEQAN3_HAS_BZIP2
21  #include <seqan3/contrib/stream/bz2_istream.hpp>
22 #endif
23 #ifdef SEQAN3_HAS_ZLIB
24  #include <seqan3/contrib/stream/bgzf_istream.hpp>
26  #include <seqan3/contrib/stream/gz_istream.hpp>
27 #endif
29 #include <seqan3/std/algorithm>
30 #include <seqan3/std/concepts>
31 #include <seqan3/std/filesystem>
32 #include <seqan3/std/ranges>
33 #include <seqan3/std/span>
34 
35 namespace seqan3::detail
36 {
37 
42 template <std::ranges::forward_range ref_t, std::ranges::forward_range query_t>
43 inline bool starts_with(ref_t && reference, query_t && query)
46  std::ranges::range_reference_t<query_t>>
48 {
49  auto rit = std::ranges::begin(reference);
50  auto rend = std::ranges::end(reference);
51 
52  auto qit = std::ranges::begin(query);
53  auto qend = std::ranges::end(query);
54 
55  while (true)
56  {
57  if (qit == qend)
58  return true;
59 
60  if (rit == rend)
61  return false;
62 
63  if (*qit != *rit)
64  return false;
65 
66  ++qit;
67  ++rit;
68  }
69 }
70 
77 template <builtin_character char_t>
78 inline auto make_secondary_istream(std::basic_istream<char_t> & primary_stream, std::filesystem::path & filename)
80 {
81  assert(primary_stream.good());
82 
83  // don't assume ownership
84  constexpr auto stream_deleter_noop = [] (std::basic_istream<char_t> *) {};
85  // assume ownership
86  [[maybe_unused]] constexpr auto stream_deleter_default = [] (std::basic_istream<char_t> * ptr) { delete ptr; };
87 
88  // extract "magic header"
89  std::istreambuf_iterator<char_t> it{primary_stream};
90  std::array<char, bgzf_compression::magic_header.size()> magic_number{}; // Largest magic header from bgzf
91  size_t read_chars = 0;
92  for (; read_chars < magic_number.size(); ++read_chars)
93  {
95  break;
96 
97  magic_number[read_chars] = *it;
98  ++it;
99  }
100 
101  // unget all read chars.
102  for (size_t i = 0 ; i < read_chars; ++i)
103  primary_stream.unget();
104 
105  std::string extension{};
106  if (filename.has_extension())
107  extension = filename.extension().string().substr(1);
108 
109  // tests whether the given extension matches with one of the given compression tags.
110  [[maybe_unused]] auto contains_extension = [] (auto compression_tag, auto const & extension) constexpr
111  {
112  return std::ranges::find(decltype(compression_tag)::file_extensions, extension) !=
113  std::ranges::end(decltype(compression_tag)::file_extensions);
114  };
115 
116  // set return value appropriately
117  if (read_chars == magic_number.size() && bgzf_compression::validate_header(std::span{magic_number})) // BGZF
118  {
119  #ifdef SEQAN3_HAS_ZLIB
120  if (contains_extension(gz_compression{}, extension) || contains_extension(bgzf_compression{}, extension))
121  filename.replace_extension();
122 
123  return {new contrib::basic_bgzf_istream<char_t>{primary_stream},
124  stream_deleter_default};
125  #else
126  throw file_open_error{"Trying to read from a bgzf file, but no ZLIB available."};
127  #endif
128  }
129  else if (starts_with(magic_number, gz_compression::magic_header)) // GZIP
130  {
131  #ifdef SEQAN3_HAS_ZLIB
132  if (contains_extension(gz_compression{}, extension) || contains_extension(bgzf_compression{}, extension))
133  filename.replace_extension();
134 
135  return {new contrib::basic_gz_istream<char_t>{primary_stream}, stream_deleter_default};
136  #else
137  throw file_open_error{"Trying to read from a gzipped file, but no ZLIB available."};
138  #endif
139  }
140  else if (starts_with(magic_number, bz2_compression::magic_header)) // BZip2
141  {
142  #ifdef SEQAN3_HAS_BZIP2
143  if (contains_extension(bz2_compression{}, extension))
144  filename.replace_extension();
145 
146  return {new contrib::basic_bz2_istream<char_t>{primary_stream}, stream_deleter_default};
147  #else
148  throw file_open_error{"Trying to read from a bzipped file, but no libbz2 available."};
149  #endif
150  }
151  else if (starts_with(magic_number, zstd_compression::magic_header)) // ZStd
152  {
153  throw file_open_error{"Trying to read from a zst'ed file, but SeqAn does not yet support this."};
154  }
155 
156  return {&primary_stream, stream_deleter_noop};
157 }
158 
160 template <builtin_character char_t>
161 inline auto make_secondary_istream(std::basic_istream<char_t> & primary_stream)
162 {
164  return make_secondary_istream(primary_stream, p);
165 }
166 
167 } // namespace seqan3::detail
span
Provides std::span from the C++20 standard library.
std::string
std::array::size
T size(T... args)
bgzf_stream_util.hpp
Provides stream compression utilities.
seqan3::pack_traits::find
constexpr ptrdiff_t find
Get the index of the first occurrence of a type in a pack.
Definition: traits.hpp:152
tuple
std::function
filesystem
This header includes C++17 filesystem support and imports it into namespace seqan3::filesystem (indep...
iostream
std::filesystem::path
algorithm
Adaptations of algorithms from the Ranges TS.
concepts
The Concepts library.
core_language.hpp
Provides concepts for core language types and relations that don't have concepts in C++20 (yet).
std::array
magic_header.hpp
Provides seqan3::detail::magic_header.
std::istreambuf_iterator
std::rend
T rend(T... args)
ranges
Adaptations of concepts from the Ranges TS.
std::ranges::begin
T begin(T... args)
equality_comparable_with
Requires seqan3::detail::weakly_equality_comparable_witht<t1,t2>, but also that t1 and t2,...
std::basic_istream
std::unique_ptr
string