SeqAn3  3.0.3
The Modern C++ library for sequence analysis.
misc_input.hpp
Go to the documentation of this file.
1 // -----------------------------------------------------------------------------------------------------
2 // Copyright (c) 2006-2021, Knut Reinert & Freie Universität Berlin
3 // Copyright (c) 2016-2021, Knut Reinert & MPI für molekulare Genetik
4 // This file may be used, modified and/or redistributed under the terms of the 3-clause BSD-License
5 // shipped with this file and also available at: https://github.com/seqan/seqan3/blob/master/LICENSE.md
6 // -----------------------------------------------------------------------------------------------------
7 
13 #pragma once
14 
#include <seqan3/std/algorithm>
#include <seqan3/std/concepts>
#include <seqan3/std/filesystem>
#include <functional>
#include <iostream>
#include <memory>
#include <seqan3/std/ranges>
#include <seqan3/std/span>
#include <string>
#include <tuple>

#ifdef SEQAN3_HAS_BZIP2
    #include <seqan3/contrib/stream/bz2_istream.hpp>
#endif
#ifdef SEQAN3_HAS_ZLIB
    #include <seqan3/contrib/stream/bgzf_istream.hpp>
    #include <seqan3/contrib/stream/gz_istream.hpp>
#endif
#include <seqan3/io/exception.hpp>
35 
36 namespace seqan3::detail
37 {
38 
43 template <std::ranges::forward_range ref_t, std::ranges::forward_range query_t>
44 inline bool starts_with(ref_t && reference, query_t && query)
46  requires std::equality_comparable_with<std::ranges::range_reference_t<ref_t>,
47  std::ranges::range_reference_t<query_t>>
49 {
50  auto rit = std::ranges::begin(reference);
51  auto rend = std::ranges::end(reference);
52 
53  auto qit = std::ranges::begin(query);
54  auto qend = std::ranges::end(query);
55 
56  while (true)
57  {
58  if (qit == qend)
59  return true;
60 
61  if (rit == rend)
62  return false;
63 
64  if (*qit != *rit)
65  return false;
66 
67  ++qit;
68  ++rit;
69  }
70 }
71 
78 template <builtin_character char_t>
79 inline auto make_secondary_istream(std::basic_istream<char_t> & primary_stream, std::filesystem::path & filename)
81 {
82  assert(primary_stream.good());
83 
84  // don't assume ownership
85  constexpr auto stream_deleter_noop = [] (std::basic_istream<char_t> *) {};
86  // assume ownership
87  [[maybe_unused]] constexpr auto stream_deleter_default = [] (std::basic_istream<char_t> * ptr) { delete ptr; };
88 
89  // extract "magic header"
90  std::istreambuf_iterator<char_t> it{primary_stream};
91  std::array<char, bgzf_compression::magic_header.size()> magic_number{}; // Largest magic header from bgzf
92  size_t read_chars = 0;
93  for (; read_chars < magic_number.size(); ++read_chars)
94  {
96  break;
97 
98  magic_number[read_chars] = *it;
99  ++it;
100  }
101 
102  // unget all read chars.
103  for (size_t i = 0 ; i < read_chars; ++i)
104  primary_stream.unget();
105 
106  std::string extension{};
107  if (filename.has_extension())
108  extension = filename.extension().string().substr(1);
109 
110  // tests whether the given extension matches with one of the given compression tags.
111  [[maybe_unused]] auto contains_extension = [] (auto compression_tag, auto const & extension) constexpr
112  {
113  return std::ranges::find(decltype(compression_tag)::file_extensions, extension) !=
114  std::ranges::end(decltype(compression_tag)::file_extensions);
115  };
116 
117  // set return value appropriately
118  if (read_chars == magic_number.size() && bgzf_compression::validate_header(std::span{magic_number})) // BGZF
119  {
120  #ifdef SEQAN3_HAS_ZLIB
121  if (contains_extension(gz_compression{}, extension) || contains_extension(bgzf_compression{}, extension))
122  filename.replace_extension();
123 
124  return {new contrib::basic_bgzf_istream<char_t>{primary_stream},
125  stream_deleter_default};
126  #else
127  throw file_open_error{"Trying to read from a bgzf file, but no ZLIB available."};
128  #endif
129  }
130  else if (starts_with(magic_number, gz_compression::magic_header)) // GZIP
131  {
132  #ifdef SEQAN3_HAS_ZLIB
133  if (contains_extension(gz_compression{}, extension) || contains_extension(bgzf_compression{}, extension))
134  filename.replace_extension();
135 
136  return {new contrib::basic_gz_istream<char_t>{primary_stream}, stream_deleter_default};
137  #else
138  throw file_open_error{"Trying to read from a gzipped file, but no ZLIB available."};
139  #endif
140  }
141  else if (starts_with(magic_number, bz2_compression::magic_header)) // BZip2
142  {
143  #ifdef SEQAN3_HAS_BZIP2
144  if (contains_extension(bz2_compression{}, extension))
145  filename.replace_extension();
146 
147  return {new contrib::basic_bz2_istream<char_t>{primary_stream}, stream_deleter_default};
148  #else
149  throw file_open_error{"Trying to read from a bzipped file, but no libbz2 available."};
150  #endif
151  }
152  else if (starts_with(magic_number, zstd_compression::magic_header)) // ZStd
153  {
154  throw file_open_error{"Trying to read from a zst'ed file, but SeqAn does not yet support this."};
155  }
156 
157  return {&primary_stream, stream_deleter_noop};
158 }
159 
161 template <builtin_character char_t>
162 inline auto make_secondary_istream(std::basic_istream<char_t> & primary_stream)
163 {
165  return make_secondary_istream(primary_stream, p);
166 }
167 
168 } // namespace seqan3::detail
Adaptations of algorithms from the Ranges TS.
T begin(T... args)
Provides stream compression utilities.
The Concepts library.
Provides concepts for core language types and relations that don't have concepts in C++20 (yet).
This header includes C++17 filesystem support and imports it into namespace std::filesystem (independ...
constexpr ptrdiff_t find
Get the index of the first occurrence of a type in a pack.
Definition: traits.hpp:187
Provides exceptions used in the I/O module.
Provides seqan3::detail::magic_header.
Adaptations of concepts from the Ranges TS.
T rend(T... args)
T size(T... args)
Provides std::span from the C++20 standard library.