SeqAn3  3.0.1
The Modern C++ library for sequence analysis.
output.hpp
Go to the documentation of this file.
1 // -----------------------------------------------------------------------------------------------------
2 // Copyright (c) 2006-2020, Knut Reinert & Freie Universität Berlin
3 // Copyright (c) 2016-2020, Knut Reinert & MPI für molekulare Genetik
4 // This file may be used, modified and/or redistributed under the terms of the 3-clause BSD-License
5 // shipped with this file and also available at: https://github.com/seqan/seqan3/blob/master/LICENSE.md
6 // -----------------------------------------------------------------------------------------------------
7 
13 #pragma once
14 
15 #include <cassert>
16 #include <fstream>
17 #include <string>
18 #include <variant>
19 #include <vector>
20 
21 // remove the following after range-v3 is updated to 1.0
22 #pragma GCC diagnostic push
23 #pragma GCC diagnostic ignored "-Wdeprecated-declarations"
24 
29 #include <seqan3/io/exception.hpp>
30 #include <seqan3/std/filesystem>
31 #include <seqan3/io/record.hpp>
35 #include <seqan3/io/detail/record.hpp>
46 #include <seqan3/std/ranges>
47 
48 namespace seqan3
49 {
50 
51 // ----------------------------------------------------------------------------
52 // sequence_file_output
53 // ----------------------------------------------------------------------------
54 
169 template <detail::fields_specialisation selected_field_ids_ = fields<field::seq, field::id, field::qual>,
170  detail::type_list_of_sequence_file_output_formats valid_formats_ =
171  type_list<format_embl, format_fasta, format_fastq, format_genbank, format_sam>>
173 {
174 public:
// Template arguments re-exported as member types, plus the stream character type.
// selected_field_ids: the seqan3::fields list with the fields selected for the record.
179  using selected_field_ids = selected_field_ids_;
// valid_formats: the seqan3::type_list with the possible formats.
182  using valid_formats = valid_formats_;
// stream_char_type: character type of the stream(s); fixed to char.
184  using stream_char_type = char;
186 
189 
// Compile-time check: an immediately invoked constexpr lambda walks every field in
// selected_field_ids::as_array and yields false as soon as one is not contained in field_ids.
// NOTE(review): field_ids is declared on a listing line that is not shown here.
190  static_assert([] () constexpr
191  {
192  for (field f : selected_field_ids::as_array)
193  if (!field_ids::contains(f))
194  return false;
195  return true;
196  }(),
197  "You selected a field that is not valid for sequence files, please refer to the documentation "
198  "of sequence_file_output::field_ids for the accepted values.");
199 
200  static_assert([] () constexpr
201  {
205  }(),
206  "You may not select field::seq_qual and either of field::seq and field::qual at the same time.");
207 
// Range-associated member types. Everything except iterator and sentinel is void.
// The value type (void).
213  using value_type = void;
// The reference type (void).
216  using reference = void;
// The const reference type (void).
218  using const_reference = void;
// The size type (void).
220  using size_type = void;
// The iterator type (an output iterator), instantiated on this file class.
224  using iterator = detail::out_file_iterator<sequence_file_output>;
// The const iterator type is void, because files are not const-iterable.
226  using const_iterator = void;
// The type returned by end().
228  using sentinel = std::ranges::default_sentinel_t;
230 
// Special member functions.
// NOTE(review): the listing skips some line numbers in this region, so further special
// members (e.g. assignment / move operations) may be declared but are not shown here.
// Default constructor is explicitly deleted, you need to give a stream or file name.
234  sequence_file_output() = delete;
// Copy construction is explicitly deleted.
237  sequence_file_output(sequence_file_output const &) = delete;
// Destructor is defaulted.
245  ~sequence_file_output() = default;
246 
263  selected_field_ids const & SEQAN3_DOXYGEN_ONLY(fields_tag) = selected_field_ids{}) :
264  primary_stream{new std::ofstream{filename, std::ios_base::out | std::ios::binary}, stream_deleter_default}
265  {
266  if (!primary_stream->good())
267  throw file_open_error{"Could not open file " + filename.string() + " for writing."};
268 
269  // possibly add intermediate compression stream
270  secondary_stream = detail::make_secondary_ostream(*primary_stream, filename);
271 
272  // initialise format handler or throw if format is not found
273  detail::set_format(format, filename);
274  }
275 
// Construct from an existing stream (taken by lvalue reference) and with a specified format.
// stream:     the caller's stream; both stream pointers below refer to it.
// format_tag: only its type (file_format) is used; it selects the format alternative.
// fields_tag: only its type is used; defaults to selected_field_ids{}.
// NOTE(review): listing lines 293-295 are not shown; a constraint on stream_t may be
// elided here -- confirm against the real header.
291  template <output_stream stream_t,
292  sequence_file_output_format file_format>
296  sequence_file_output(stream_t & stream,
297  file_format const & SEQAN3_DOXYGEN_ONLY(format_tag),
298  selected_field_ids const & SEQAN3_DOXYGEN_ONLY(fields_tag) = selected_field_ids{}) :
// Both pointers use stream_deleter_noop, so the caller's stream is never deleted by this object.
299  primary_stream{&stream, stream_deleter_noop},
300  secondary_stream{&stream, stream_deleter_noop},
301  format{detail::sequence_file_output_format_exposer<file_format>{}}
302  {
// Reject, at compile time, a format that is not part of this file's valid_formats.
303  static_assert(list_traits::contains<file_format, valid_formats>,
304  "You selected a format that is not in the valid_formats of this file.");
305  }
306 
// Overload of the stream constructor that takes the stream as `stream_t &&` and stores
// a moved-from copy of it inside this object.
// format_tag / fields_tag: only their types are used (see the lvalue-stream overload).
// NOTE(review): listing lines 310-312 are not shown; a constraint on stream_t may be
// elided here -- confirm against the real header.
308  template <output_stream stream_t,
309  sequence_file_output_format file_format>
313  sequence_file_output(stream_t && stream,
314  file_format const & SEQAN3_DOXYGEN_ONLY(format_tag),
315  selected_field_ids const & SEQAN3_DOXYGEN_ONLY(fields_tag) = selected_field_ids{}) :
// The stream is moved into a heap-allocated stream_t; stream_deleter_default deletes it later.
316  primary_stream{new stream_t{std::move(stream)}, stream_deleter_default},
// secondary_stream refers to the same object but must not delete it (noop deleter).
317  secondary_stream{&*primary_stream, stream_deleter_noop},
318  format{detail::sequence_file_output_format_exposer<file_format>{}}
319  {
// Reject, at compile time, a format that is not part of this file's valid_formats.
320  static_assert(list_traits::contains<file_format, valid_formats>,
321  "You selected a format that is not in the valid_formats of this file.");
322  }
324 
// Returns an iterator to the current position in the file.
// The iterator (detail::out_file_iterator) is list-initialised from *this.
346  iterator begin() noexcept
347  {
348  return {*this};
349  }
350 
// Returns a sentinel for comparison with iterator.
// The sentinel is a value-initialised std::ranges::default_sentinel_t and carries no state.
365  sentinel end() noexcept
366  {
367  return {};
368  }
369 
// Write a seqan3::record to the file.
// r: the record; the overload is constrained to tuple_like types that are
//    specialisations of seqan3::record.
// The four fields are looked up by field id via detail::get_or_ignore and forwarded
// to write_record in the order seq, id, qual, seq_qual.
388  template <typename record_t>
389  void push_back(record_t && r)
390  requires tuple_like<record_t> &&
391  requires { requires detail::is_type_specialisation_of_v<remove_cvref_t<record_t>, record>; }
392  {
393  write_record(detail::get_or_ignore<field::seq>(r),
394  detail::get_or_ignore<field::id>(r),
395  detail::get_or_ignore<field::qual>(r),
396  detail::get_or_ignore<field::seq_qual>(r));
397 
398  }
399 
// Write a record in form of a std::tuple (any tuple_like type) to the file.
// t: the tuple; element positions are interpreted according to selected_field_ids,
//    i.e. the element at selected_field_ids::index_of(field::X) is used as field X.
421  template <typename tuple_t>
422  void push_back(tuple_t && t)
423  requires tuple_like<tuple_t>
424  {
425  // index_of might return npos, but this will be handled well by get_or_ignore (and just return ignore)
426  write_record(detail::get_or_ignore<selected_field_ids::index_of(field::seq)>(t),
427  detail::get_or_ignore<selected_field_ids::index_of(field::id)>(t),
428  detail::get_or_ignore<selected_field_ids::index_of(field::qual)>(t),
429  detail::get_or_ignore<selected_field_ids::index_of(field::seq_qual)>(t));
430  }
431 
// Write a record to the file by passing individual fields.
// arg, args: the fields, in the order given by selected_field_ids (they are handed to
//            the tuple overload of push_back).
455  template <typename arg_t, typename ...arg_types>
456  void emplace_back(arg_t && arg, arg_types && ... args)
457  {
// std::tie builds a tuple of lvalue references, so the fields are not copied here.
458  push_back(std::tie(arg, args...));
459  }
460 
// Write a range of records (or tuples) to the file.
// range: an input range; each element is passed to push_back in iteration order.
// Returns a reference to this file object.
// NOTE(review): listing line 484 is not shown; the index text on this page mentions a
// `requires tuple_like<reference_t<rng_t>>` clause for this function -- confirm in the real header.
482  template <std::ranges::input_range rng_t>
483  sequence_file_output & operator=(rng_t && range)
485  {
486  for (auto && record : range)
487  push_back(std::forward<decltype(record)>(record));
488  return *this;
489  }
490 
518  template <std::ranges::input_range rng_t>
521  {
522  f = range;
523  return f;
524  }
525 
527  template <std::ranges::input_range rng_t>
530  {
531  #if defined(__GNUC__) && (__GNUC__ == 9) // an unreported build problem of GCC9
532  for (auto && record : range)
533  f.push_back(std::forward<decltype(record)>(record));
534  #else // ^^^ workaround | regular solution ↓↓↓
535  f = range;
536  #endif
537  return std::move(f);
538  }
540 
543 
548  {
549  return *secondary_stream;
550  }
552 protected:
554 
// Deleter that does nothing; used for stream pointers that do not own the stream they point to.
562  static void stream_deleter_noop(std::basic_ostream<stream_char_type> *) {}
// Deleter that calls delete on the pointer; used when this object allocated the stream itself.
564  static void stream_deleter_default(std::basic_ostream<stream_char_type> * ptr) { delete ptr; }
565 
// Stream and format state. Both stream pointers start as null with the no-op deleter.
// NOTE(review): stream_ptr_t is declared on a listing line that is not shown here.
// primary_stream: the stream that was opened or passed in by the constructors.
567  stream_ptr_t primary_stream{nullptr, stream_deleter_noop};
// secondary_stream: the stream that records are actually written to (see write_record).
569  stream_ptr_t secondary_stream{nullptr, stream_deleter_noop};
570 
// format_type: type produced by detail::variant_from_tags from valid_formats and the
// format exposer template; it is dispatched on with std::visit in write_record.
572  using format_type = typename detail::variant_from_tags<valid_formats,
573  detail::sequence_file_output_format_exposer>::type;
// format: the currently selected output format.
575  format_type format;
577 
// Write one record to secondary_stream using the currently selected format.
// seq, id, qual: the individual fields; any of them may be the "ignore" placeholder.
// seq_qual:      combined sequence+quality range, or the "ignore" placeholder.
// Either seq_qual is ignored, or both seq and qual are ignored (enforced at compile time).
579  template <typename seq_t, typename id_t, typename qual_t, typename seq_qual_t>
580  void write_record(seq_t && seq, id_t && id, qual_t && qual, seq_qual_t && seq_qual)
581  {
582  static_assert(detail::decays_to_ignore_v<seq_qual_t> ||
583  (detail::decays_to_ignore_v<seq_t> && detail::decays_to_ignore_v<qual_t>),
584  "You may not select field::seq_qual and either of field::seq and field::qual at the same time.");
585 
// When seq_qual is provided, its value type must be a specialisation of seqan3::qualified.
586  if constexpr (!detail::decays_to_ignore_v<seq_qual_t>)
587  static_assert(detail::is_type_specialisation_of_v<value_type_t<seq_qual_t>, qualified>,
588  "The SEQ_QUAL field must contain a range over the seqan3::qualified alphabet.");
589 
590  assert(!format.valueless_by_exception());
// Dispatch to the format alternative currently held by the variant.
591  std::visit([&] (auto & f)
592  {
593  if constexpr (!detail::decays_to_ignore_v<seq_qual_t>)
594  {
// Split the combined range: views::get<0> is passed in the sequence position,
// views::get<1> in the quality position.
595  f.write_sequence_record(*secondary_stream,
596  options,
597  seq_qual | views::get<0>,
598  id,
599  seq_qual | views::get<1>);
600  }
601  else
602  {
// Separate fields were given; pass them through unchanged.
603  f.write_sequence_record(*secondary_stream,
604  options,
605  seq,
606  id,
607  qual);
608  }
609  }, format);
610  }
611 
613  friend iterator;
614 };
615 
621 template <output_stream stream_t,
623  sequence_file_output_format file_format>
624 sequence_file_output(stream_t &,
625  file_format const &)
627  type_list<file_format>>;
628 
630 template <output_stream stream_t,
631  sequence_file_output_format file_format>
632 sequence_file_output(stream_t &&,
633  file_format const &)
635  type_list<file_format>>;
636 
// Deduction guide for construction from (stream_t &&, format tag, fields tag):
// the fields tag type becomes selected_field_ids and the single given format
// becomes the only entry of valid_formats.
638 template <output_stream stream_t,
639  sequence_file_output_format file_format,
640  detail::fields_specialisation selected_field_ids>
641 sequence_file_output(stream_t &&,
642  file_format const &,
643  selected_field_ids const &)
644  -> sequence_file_output<selected_field_ids,
645  type_list<file_format>>;
646 
// Deduction guide for construction from (stream_t &, format tag, fields tag):
// the fields tag type becomes selected_field_ids and the single given format
// becomes the only entry of valid_formats.
648 template <output_stream stream_t,
649  sequence_file_output_format file_format,
650  detail::fields_specialisation selected_field_ids>
651 sequence_file_output(stream_t &,
652  file_format const &,
653  selected_field_ids const &)
654  -> sequence_file_output<selected_field_ids,
655  type_list<file_format>>;
657 } // namespace seqan3
658 
659 #pragma GCC diagnostic pop
seqan3::sequence_file_output::sentinel
std::ranges::default_sentinel_t sentinel
The type returned by end().
Definition: output.hpp:228
seqan3::field::seq_qual
Sequence and qualities combined in one range.
seqan3::sequence_file_output::reference
void reference
The reference type (void).
Definition: output.hpp:216
zip.hpp
Provides seqan3::views::zip.
seqan3::field::seq
The "sequence", usually a range of nucleotides or amino acids.
seqan3::sequence_file_output::push_back
void push_back(tuple_t &&t) requires tuple_like< tuple_t >
Write a record in form of a std::tuple to the file.
Definition: output.hpp:422
seqan3::sequence_file_output::options
sequence_file_output_options options
The options are public and its members can be set directly.
Definition: output.hpp:542
fstream
pack_algorithm.hpp
Provides algorithms for meta programming, parameter packs and seqan3::type_list.
misc_output.hpp
Provides various utility functions required only for output.
tuple.hpp
Provides seqan3::tuple_like.
concept.hpp
Stream concepts.
vector
seqan3::sequence_file_output::value_type
void value_type
The value type (void).
Definition: output.hpp:214
convert.hpp
Provides seqan3::views::convert.
seqan3::field::id
The identifier, usually a string.
format_sam.hpp
Provides the seqan3::format_sam.
seqan3::views::move
const auto move
A view that turns lvalue-references into rvalue-references.
Definition: move.hpp:68
seqan3::sequence_file_output::begin
iterator begin() noexcept
Returns an iterator to current position in the file.
Definition: output.hpp:346
record.hpp
Provides the seqan3::record template and the seqan3::field enum.
std::function
seqan3::sequence_file_output::operator=
sequence_file_output & operator=(rng_t &&range) requires tuple_like< reference_t< rng_t >>
Write a range of records (or tuples) to the file.
Definition: output.hpp:483
filesystem
This header includes C++17 filesystem support and imports it into namespace seqan3::filesystem (independent of whether it is marked as "experimental").
seqan3::sequence_file_output::operator|
friend sequence_file_output & operator|(rng_t &&range, sequence_file_output &f) requires tuple_like< reference_t< rng_t >>
Write a range of records (or tuples) to the file.
Definition: output.hpp:519
seqan3::sequence_file_output::valid_formats
valid_formats_ valid_formats
A seqan3::type_list with the possible formats.
Definition: output.hpp:182
std::filesystem::path
seqan3::pack_traits::contains
constexpr bool contains
Whether a type occurs in a pack or not.
Definition: traits.hpp:193
seqan3::sequence_file_output::operator|
friend sequence_file_output operator|(rng_t &&range, sequence_file_output &&f) requires tuple_like< reference_t< rng_t >>
This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
Definition: output.hpp:528
seqan3::fields
A class template that holds a choice of seqan3::field.
Definition: record.hpp:165
std::tie
T tie(T... args)
seqan3::sequence_file_output::operator=
sequence_file_output & operator=(sequence_file_output const &)=delete
Copy assignment is explicitly deleted, because you can't have multiple access to the same file.
same_as
The concept std::same_as<T, U> is satisfied if and only if T and U denote the same type.
format_fastq.hpp
std::basic_ostream
std::forward
T forward(T... args)
std::ofstream
seqan3::sequence_file_output::end
sentinel end() noexcept
Returns a sentinel for comparison with iterator.
Definition: output.hpp:365
seqan3::sequence_file_output::selected_field_ids
selected_field_ids_ selected_field_ids
A seqan3::fields list with the fields selected for the record.
Definition: output.hpp:180
seqan3::sequence_file_output::push_back
void push_back(record_t &&r) requires tuple_like< record_t > &&requires
Write a seqan3::record to the file.
Definition: output.hpp:389
exception.hpp
Provides exceptions used in the I/O module.
seqan3
The main SeqAn3 namespace.
Definition: aligned_sequence_concept.hpp:36
seqan3::sequence_file_output
A class for writing sequence files, e.g. FASTA, FASTQ ...
Definition: output.hpp:172
seqan3::sequence_file_output_options
The options type defines various option members that influence the behaviour of all or some formats.
Definition: output_options.hpp:21
output_options.hpp
Provides seqan3::sequence_file_output_options.
seqan3::sequence_file_output::const_reference
void const_reference
The const reference type (void).
Definition: output.hpp:218
format_fasta.hpp
tuple_like
Whether a type behaves like a tuple.
seqan3::sequence_file_output::sequence_file_output
sequence_file_output(stream_t &stream, file_format const &format_tag, selected_field_ids const &fields_tag=selected_field_ids{})
Construct from an existing stream and with specified format.
Definition: output.hpp:296
output_format_concept.hpp
Provides seqan3::sequence_file_output_format and auxiliary classes.
ranges
Adaptations of concepts from the Ranges TS.
sequence_file_output_format
The generic concept for sequence file out formats.
seqan3::sequence_file_output::emplace_back
void emplace_back(arg_t &&arg, arg_types &&... args)
Write a record to the file by passing individual fields.
Definition: output.hpp:456
seqan3::sequence_file_output::const_iterator
void const_iterator
The const iterator type is void, because files are not const-iterable.
Definition: output.hpp:226
seqan3::sequence_file_output::iterator
detail::out_file_iterator< sequence_file_output > iterator
The iterator type of this view (an output iterator).
Definition: output.hpp:224
seqan3::sequence_file_output::~sequence_file_output
~sequence_file_output()=default
Destructor is defaulted.
seqan3::sequence_file_output::size_type
void size_type
The size type (void).
Definition: output.hpp:220
cassert
seqan3::field
field
An enumerator for the fields used in file formats.
Definition: record.hpp:64
format_embl.hpp
seqan3::sequence_file_output::sequence_file_output
sequence_file_output(stream_t &&stream, file_format const &format_tag, selected_field_ids const &fields_tag=selected_field_ids{})
This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
Definition: output.hpp:313
seqan3::record
The class template that file records are based on; behaves like an std::tuple.
Definition: record.hpp:225
std::ptrdiff_t
seqan3::sequence_file_output::sequence_file_output
sequence_file_output()=delete
Default constructor is explicitly deleted, you need to give a stream or file name.
std::visit
T visit(T... args)
seqan3::field::qual
The qualities, usually in phred-score notation.
seqan3::sequence_file_output::stream_char_type
char stream_char_type
Character type of the stream(s).
Definition: output.hpp:184
out_file_iterator.hpp
Provides the seqan3::detail::out_file_iterator class template.
seqan3::sequence_file_output::sequence_file_output
sequence_file_output(std::filesystem::path filename, selected_field_ids const &fields_tag=selected_field_ids{})
Construct from filename.
Definition: output.hpp:262
misc.hpp
Provides various utility functions.
traits.hpp
Provides traits for seqan3::type_list.
std::unique_ptr
format_genbank.hpp
Provides the seqan3::format_genbank class.
get.hpp
Provides seqan3::views::get.
variant
string