SeqAn3  3.0.3
The Modern C++ library for sequence analysis.
output.hpp
Go to the documentation of this file.
1 // -----------------------------------------------------------------------------------------------------
2 // Copyright (c) 2006-2021, Knut Reinert & Freie Universität Berlin
3 // Copyright (c) 2016-2021, Knut Reinert & MPI für molekulare Genetik
4 // This file may be used, modified and/or redistributed under the terms of the 3-clause BSD-License
5 // shipped with this file and also available at: https://github.com/seqan/seqan3/blob/master/LICENSE.md
6 // -----------------------------------------------------------------------------------------------------
7 
13 #pragma once
14 
15 #include <cassert>
16 #include <seqan3/std/filesystem>
17 #include <fstream>
18 #include <seqan3/std/ranges>
19 #include <string>
20 #include <variant>
21 #include <vector>
22 
27 #include <seqan3/io/detail/record.hpp>
29 #include <seqan3/io/exception.hpp>
30 #include <seqan3/io/record.hpp>
44 
45 namespace seqan3
46 {
47 
48 // ----------------------------------------------------------------------------
49 // sequence_file_output
50 // ----------------------------------------------------------------------------
51 
164 template <detail::fields_specialisation selected_field_ids_ = fields<field::seq, field::id, field::qual>,
165  detail::type_list_of_sequence_file_output_formats valid_formats_ =
166  type_list<format_embl, format_fasta, format_fastq, format_genbank, format_sam>>
168 {
169 public:
175  using selected_field_ids = selected_field_ids_;
177  using valid_formats = valid_formats_;
179  using stream_char_type = char;
181 
183 #ifdef SEQAN3_DEPRECATED_310
185 #else // ^^^ before seqan 3.1 / after seqan 3.1 vvv
187 #endif // SEQAN3_DEPRECATED_310
188 
189  static_assert([] () constexpr
190  {
191  for (field f : selected_field_ids::as_array)
192  if (!field_ids::contains(f))
193  return false;
194  return true;
195  }(),
196  "You selected a field that is not valid for sequence files, please refer to the documentation "
197  "of sequence_file_output::field_ids for the accepted values.");
198 
199 #ifdef SEQAN3_DEPRECATED_310
200  static_assert([] () constexpr
201  {
205  }(),
206  "You may not select field::seq_qual and either of field::seq and field::qual at the same time.");
207 #endif // SEQAN3_DEPRECATED_310
208 
215  using value_type = void;
217  using reference = void;
219  using const_reference = void;
221  using size_type = void;
225  using iterator = detail::out_file_iterator<sequence_file_output>;
227  using const_iterator = void;
229  using sentinel = std::default_sentinel_t;
231 
247 
264  selected_field_ids const & SEQAN3_DOXYGEN_ONLY(fields_tag) = selected_field_ids{}) :
265  primary_stream{new std::ofstream{}, stream_deleter_default}
266  {
267  primary_stream->rdbuf()->pubsetbuf(stream_buffer.data(), stream_buffer.size());
268  static_cast<std::basic_ofstream<char> *>(primary_stream.get())->open(filename,
269  std::ios_base::out | std::ios::binary);
270 
271  if (!primary_stream->good())
272  throw file_open_error{"Could not open file " + filename.string() + " for writing."};
273 
274  // possibly add intermediate compression stream
275  secondary_stream = detail::make_secondary_ostream(*primary_stream, filename);
276 
277  // initialise format handler or throw if format is not found
278  detail::set_format(format, filename);
279  }
280 
296  template <output_stream stream_t,
297  sequence_file_output_format file_format>
299  requires std::same_as<typename std::remove_reference_t<stream_t>::char_type, stream_char_type>
301  sequence_file_output(stream_t & stream,
302  file_format const & SEQAN3_DOXYGEN_ONLY(format_tag),
303  selected_field_ids const & SEQAN3_DOXYGEN_ONLY(fields_tag) = selected_field_ids{}) :
304  primary_stream{&stream, stream_deleter_noop},
305  secondary_stream{&stream, stream_deleter_noop},
306  format{detail::sequence_file_output_format_exposer<file_format>{}}
307  {
308  static_assert(list_traits::contains<file_format, valid_formats>,
309  "You selected a format that is not in the valid_formats of this file.");
310  }
311 
313  template <output_stream stream_t,
314  sequence_file_output_format file_format>
316  requires std::same_as<typename std::remove_reference_t<stream_t>::char_type, stream_char_type>
318  sequence_file_output(stream_t && stream,
319  file_format const & SEQAN3_DOXYGEN_ONLY(format_tag),
320  selected_field_ids const & SEQAN3_DOXYGEN_ONLY(fields_tag) = selected_field_ids{}) :
321  primary_stream{new stream_t{std::move(stream)}, stream_deleter_default},
322  secondary_stream{&*primary_stream, stream_deleter_noop},
323  format{detail::sequence_file_output_format_exposer<file_format>{}}
324  {
325  static_assert(list_traits::contains<file_format, valid_formats>,
326  "You selected a format that is not in the valid_formats of this file.");
327  }
329 
351  iterator begin() noexcept
352  {
353  return {*this};
354  }
355 
370  sentinel end() noexcept
371  {
372  return {};
373  }
374 
393  template <typename record_t>
394  void push_back(record_t && r)
396  requires detail::record_like<record_t>
398  {
399 #ifdef SEQAN3_DEPRECATED_310
400  write_record(detail::get_or_ignore<field::seq>(r),
401  detail::get_or_ignore<field::id>(r),
402  detail::get_or_ignore<field::qual>(r),
403  detail::get_or_ignore<field::_seq_qual_deprecated>(r));
404 #else // ^^^ before seqan 3.1 / after seqan 3.1 vvv
405  write_record(detail::get_or_ignore<field::seq>(r),
406  detail::get_or_ignore<field::id>(r),
407  detail::get_or_ignore<field::qual>(r));
408 #endif // SEQAN3_DEPRECATED_310
409  }
410 
432  template <typename tuple_t>
433  void push_back(tuple_t && t)
435  requires tuple_like<tuple_t> && (!detail::record_like<tuple_t>)
437  {
438  // index_of might return npos, but this will be handled well by get_or_ignore (and just return ignore)
439 #ifdef SEQAN3_DEPRECATED_310
440  write_record(detail::get_or_ignore<selected_field_ids::index_of(field::seq)>(t),
441  detail::get_or_ignore<selected_field_ids::index_of(field::id)>(t),
442  detail::get_or_ignore<selected_field_ids::index_of(field::qual)>(t),
443  detail::get_or_ignore<selected_field_ids::index_of(field::_seq_qual_deprecated)>(t));
444 #else // ^^^ before seqan 3.1 / after seqan 3.1 vvv
445  write_record(detail::get_or_ignore<selected_field_ids::index_of(field::seq)>(t),
446  detail::get_or_ignore<selected_field_ids::index_of(field::id)>(t),
447  detail::get_or_ignore<selected_field_ids::index_of(field::qual)>(t));
448 #endif // SEQAN3_DEPRECATED_310
449  }
450 
474  template <typename arg_t, typename ...arg_types>
475  void emplace_back(arg_t && arg, arg_types && ... args)
476  {
477  push_back(std::tie(arg, args...));
478  }
479 
501  template <std::ranges::input_range rng_t>
502  sequence_file_output & operator=(rng_t && range)
506  {
507  for (auto && record : range)
508  push_back(std::forward<decltype(record)>(record));
509  return *this;
510  }
511 
539  template <std::ranges::input_range rng_t>
544  {
545  f = range;
546  return f;
547  }
548 
550  template <std::ranges::input_range rng_t>
555  {
556  #if defined(__GNUC__) && (__GNUC__ == 9) // an unreported build problem of GCC9
557  for (auto && record : range)
558  f.push_back(std::forward<decltype(record)>(record));
559  #else // ^^^ workaround | regular solution ↓↓↓
560  f = range;
561  #endif
562  return std::move(f);
563  }
565 
568 
573  {
574  return *secondary_stream;
575  }
577 protected:
580  std::vector<char> stream_buffer{std::vector<char>(1'000'000)};
581 
589  static void stream_deleter_noop(std::basic_ostream<stream_char_type> *) {}
591  static void stream_deleter_default(std::basic_ostream<stream_char_type> * ptr) { delete ptr; }
592 
594  stream_ptr_t primary_stream{nullptr, stream_deleter_noop};
596  stream_ptr_t secondary_stream{nullptr, stream_deleter_noop};
597 
599  using format_type = typename detail::variant_from_tags<valid_formats,
600  detail::sequence_file_output_format_exposer>::type;
602  format_type format;
604 
606 #ifdef SEQAN3_DEPRECATED_310
607  template <typename seq_t, typename id_t, typename qual_t, typename seq_qual_t>
608  void write_record(seq_t && seq, id_t && id, qual_t && qual, seq_qual_t && seq_qual)
609 #else // ^^^ before seqan 3.1 / after seqan 3.1 vvv
610  template <typename seq_t, typename id_t, typename qual_t>
611  void write_record(seq_t && seq, id_t && id, qual_t && qual)
612 #endif // SEQAN3_DEPRECATED_310
613  {
614 #ifdef SEQAN3_DEPRECATED_310
615  static_assert(detail::decays_to_ignore_v<seq_qual_t> ||
616  (detail::decays_to_ignore_v<seq_t> && detail::decays_to_ignore_v<qual_t>),
617  "You may not select field::seq_qual and either of field::seq and field::qual at the same time.");
618 
619  if constexpr (!detail::decays_to_ignore_v<seq_qual_t>)
620  static_assert(detail::is_type_specialisation_of_v<std::ranges::range_value_t<seq_qual_t>, qualified>,
621  "The SEQ_QUAL field must contain a range over the seqan3::qualified alphabet.");
622 #endif // SEQAN3_DEPRECATED_310
623 
624  assert(!format.valueless_by_exception());
625  std::visit([&] (auto & f)
626  {
627 #ifdef SEQAN3_DEPRECATED_310
628  if constexpr (!detail::decays_to_ignore_v<seq_qual_t>)
629  {
630  f.write_sequence_record(*secondary_stream,
631  options,
632  seq_qual | views::elements<0>,
633  id,
634  seq_qual | views::elements<1>);
635  }
636  else
637 #endif // SEQAN3_DEPRECATED_310
638  {
639  f.write_sequence_record(*secondary_stream,
640  options,
641  seq,
642  id,
643  qual);
644  }
645  }, format);
646  }
647 
649  friend iterator;
650 };
651 
658 template <output_stream stream_t,
659  sequence_file_output_format file_format>
661  file_format const &)
664 
666 template <output_stream stream_t,
667  sequence_file_output_format file_format>
669  file_format const &)
672 
674 template <output_stream stream_t,
675  sequence_file_output_format file_format,
676  detail::fields_specialisation selected_field_ids>
678  file_format const &,
679  selected_field_ids const &)
682 
684 template <output_stream stream_t,
685  sequence_file_output_format file_format,
686  detail::fields_specialisation selected_field_ids>
688  file_format const &,
689  selected_field_ids const &)
693 } // namespace seqan3
A class for writing sequence files, e.g. FASTA, FASTQ ...
Definition: output.hpp:168
void push_back(record_t &&r)
Write a seqan3::record to the file.
Definition: output.hpp:394
sequence_file_output(std::filesystem::path filename, selected_field_ids const &fields_tag=selected_field_ids{})
Construct from filename.
Definition: output.hpp:263
void const_iterator
The const iterator type is void, because files are not const-iterable.
Definition: output.hpp:227
sequence_file_output & operator=(sequence_file_output const &)=delete
Copy assignment is explicitly deleted, because you can't have multiple access to the same file.
char stream_char_type
Character type of the stream(s).
Definition: output.hpp:179
std::default_sentinel_t sentinel
The type returned by end().
Definition: output.hpp:229
sequence_file_output(stream_t &&, file_format const &, selected_field_ids const &) -> sequence_file_output< selected_field_ids, type_list< file_format >>
Deduction guide for given stream, file format and field ids.
friend sequence_file_output operator|(rng_t &&range, sequence_file_output &&f)
This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
Definition: output.hpp:551
sequence_file_output(stream_t &&, file_format const &) -> sequence_file_output< typename sequence_file_output<>::selected_field_ids, type_list< file_format >>
This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
sequence_file_output & operator=(rng_t &&range)
Write a range of records (or tuples) to the file.
Definition: output.hpp:502
sequence_file_output(sequence_file_output &&)=default
Move construction is defaulted.
void push_back(tuple_t &&t)
Write a record in form of a std::tuple to the file.
Definition: output.hpp:433
sequence_file_output(stream_t &, file_format const &) -> sequence_file_output< typename sequence_file_output<>::selected_field_ids, type_list< file_format >>
Deduction guide for given stream and file format.
friend sequence_file_output & operator|(rng_t &&range, sequence_file_output &f)
Write a range of records (or tuples) to the file.
Definition: output.hpp:540
sequence_file_output(sequence_file_output const &)=delete
Copy construction is explicitly deleted, because you can't have multiple access to the same file.
valid_formats_ valid_formats
A seqan3::type_list with the possible formats.
Definition: output.hpp:177
sequence_file_output(stream_t &, file_format const &, selected_field_ids const &) -> sequence_file_output< selected_field_ids, type_list< file_format >>
This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
selected_field_ids_ selected_field_ids
A seqan3::fields list with the fields selected for the record.
Definition: output.hpp:175
~sequence_file_output()=default
Destructor is defaulted.
sequence_file_output_options options
The options are public and its members can be set directly.
Definition: output.hpp:567
void emplace_back(arg_t &&arg, arg_types &&... args)
Write a record to the file by passing individual fields.
Definition: output.hpp:475
void value_type
The value type (void).
Definition: output.hpp:215
sentinel end() noexcept
Returns a sentinel for comparison with iterator.
Definition: output.hpp:370
void reference
The reference type (void).
Definition: output.hpp:217
sequence_file_output & operator=(sequence_file_output &&)=default
Move assignment is defaulted.
void const_reference
The const reference type (void).
Definition: output.hpp:219
sequence_file_output()=delete
Default constructor is explicitly deleted, you need to give a stream or file name.
detail::out_file_iterator< sequence_file_output > iterator
The iterator type of this view (an output iterator).
Definition: output.hpp:225
sequence_file_output(stream_t &stream, file_format const &format_tag, selected_field_ids const &fields_tag=selected_field_ids{})
Construct from an existing stream and with specified format.
Definition: output.hpp:301
sequence_file_output(stream_t &&stream, file_format const &format_tag, selected_field_ids const &fields_tag=selected_field_ids{})
This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
Definition: output.hpp:318
iterator begin() noexcept
Returns an iterator to current position in the file.
Definition: output.hpp:351
void size_type
The size type (void).
Definition: output.hpp:221
T data(T... args)
Provides seqan3::views::elements.
This header includes C++17 filesystem support and imports it into namespace std::filesystem (independent of whether it is marked as "experimental").
Provides the seqan3::sequence_file_format_genbank class.
T format(T... args)
T forward(T... args)
T get(T... args)
field
An enumerator for the fields used in file formats.
Definition: record.hpp:63
@ _seq_qual_deprecated
[DEPRECATED] Sequence and qualities combined in one range. Use field::seq and field::qual instead.
@ id
The identifier, usually a string.
@ seq
The "sequence", usually a range of nucleotides or amino acids.
@ qual
The qualities, usually in Phred score notation.
constexpr bool contains
Whether a type occurs in a type list or not.
Definition: traits.hpp:231
auto const move
A view that turns lvalue-references into rvalue-references.
Definition: move.hpp:74
The generic concept for sequence file out formats.
Whether a type behaves like a tuple.
Provides various utility functions.
Provides exceptions used in the I/O module.
Stream concepts.
Provides various utility functions required only for output.
The main SeqAn3 namespace.
Definition: aligned_sequence_concept.hpp:29
Provides the seqan3::detail::out_file_iterator class template.
Provides algorithms for meta programming, parameter packs and seqan3::type_list.
Adaptations of concepts from the Ranges TS.
Provides the seqan3::record template and the seqan3::field enum.
Provides seqan3::detail::record_like.
Provides the seqan3::format_sam.
Provides seqan3::sequence_file_output_format and auxiliary classes.
Provides seqan3::sequence_file_output_options.
T size(T... args)
A class template that holds a choice of seqan3::field.
Definition: record.hpp:172
The class template that file records are based on; behaves like an std::tuple.
Definition: record.hpp:235
The options type defines various option members that influence the behaviour of all or some formats.
Definition: output_options.hpp:22
Type that contains multiple types.
Definition: type_list.hpp:29
T tie(T... args)
Provides seqan3::tuple_like.
Provides traits for seqan3::type_list.
Provides seqan3::views::convert.
Provides seqan3::views::zip.
T visit(T... args)