SeqAn3  3.0.0
The Modern C++ library for sequence analysis.
output.hpp
Go to the documentation of this file.
1 // -----------------------------------------------------------------------------------------------------
2 // Copyright (c) 2006-2019, Knut Reinert & Freie Universität Berlin
3 // Copyright (c) 2016-2019, Knut Reinert & MPI für molekulare Genetik
4 // This file may be used, modified and/or redistributed under the terms of the 3-clause BSD-License
5 // shipped with this file and also available at: https://github.com/seqan/seqan3/blob/master/LICENSE.md
6 // -----------------------------------------------------------------------------------------------------
7 
13 #pragma once
14 
15 #include <cassert>
16 #include <fstream>
17 #include <string>
18 #include <variant>
19 #include <vector>
20 
21 #include <range/v3/algorithm/equal.hpp>
22 #include <range/v3/view/zip.hpp>
23 
28 #include <seqan3/io/exception.hpp>
29 #include <seqan3/std/filesystem>
30 #include <seqan3/io/record.hpp>
34 #include <seqan3/io/detail/record.hpp>
44 #include <seqan3/std/ranges>
45 
46 namespace seqan3
47 {
48 
49 // ----------------------------------------------------------------------------
50 // sequence_file_output
51 // ----------------------------------------------------------------------------
52 
168 template <detail::Fields selected_field_ids_ = fields<field::SEQ, field::ID, field::QUAL>,
169  detail::TypeListOfSequenceFileOutputFormats valid_formats_ =
170  type_list<format_embl, format_fasta, format_fastq, format_genbank, format_sam>,
171  Char stream_char_type_ = char>
173 {
174 public:
179  using selected_field_ids = selected_field_ids_;
182  using valid_formats = valid_formats_;
184  using stream_char_type = stream_char_type_;
186 
189 
// Compile-time check: every field listed in selected_field_ids must also be contained in field_ids.
// The immediately-invoked constexpr lambda yields false as soon as one unknown field is found,
// which makes the static_assert fail with the message below.
190  static_assert([] () constexpr
191  {
192  for (field f : selected_field_ids::as_array)
193  if (!field_ids::contains(f))
194  return false;
195  return true;
196  }(),
197  "You selected a field that is not valid for sequence files, please refer to the documentation "
198  "of sequence_file_output::field_ids for the accepted values.");
199 
// Compile-time check: field::SEQ_QUAL is mutually exclusive with field::SEQ and field::QUAL.
// The lambda is true unless SEQ_QUAL is selected together with SEQ or QUAL (or both).
200  static_assert([] () constexpr
201  {
202  return !(selected_field_ids::contains(field::SEQ_QUAL) &&
203  (selected_field_ids::contains(field::SEQ) ||
204  (selected_field_ids::contains(field::QUAL))));
205  }(),
206  "You may not select field::SEQ_QUAL and either of field::SEQ and field::QUAL at the same time.");
207 
213  using value_type = void;
216  using reference = void;
218  using const_reference = void;
220  using size_type = void;
224  using iterator = detail::out_file_iterator<sequence_file_output>;
226  using const_iterator = void;
230 
234  sequence_file_output() = delete;
237  sequence_file_output(sequence_file_output const &) = delete;
245  ~sequence_file_output() = default;
246 
263  selected_field_ids const & SEQAN3_DOXYGEN_ONLY(fields_tag) = selected_field_ids{}) :
264  primary_stream{new std::ofstream{filename, std::ios_base::out | std::ios::binary}, stream_deleter_default}
265  {
266  if (!primary_stream->good())
267  throw file_open_error{"Could not open file " + filename.string() + " for writing."};
268 
269  // possibly add intermediate compression stream
270  secondary_stream = detail::make_secondary_ostream(*primary_stream, filename);
271 
272  // initialise format handler or throw if format is not found
273  detail::set_format(format, filename);
274  }
275 
// Construct from an existing stream (lvalue) and with a specified format.
//
// stream     - the stream to write to; it is NOT owned: both primary_stream and secondary_stream
//              point at it and use stream_deleter_noop, so it is never deleted by this object.
// format_tag - only its type (file_format) is used; it selects the active format handler.
// fields_tag - only its type is used; the constructor body never reads the value.
//
// Fails to compile if file_format is not part of valid_formats.
291  template <OStream2 stream_t,
292  SequenceFileOutputFormat file_format>
293  sequence_file_output(stream_t & stream,
294  file_format const & SEQAN3_DOXYGEN_ONLY(format_tag),
295  selected_field_ids const & SEQAN3_DOXYGEN_ONLY(fields_tag) = selected_field_ids{}) :
296  primary_stream{&stream, stream_deleter_noop},
297  secondary_stream{&stream, stream_deleter_noop},
298  format{detail::sequence_file_output_format<file_format>{}}
299  {
300  static_assert(meta::in<valid_formats, file_format>::value,
301  "You selected a format that is not in the valid_formats of this file.");
302  }
303 
// Construct from a temporary stream and with a specified format.
//
// stream     - moved into a newly allocated stream_t that primary_stream owns (released via
//              stream_deleter_default); secondary_stream refers to the same object but uses
//              stream_deleter_noop so that it is not deleted twice.
// format_tag - only its type (file_format) is used; it selects the active format handler.
// fields_tag - only its type is used; the constructor body never reads the value.
//
// Fails to compile if file_format is not part of valid_formats.
305  template <OStream2 stream_t,
306  SequenceFileOutputFormat file_format>
307  sequence_file_output(stream_t && stream,
308  file_format const & SEQAN3_DOXYGEN_ONLY(format_tag),
309  selected_field_ids const & SEQAN3_DOXYGEN_ONLY(fields_tag) = selected_field_ids{}) :
310  primary_stream{new stream_t{std::move(stream)}, stream_deleter_default},
311  secondary_stream{&*primary_stream, stream_deleter_noop},
312  format{detail::sequence_file_output_format<file_format>{}}
313  {
314  static_assert(meta::in<valid_formats, file_format>::value,
315  "You selected a format that is not in the valid_formats of this file.");
316  }
318 
// Returns an iterator to the current position in the file.
// The iterator (detail::out_file_iterator) is constructed from a reference to this file object.
340  iterator begin() noexcept
341  {
342  return {*this};
343  }
344 
// Returns a sentinel for comparison with the iterator.
// The sentinel is value-initialised and carries no reference to this file.
359  sentinel end() noexcept
360  {
361  return {};
362  }
363 
// Write a seqan3::record to the file.
//
// r - the record; this overload only participates if record_t is TupleLike and (after removing
//     cv/ref qualifiers) a specialisation of seqan3::record.
//
// The SEQ, ID, QUAL and SEQ_QUAL fields are looked up by field id via detail::get_or_ignore and
// passed on to write_record.
// NOTE(review): presumably get_or_ignore yields std::ignore for fields the record does not
// contain (see the comment in the tuple overload) -- confirm in detail/record.hpp.
382  template <typename record_t>
383  void push_back(record_t && r)
384  requires TupleLike<record_t> &&
385  requires { requires detail::is_type_specialisation_of_v<remove_cvref_t<record_t>, record>; }
386  {
387  write_record(detail::get_or_ignore<field::SEQ>(r),
388  detail::get_or_ignore<field::ID>(r),
389  detail::get_or_ignore<field::QUAL>(r),
390  detail::get_or_ignore<field::SEQ_QUAL>(r));
391 
392  }
393 
// Write a record in form of a std::tuple (or any TupleLike type) to the file.
//
// t - the tuple; its elements are interpreted positionally according to selected_field_ids,
//     i.e. the element at selected_field_ids::index_of(field::X) is taken as field X.
//
// The four extracted values are passed on to write_record.
415  template <typename tuple_t>
416  void push_back(tuple_t && t)
417  requires TupleLike<tuple_t>
418  {
419  // index_of might return npos, but this will be handled well by get_or_ignore (and just return ignore)
420  write_record(detail::get_or_ignore<selected_field_ids::index_of(field::SEQ)>(t),
421  detail::get_or_ignore<selected_field_ids::index_of(field::ID)>(t),
422  detail::get_or_ignore<selected_field_ids::index_of(field::QUAL)>(t),
423  detail::get_or_ignore<selected_field_ids::index_of(field::SEQ_QUAL)>(t));
424  }
425 
// Write a record to the file by passing individual fields.
//
// arg, args - the field values, in the order given by selected_field_ids (they are handed to the
//             tuple overload of push_back).
//
// std::tie binds the arguments by lvalue reference, so no field is copied to build the tuple.
449  template <typename arg_t, typename ... arg_types>
450  void emplace_back(arg_t && arg, arg_types && ... args)
451  {
452  push_back(std::tie(arg, args...));
453  }
454 
476  template <std::ranges::InputRange rng_t>
477  sequence_file_output & operator=(rng_t && range)
479  {
480  for (auto && record : range)
481  push_back(std::forward<decltype(record)>(record));
482  return *this;
483  }
484 
512  template <std::ranges::InputRange rng_t>
515  {
516  f = range;
517  return f;
518  }
519 
521  template <std::ranges::InputRange rng_t>
524  {
525  #if defined(__GNUC__) && (__GNUC__ == 9) // an unreported build problem of GCC9
526  for (auto && record : range)
527  f.push_back(std::forward<decltype(record)>(record));
528  #else // ^^^ workaround | regular solution ↓↓↓
529  f = range;
530  #endif
531  return std::move(f);
532  }
534 
561  template <typename typelist, typename field_ids>
563  {
564  write_columns(detail::range_wrap_ignore(detail::get_or_ignore<field::SEQ>(r)),
565  detail::range_wrap_ignore(detail::get_or_ignore<field::ID>(r)),
566  detail::range_wrap_ignore(detail::get_or_ignore<field::QUAL>(r)),
567  detail::range_wrap_ignore(detail::get_or_ignore<field::SEQ_QUAL>(r)));
568  return *this;
569  }
570 
592  template <typename ... arg_types>
594  {
595  // index_of might return npos, but this will be handled well by get_or_ignore (and just return ignore)
596  write_columns(
597  detail::range_wrap_ignore(detail::get_or_ignore<selected_field_ids::index_of(field::SEQ)>(t)),
598  detail::range_wrap_ignore(detail::get_or_ignore<selected_field_ids::index_of(field::ID)>(t)),
599  detail::range_wrap_ignore(detail::get_or_ignore<selected_field_ids::index_of(field::QUAL)>(t)),
600  detail::range_wrap_ignore(detail::get_or_ignore<selected_field_ids::index_of(field::SEQ_QUAL)>(t)));
601  return *this;
602  }
604 
607 
612  {
613  return *secondary_stream;
614  }
616 protected:
618 
// Deleter for stream pointers that do not own their stream: intentionally does nothing.
626  static void stream_deleter_noop(std::basic_ostream<stream_char_type> *) {}
// Deleter for stream pointers that own their stream: releases it with delete.
628  static void stream_deleter_default(std::basic_ostream<stream_char_type> * ptr) { delete ptr; }
629 
631  stream_ptr_t primary_stream{nullptr, stream_deleter_noop};
633  stream_ptr_t secondary_stream{nullptr, stream_deleter_noop};
634 
636  using format_type = typename detail::variant_from_tags<valid_formats, detail::sequence_file_output_format>::type;
638  format_type format;
640 
// Write a single record to secondary_stream using the currently active format handler.
//
// seq      - the sequence field, or std::ignore if not provided.
// id       - the identifier field, or std::ignore if not provided.
// qual     - the quality field, or std::ignore if not provided.
// seq_qual - the combined sequence+quality field, or std::ignore if not provided.
//
// seq_qual may only be given if both seq and qual are std::ignore, and it must be a range whose
// value type is a specialisation of seqan3::qualified; both rules are enforced at compile time.
642  template <typename seq_t, typename id_t, typename qual_t, typename seq_qual_t>
643  void write_record(seq_t && seq, id_t && id, qual_t && qual, seq_qual_t && seq_qual)
644  {
645  static_assert(detail::decays_to_ignore_v<seq_qual_t> ||
646  (detail::decays_to_ignore_v<seq_t> && detail::decays_to_ignore_v<qual_t>),
647  "You may not select field::SEQ_QUAL and either of field::SEQ and field::QUAL at the same time.");
648 
649  if constexpr (!detail::decays_to_ignore_v<seq_qual_t>)
650  static_assert(detail::is_type_specialisation_of_v<value_type_t<seq_qual_t>, qualified>,
651  "The SEQ_QUAL field must contain a range over the seqan3::qualified alphabet.");
652 
    // std::visit requires the variant to hold a value.
653  assert(!format.valueless_by_exception());
    // Dispatch to the write() member of whichever format handler is active in the variant.
654  std::visit([&] (auto & f)
655  {
656  if constexpr (!detail::decays_to_ignore_v<seq_qual_t>)
657  {
    // Split the combined range: view::get<0> is passed in the sequence position and
    // view::get<1> in the quality position of write().
658  f.write(*secondary_stream,
659  options,
660  seq_qual | view::get<0>,
661  id,
662  seq_qual | view::get<1>);
663  }
664  else
665  {
    // Separate fields (any of them may be std::ignore) are passed through unchanged.
666  f.write(*secondary_stream,
667  options,
668  seq,
669  id,
670  qual);
671  }
672  }, format);
673  }
674 
676  template <std::ranges::InputRange seqs_t,
678  std::ranges::InputRange quals_t,
679  std::ranges::InputRange seq_quals_t>
680  void write_columns(seqs_t && seqs,
681  ids_t && ids,
682  quals_t && quals,
683  seq_quals_t && seq_quals)
684  {
685  static_assert(!(detail::decays_to_ignore_v<reference_t<seqs_t>> &&
686  detail::decays_to_ignore_v<reference_t<ids_t>> &&
687  detail::decays_to_ignore_v<reference_t<quals_t>> &&
688  detail::decays_to_ignore_v<reference_t<seq_quals_t>>),
689  "At least one of the columns must not be set to std::ignore.");
690 
691  static_assert(detail::decays_to_ignore_v<reference_t<seq_quals_t>> ||
692  (detail::decays_to_ignore_v<reference_t<seqs_t>> &&
693  detail::decays_to_ignore_v<reference_t<quals_t>>),
694  "You may not select field::SEQ_QUAL and either of field::SEQ and field::QUAL at the same time.");
695 
696  if constexpr (!detail::decays_to_ignore_v<reference_t<seq_quals_t>>)
697  static_assert(detail::is_type_specialisation_of_v<value_type_t<reference_t<seq_quals_t>>, qualified>,
698  "The SEQ_QUAL field must contain a range over the seqan3::qualified alphabet.");
699 
700  assert(!format.valueless_by_exception());
701  std::visit([&] (auto & f)
702  {
703  if constexpr (!detail::decays_to_ignore_v<reference_t<seq_quals_t>>)
704  {
705  auto zipped = std::view::zip(seq_quals, ids);
706 
707  for (auto && v : zipped)
708  f.write(*secondary_stream,
709  options,
710  std::get<0>(v) | view::get<0>,
711  std::get<1>(v),
712  std::get<0>(v) | view::get<1>);
713  }
714  else
715  {
716  auto zipped = std::view::zip(seqs, ids, quals);
717 
718  for (auto && v : zipped)
719  f.write(*secondary_stream, options, std::get<0>(v), std::get<1>(v), std::get<2>(v));
720  }
721  }, format);
722  }
723 
725  friend iterator;
726 };
727 
733 template <OStream2 stream_t,
735  SequenceFileOutputFormat file_format,
736  detail::Fields selected_field_ids>
737 sequence_file_output(stream_t &&,
738  file_format const &,
739  selected_field_ids const &)
740  -> sequence_file_output<selected_field_ids,
741  type_list<file_format>,
743 
745 template <OStream2 stream_t,
746  SequenceFileOutputFormat file_format,
747  detail::Fields selected_field_ids>
748 sequence_file_output(stream_t &,
749  file_format const &,
750  selected_field_ids const &)
751  -> sequence_file_output<selected_field_ids,
752  type_list<file_format>,
755 } // namespace seqan3
void reference
The reference type (void).
Definition: output.hpp:216
Provides seqan3::view::get.
sequence_file_output & operator=(sequence_file_output const &)=delete
Copy assignment is explicitly deleted, because you can't have multiple access to the same file...
Provides the seqan3::format_embl tag and the seqan3::sequence_file_input_format and seqan3::sequence_...
T visit(T... args)
typename value_type< t >::type value_type_t
Shortcut for seqan3::value_type (TransformationTrait shortcut).
Definition: pre.hpp:48
The "sequence", usually a range of nucleotides or amino acids.
A class for writing sequence files, e.g. FASTA, FASTQ ...
Definition: output.hpp:172
~sequence_file_output()=default
Destructor is defaulted.
constexpr sequenced_policy seq
Global execution policy object for sequenced execution policy.
Definition: execution.hpp:54
Provides exceptions used in the I/O module.
T tie(T... args)
selected_field_ids_ selected_field_ids
A seqan3::fields list with the fields selected for the record.
Definition: output.hpp:180
Provides the seqan3::format_fastq tag and the seqan3::sequence_file_input_format and seqan3::sequence...
friend sequence_file_output operator|(rng_t &&range, sequence_file_output &&f) requires TupleLike< reference_t< rng_t >>
Definition: output.hpp:522
constexpr auto zip
A range adaptor that transforms a tuple of range into a range of tuples.
Definition: ranges:948
sequence_file_output & operator=(record< typelist, field_ids > const &r)
Write columns (wrapped in a seqan3::record) to the file.
Definition: output.hpp:562
void size_type
The size type (void).
Definition: output.hpp:220
The class template that file records are based on; behaves like an std::tuple.
Definition: record.hpp:187
Provides various utility functions required only for output.
Provides seqan3::type_list and auxiliary type traits.
void const_reference
The const reference type (void).
Definition: output.hpp:218
sequence_file_output & operator=(rng_t &&range) requires TupleLike< reference_t< rng_t >>
Write a range of records (or tuples) to the file.
Definition: output.hpp:477
SeqAn specific customisations in the standard namespace.
Provides seqan3::view::convert.
void emplace_back(arg_t &&arg, arg_types &&... args)
Write a record to the file by passing individual fields.
Definition: output.hpp:450
sequence_file_output & operator=(std::tuple< arg_types... > const &t)
Write columns (wrapped in a std::tuple) to the file.
Definition: output.hpp:593
stream_char_type_ stream_char_type
Character type of the stream(s), usually char.
Definition: output.hpp:184
The main SeqAn3 namespace.
The qualities, usually in phred-score notation.
void push_back(tuple_t &&t) requires TupleLike< tuple_t >
Write a record in form of a std::tuple to the file.
Definition: output.hpp:416
A class template that holds a choice of seqan3::field.
Definition: record.hpp:127
void const_iterator
The const iterator type is void, because files are not const-iterable.
Definition: output.hpp:226
Sequence and qualities combined in one range.
Provides seqan3::sequence_file_output_options.
Provides seqan3::TupleLike.
Provides various utility functions.
Provides the seqan3::record template and the seqan3::field enum.
The identifier, usually a string.
sentinel end() noexcept
Returns a sentinel for comparison with iterator.
Definition: output.hpp:359
sequence_file_output_options options
The options are public and its members can be set directly.
Definition: output.hpp:606
Adaptations of concepts from the Ranges TS.
detail::out_file_iterator< sequence_file_output > iterator
The iterator type of this view (an output iterator).
Definition: output.hpp:224
sequence_file_output(stream_t &&stream, file_format const &format_tag, selected_field_ids const &fields_tag=selected_field_ids{})
Definition: output.hpp:307
Stream concepts.
sequence_file_output(stream_t &stream, file_format const &format_tag, selected_field_ids const &fields_tag=selected_field_ids{})
Construct from an existing stream and with specified format.
Definition: output.hpp:293
Specifies requirements of a Range type for which begin returns a type that models std::InputIterator...
Provides various type traits on generic types.
Provides seqan3::SequenceFileFormatOut and auxiliary classes.
Provides the seqan3::detail::out_file_iterator class template.
sequence_file_output()=delete
Default constructor is explicitly deleted, you need to give a stream or file name.
sequence_file_output(std::filesystem::path filename, selected_field_ids const &fields_tag=selected_field_ids{})
Construct from filename.
Definition: output.hpp:262
::ranges::default_sentinel_t default_sentinel_t
Alias for ranges::default_sentinel_t. Type of ranges::default_sentinel.
Definition: iterator:351
The options type defines various option members that influence the behaviour of all or some formats...
Definition: output_options.hpp:21
std::ranges::default_sentinel_t sentinel
The type returned by end().
Definition: output.hpp:228
friend sequence_file_output & operator|(rng_t &&range, sequence_file_output &f) requires TupleLike< reference_t< rng_t >>
Write a range of records (or tuples) to the file.
Definition: output.hpp:513
iterator begin() noexcept
Returns an iterator to current position in the file.
Definition: output.hpp:340
Provides the seqan3::sequence_file_format_genbank class.
field
An enumerator for the fields used in file formats. Some of the fields are shared between formats...
Definition: record.hpp:63
typename reference< t >::type reference_t
Shortcut for seqan3::reference (TransformationTrait shortcut).
Definition: pre.hpp:77
void value_type
The value type (void).
Definition: output.hpp:214
valid_formats_ valid_formats
A seqan3::type_list with the possible formats.
Definition: output.hpp:182
Provides the seqan3::format_sam tag and the seqan3::sequence_file_input_format and seqan3::sequence_f...
T forward(T... args)
Provides the seqan3::format_fasta tag and the seqan3::sequence_file_input_format and seqan3::sequence...
void push_back(record_t &&r) requires TupleLike< record_t > &&requires
Write a seqan3::record to the file.
Definition: output.hpp:383
Whether a type behaves like a tuple.
This header includes C++17 filesystem support and imports it into namespace seqan3::filesystem (indep...