SeqAn3  3.0.0
The Modern C++ library for sequence analysis.
output.hpp
Go to the documentation of this file.
1 // -----------------------------------------------------------------------------------------------------
2 // Copyright (c) 2006-2019, Knut Reinert & Freie Universität Berlin
3 // Copyright (c) 2016-2019, Knut Reinert & MPI für molekulare Genetik
4 // This file may be used, modified and/or redistributed under the terms of the 3-clause BSD-License
5 // shipped with this file and also available at: https://github.com/seqan/seqan3/blob/master/LICENSE.md
6 // -----------------------------------------------------------------------------------------------------
7 
13 #pragma once
14 
15 #include <cassert>
16 #include <fstream>
17 #include <string>
18 #include <string_view>
19 #include <variant>
20 #include <vector>
21 
33 #include <seqan3/io/detail/record.hpp>
34 #include <seqan3/io/exception.hpp>
35 #include <seqan3/std/filesystem>
36 #include <seqan3/io/record.hpp>
38 #include <seqan3/std/ranges>
39 
40 namespace seqan3
41 {
42 
43 // ----------------------------------------------------------------------------
44 // alignment_file_output
45 // ----------------------------------------------------------------------------
46 
169 template <detail::Fields selected_field_ids_ =
170  fields<field::SEQ,
171  field::ID,
177  field::MAPQ,
178  field::QUAL,
179  field::FLAG,
180  field::MATE,
181  field::TAGS,
185  detail::TypeListOfAlignmentFileOutputFormats valid_formats_ = type_list<format_sam, format_bam>,
186  Char stream_char_type_ = char,
187  typename ref_ids_type = ref_info_not_given>
189 {
190 public:
195  using selected_field_ids = selected_field_ids_;
198  using valid_formats = valid_formats_;
200  using stream_char_type = stream_char_type_;
202 
205  field::SEQ,
206  field::ID,
212  field::MAPQ,
213  field::FLAG,
214  field::QUAL,
215  field::MATE,
216  field::TAGS,
219 
220  static_assert([] () constexpr
221  {
222  for (field f : selected_field_ids::as_array)
223  if (!field_ids::contains(f))
224  return false;
225  return true;
226  }(),
227  "You selected a field that is not valid for alignment files, "
228  "please refer to the documentation of "
229  "seqan3::alignment_file_output::field_ids for the accepted values.");
230 
236  using value_type = void;
239  using reference = void;
241  using const_reference = void;
243  using size_type = void;
247  using iterator = detail::out_file_iterator<alignment_file_output>;
249  using const_iterator = void;
253 
257  alignment_file_output() = delete;
268  ~alignment_file_output() = default;
269 
// Constructor from a file name (the first line of the signature is absent from this listing).
// Opens `filename` as a binary output file stream; primary_stream holds that stream together with
// stream_deleter_default, which deletes it.
296  selected_field_ids const & SEQAN3_DOXYGEN_ONLY(fields_tag) = selected_field_ids{}) :
297  primary_stream{new std::ofstream{filename, std::ios_base::out | std::ios::binary}, stream_deleter_default}
298  {
299  // the stream was opened in the initialiser list above; throw file_open_error if it is not usable
300  if (!primary_stream->good())
301  throw file_open_error{"Could not open file " + filename.string() + " for writing."};
302 
303  // possibly add intermediate compression stream
// NOTE(review): how the helper decides on compression (presumably from `filename`) is not visible here — confirm.
304  secondary_stream = detail::make_secondary_ostream(*primary_stream, filename);
305 
306  // initialise format handler or throw if format is not found
// NOTE(review): the format is derived from `filename` by detail::set_format, defined elsewhere — confirm its rules.
307  detail::set_format(format, filename);
308  }
309 
// Constructs the file on top of an existing stream passed as an lvalue.
// Both stream pointers store the address of `stream` together with the no-op deleter, so this object
// never deletes the stream. The format handler is fixed at compile time by `file_format`;
// `format_tag` and `fields_tag` are not read in the body.
326  template <OStream2 stream_type, AlignmentFileOutputFormat file_format>
327  alignment_file_output(stream_type & stream,
328  file_format const & SEQAN3_DOXYGEN_ONLY(format_tag),
329  selected_field_ids const & SEQAN3_DOXYGEN_ONLY(fields_tag) = selected_field_ids{}) :
330  primary_stream{&stream, stream_deleter_noop},
331  secondary_stream{&stream, stream_deleter_noop},
332  format{detail::alignment_file_output_format<file_format>{}}
333  {
// Compile-time check that the requested format is one of this file's valid_formats.
334  static_assert(meta::in<valid_formats, file_format>::value,
335  "You selected a format that is not in the valid_formats of this file.");
336  }
337 
// Constructs the file from a stream passed as an rvalue.
// The stream is moved into a newly allocated stream_type that primary_stream holds together with
// stream_deleter_default (which deletes it). secondary_stream points at the same object but uses the
// no-op deleter, so the stream is deleted only once. `format_tag` and `fields_tag` are not read in the body.
339  template <OStream2 stream_type, AlignmentFileOutputFormat file_format>
340  alignment_file_output(stream_type && stream,
341  file_format const & SEQAN3_DOXYGEN_ONLY(format_tag),
342  selected_field_ids const & SEQAN3_DOXYGEN_ONLY(fields_tag) = selected_field_ids{}) :
343  primary_stream{new stream_type{std::move(stream)}, stream_deleter_default},
344  secondary_stream{&*primary_stream, stream_deleter_noop},
345  format{detail::alignment_file_output_format<file_format>{}}
346  {
// Compile-time check that the requested format is one of this file's valid_formats.
347  static_assert(meta::in<valid_formats, file_format>::value,
348  "You selected a format that is not in the valid_formats of this file.");
349  }
350 
// Constructor from a file name plus reference ids and reference lengths.
// Several lines of the signature and the initialiser list are absent from this listing.
// The body creates the header from `ref_ids` and records, per reference, its length and its index.
381  template <typename ref_ids_type_, std::ranges::ForwardRange ref_lengths_type>
386  ref_ids_type_ && ref_ids,
387  ref_lengths_type && ref_lengths,
388  selected_field_ids const & SEQAN3_DOXYGEN_ONLY(fields_tag) = selected_field_ids{}) :
390 
391  {
// Debug-only check: both ranges must have the same number of elements.
392  assert(std::ranges::size(ref_ids) == std::ranges::size(ref_lengths));
393 
// The header is built from the forwarded `ref_ids`.
394  header_ptr = std::make_unique<alignment_file_header<ref_ids_type>>(std::forward<ref_ids_type_>(ref_ids));
395 
396  // fill ref_dict
// NOTE(review): `ref_ids` was forwarded into the header just above; if an rvalue was passed it may already
// have been moved from, in which case the loop bound below is the size of a moved-from range. Confirm, and
// consider taking the bound from header_ptr->ref_ids() instead.
// NOTE(review): ref_lengths[idx] needs operator[], which the ForwardRange constraint alone does not provide.
397  for (size_t idx = 0; idx < std::ranges::size(ref_ids); ++idx)
398  {
399  header_ptr->ref_id_info.push_back({ref_lengths[idx], ""});
400  header_ptr->ref_dict[(header_ptr->ref_ids()[idx])] = idx;
401  }
402  }
403 
425  template <OStream2 stream_type,
426  AlignmentFileOutputFormat file_format,
427  typename ref_ids_type_, // generic type to capture lvalue references
428  std::ranges::ForwardRange ref_lengths_type>
432  alignment_file_output(stream_type && stream,
433  ref_ids_type_ && ref_ids,
434  ref_lengths_type && ref_lengths,
435  file_format const & SEQAN3_DOXYGEN_ONLY(format_tag),
436  selected_field_ids const & SEQAN3_DOXYGEN_ONLY(fields_tag) = selected_field_ids{}) :
437  alignment_file_output{std::forward<stream_type>(stream), file_format{}, selected_field_ids{}}
438  {
439  assert(std::ranges::size(ref_ids) == std::ranges::size(ref_lengths));
440 
441  header_ptr = std::make_unique<alignment_file_header<ref_ids_type>>(std::forward<ref_ids_type_>(ref_ids));
442 
443  // fill ref_dict
444  for (uint32_t idx = 0; idx < std::ranges::size(ref_ids); ++idx)
445  {
446  header_ptr->ref_id_info.emplace_back(ref_lengths[idx], "");
447  header_ptr->ref_dict[header_ptr->ref_ids()[idx]] = idx;
448  }
449  }
451 
473  iterator begin() noexcept
474  {
475  return {*this};
476  }
477 
492  sentinel end() noexcept
493  {
494  return {};
495  }
496 
// Writes one record to the file.
// Only participates for tuple-like types that are a specialisation of `record` (see the requires clause).
// Every field is requested from the record by its field id through detail::get_or; the second argument of
// each call is the fallback value (presumably used when the record does not carry that field — get_or is
// defined elsewhere, confirm). The results are handed to write_record in a fixed order: the header pointer
// first, followed by 14 field values.
515  template <typename record_t>
516  void push_back(record_t && r)
518  requires TupleLike<record_t> &&
519  requires { requires detail::is_type_specialisation_of_v<remove_cvref_t<record_t>, record>; }
521  {
// Fallback types for the alignment and the mate information.
522  using default_align_t = std::pair<std::span<gapped<char>>, std::span<gapped<char>>>;
523  using default_mate_t = std::tuple<std::string_view, std::optional<int32_t>, int32_t>;
524 
525  write_record(detail::get_or<field::HEADER_PTR>(r, nullptr),
526  detail::get_or<field::SEQ>(r, std::string_view{}),
527  detail::get_or<field::QUAL>(r, std::string_view{}),
528  detail::get_or<field::ID>(r, std::string_view{}),
529  detail::get_or<field::OFFSET>(r, 0u),
530  detail::get_or<field::REF_SEQ>(r, std::string_view{}),
531  detail::get_or<field::REF_ID>(r, std::ignore),
532  detail::get_or<field::REF_OFFSET>(r, std::optional<int32_t>{}),
533  detail::get_or<field::ALIGNMENT>(r, default_align_t{}),
534  detail::get_or<field::FLAG>(r, 0u),
535  detail::get_or<field::MAPQ>(r, 0u),
536  detail::get_or<field::MATE>(r, default_mate_t{}),
537  detail::get_or<field::TAGS>(r, sam_tag_dictionary{}),
538  detail::get_or<field::EVALUE>(r, 0u),
539  detail::get_or<field::BIT_SCORE>(r, 0u));
540  }
541 
// Writes one record given as a tuple-like object to the file.
// The position of each field inside the tuple is taken from selected_field_ids::index_of(field); the value
// at that position is requested through detail::get_or with a fallback as second argument. The results are
// handed to write_record in a fixed order: the header pointer first, followed by 14 field values.
563  template <typename tuple_t>
564  void push_back(tuple_t && t)
566  requires TupleLike<tuple_t>
568  {
// Fallback types for the alignment and the mate information.
569  using default_align_t = std::pair<std::span<gapped<char>>, std::span<gapped<char>>>;
570  using default_mate_t = std::tuple<std::string_view, std::optional<int32_t>, int32_t>;
571 
572  // index_of might return npos for a field that was not selected; detail::get_or is expected to cope with that and hand back the fallback (TODO confirm — an earlier note referred to "get_or_ignore", which is not what is called below)
573  write_record(detail::get_or<selected_field_ids::index_of(field::HEADER_PTR)>(t, nullptr),
574  detail::get_or<selected_field_ids::index_of(field::SEQ)>(t, std::string_view{}),
575  detail::get_or<selected_field_ids::index_of(field::QUAL)>(t, std::string_view{}),
576  detail::get_or<selected_field_ids::index_of(field::ID)>(t, std::string_view{}),
577  detail::get_or<selected_field_ids::index_of(field::OFFSET)>(t, 0u),
578  detail::get_or<selected_field_ids::index_of(field::REF_SEQ)>(t, std::string_view{}),
579  detail::get_or<selected_field_ids::index_of(field::REF_ID)>(t, std::ignore),
580  detail::get_or<selected_field_ids::index_of(field::REF_OFFSET)>(t, std::optional<int32_t>{}),
581  detail::get_or<selected_field_ids::index_of(field::ALIGNMENT)>(t, default_align_t{}),
582  detail::get_or<selected_field_ids::index_of(field::FLAG)>(t, 0u),
583  detail::get_or<selected_field_ids::index_of(field::MAPQ)>(t, 0u),
584  detail::get_or<selected_field_ids::index_of(field::MATE)>(t, default_mate_t{}),
585  detail::get_or<selected_field_ids::index_of(field::TAGS)>(t, sam_tag_dictionary{}),
586  detail::get_or<selected_field_ids::index_of(field::EVALUE)>(t, 0u),
587  detail::get_or<selected_field_ids::index_of(field::BIT_SCORE)>(t, 0u));
588  }
589 
// Writes a record to the file from individually passed fields.
// The arguments are bundled, in the order given, into a temporary tuple of lvalue references
// (the same tuple type std::tie produces) and handed to push_back.
template <typename arg_t, typename ... arg_types>
void emplace_back(arg_t && arg, arg_types && ... args)
{
    // Reference collapsing turns both `T & &` and `T && &` into `T &`, i.e. every element is an lvalue reference.
    using field_refs_t = std::tuple<arg_t &, arg_types &...>;

    push_back(field_refs_t(arg, args...));
}
618 
// operator=(rng_t && range): writes a range of records (or tuples) to the file.
// The signature lines are absent from this listing. Each element of `range` is forwarded to push_back
// with its own value category; the function then returns *this.
640  template <typename rng_t>
645  {
646  for (auto && record : range)
647  push_back(std::forward<decltype(record)>(record));
648  return *this;
649  }
650 
// operator|(rng_t && range, alignment_file_output & f): pipe form of writing a range to a file.
// The signature lines are absent from this listing. Assigns `range` to `f` and returns `f`.
679  template <typename rng_t>
684  {
685  f = range;
686  return f;
687  }
688 
// operator|(rng_t && range, alignment_file_output && f): pipe form for a file passed as an rvalue.
// The signature lines are absent from this listing. Assigns `range` to `f`, then returns `f` by moving it,
// because `f` is a named rvalue-reference parameter and would otherwise be treated as an lvalue.
690  template <typename rng_t>
695  {
696  f = range;
697  return std::move(f);
698  }
700 
703 
708  {
709  return *secondary_stream;
710  }
712 
723  auto & header()
724  {
726  throw std::logic_error{"Please construct your file with reference id and length information in order "
727  "to properly initialise the header before accessing it."};
728 
729  return *header_ptr;
730  }
731 
732 protected:
734 
742  static void stream_deleter_noop(std::basic_ostream<stream_char_type> *) {}
744  static void stream_deleter_default(std::basic_ostream<stream_char_type> * ptr) { delete ptr; }
745 
747  stream_ptr_t primary_stream{nullptr, stream_deleter_noop};
749  stream_ptr_t secondary_stream{nullptr, stream_deleter_noop};
750 
752  using format_type = typename detail::variant_from_tags<valid_formats, detail::alignment_file_output_format>::type;
753 
755  format_type format;
757 
759  using header_type = alignment_file_header<std::conditional_t<std::Same<ref_ids_type, ref_info_not_given>,
761  ref_ids_type>>;
762 
764  std::unique_ptr<header_type> header_ptr;
765 
// Hands one record to the format handler currently stored in `format`.
// `record_header_ptr` is the header pointer taken from the record; `remainder` must consist of exactly
// 14 further arguments (enforced by the static_assert) which are forwarded to the handler's write().
767  template <typename record_header_ptr_t, typename ...pack_type>
768  void write_record(record_header_ptr_t && record_header_ptr, pack_type && ...remainder)
769  {
770  static_assert((sizeof...(pack_type) == 14), "Wrong parameter list passed to write_record.");
771 
// Debug-only check that the variant actually holds a format handler.
772  assert(!format.valueless_by_exception());
773 
// Dispatch on the format handler held by the variant.
774  std::visit([&] (auto & f)
775  {
776  // use header from record if explicitly given, e.g. file_output = file_input
// NOTE(review): listing lines 777 and 779 are absent, so the conditions selecting between the three write()
// calls are not visible and the `else` below has no visible `if`. The calls differ only in the header
// argument (*record_header_ptr, std::ignore, *header_ptr). Restore the conditions from the original header.
778  f.write(*secondary_stream, options, *record_header_ptr, std::forward<pack_type>(remainder)...);
780  f.write(*secondary_stream, options, std::ignore, std::forward<pack_type>(remainder)...);
781  else
782  f.write(*secondary_stream, options, *header_ptr, std::forward<pack_type>(remainder)...);
783  }, format);
784  }
785 
787  friend iterator;
788 };
789 
798 template <detail::Fields selected_field_ids>
799 alignment_file_output(std::filesystem::path, selected_field_ids const &)
800  -> alignment_file_output<selected_field_ids,
803  ref_info_not_given>;
804 
808 template <OStream2 stream_type,
809  AlignmentFileOutputFormat file_format,
810  detail::Fields selected_field_ids>
811 alignment_file_output(stream_type &&, file_format const &, selected_field_ids const &)
812  -> alignment_file_output<selected_field_ids,
813  type_list<file_format>,
815  ref_info_not_given>;
816 
820 template <OStream2 stream_type,
821  AlignmentFileOutputFormat file_format,
822  detail::Fields selected_field_ids>
823 alignment_file_output(stream_type &, file_format const &, selected_field_ids const &)
824  -> alignment_file_output<selected_field_ids,
825  type_list<file_format>,
827  ref_info_not_given>;
828 
832 template <OStream2 stream_type,
833  AlignmentFileOutputFormat file_format>
834 alignment_file_output(stream_type &&, file_format const &)
836  type_list<file_format>,
838  ref_info_not_given>;
839 
843 template <OStream2 stream_type,
844  AlignmentFileOutputFormat file_format>
845 alignment_file_output(stream_type &, file_format const &)
847  type_list<file_format>,
849  ref_info_not_given>;
850 
852 template <detail::Fields selected_field_ids,
853  std::ranges::ForwardRange ref_ids_type,
854  std::ranges::ForwardRange ref_lengths_type>
855 alignment_file_output(std::filesystem::path const &,
856  ref_ids_type &&,
857  ref_lengths_type &&,
858  selected_field_ids const &)
859  -> alignment_file_output<selected_field_ids,
863 
865 template <std::ranges::ForwardRange ref_ids_type,
866  std::ranges::ForwardRange ref_lengths_type>
867 alignment_file_output(std::filesystem::path const &,
868  ref_ids_type &&,
869  ref_lengths_type &&)
874 
876 template <OStream2 stream_type,
877  std::ranges::ForwardRange ref_ids_type,
878  std::ranges::ForwardRange ref_lengths_type,
879  AlignmentFileOutputFormat file_format,
880  detail::Fields selected_field_ids>
881 alignment_file_output(stream_type &&,
882  ref_ids_type &&,
883  ref_lengths_type &&,
884  file_format const &,
885  selected_field_ids const &)
886  -> alignment_file_output<selected_field_ids,
887  type_list<file_format>,
890 
892 template <OStream2 stream_type,
893  std::ranges::ForwardRange ref_ids_type,
894  std::ranges::ForwardRange ref_lengths_type,
895  AlignmentFileOutputFormat file_format,
896  detail::Fields selected_field_ids>
897 alignment_file_output(stream_type &,
898  ref_ids_type &&,
899  ref_lengths_type &&,
900  file_format const &,
901  selected_field_ids const &)
902  -> alignment_file_output<selected_field_ids,
903  type_list<file_format>,
906 
908 template <OStream2 stream_type,
909  std::ranges::ForwardRange ref_ids_type,
910  std::ranges::ForwardRange ref_lengths_type,
911  AlignmentFileOutputFormat file_format>
912 alignment_file_output(stream_type &&,
913  ref_ids_type &&,
914  ref_lengths_type &&,
915  file_format const &)
917  type_list<file_format>,
920 
922 template <OStream2 stream_type,
923  std::ranges::ForwardRange ref_ids_type,
924  std::ranges::ForwardRange ref_lengths_type,
925  AlignmentFileOutputFormat file_format>
926 alignment_file_output(stream_type &,
927  ref_ids_type &&,
928  ref_lengths_type &&,
929  file_format const &)
931  type_list<file_format>,
935 
936 } // namespace seqan3
detail::out_file_iterator< alignment_file_output > iterator
The iterator type of this view (an output iterator).
Definition: output.hpp:247
T visit(T... args)
The (reference) "sequence" information, usually a range of nucleotides or amino acids.
The "sequence", usually a range of nucleotides or amino acids.
void emplace_back(arg_t &&arg, arg_types &&... args)
Write a record to the file by passing individual fields.
Definition: output.hpp:614
~alignment_file_output()=default
Destructor is defaulted.
Provides exceptions used in the I/O module.
T tie(T... args)
alignment_file_output & operator=(alignment_file_output const &)=delete
Copy assignment is explicitly deleted, because you can't have multiple access to the same file...
valid_formats_ valid_formats
A seqan3::type_list with the possible formats.
Definition: output.hpp:198
alignment_file_output(stream_type &stream, file_format const &format_tag, selected_field_ids const &fields_tag=selected_field_ids{})
Construct from an existing stream and with specified format.
Definition: output.hpp:327
The (pairwise) alignment stored in an seqan3::alignment object.
The alignment flag (bit information), uint16_t value.
auto & header()
Access the file's header.
Definition: output.hpp:723
The class template that file records are based on; behaves like an std::tuple.
Definition: record.hpp:187
Provides various utility functions required only for output.
alignment_file_output(std::filesystem::path const &filename, ref_ids_type_ &&ref_ids, ref_lengths_type &&ref_lengths, selected_field_ids const &fields_tag=selected_field_ids{})
Construct from filename.
Definition: output.hpp:385
Provides seqan3::type_list and auxiliary type traits.
void const_reference
The const reference type (void).
Definition: output.hpp:241
alignment_file_output(std::filesystem::path filename, selected_field_ids const &fields_tag=selected_field_ids{})
Construct from filename.
Definition: output.hpp:295
sentinel end() noexcept
Returns a sentinel for comparison with iterator.
Definition: output.hpp:492
alignment_file_output(stream_type &&stream, file_format const &format_tag, selected_field_ids const &fields_tag=selected_field_ids{})
Definition: output.hpp:340
::ranges::size size
Alias for ranges::size. Obtains the size of a range whose size can be calculated in constant time...
Definition: ranges:189
The main SeqAn3 namespace.
The qualities, usually in phred-score notation.
The e-value (length normalized bit score), double value.
Provides seqan3::alignment_file_output_options.
Sequence (REF_SEQ) relative start position (0-based), unsigned value.
selected_field_ids_ selected_field_ids
A seqan3::fields list with the fields selected for the record.
Definition: output.hpp:196
A class template that holds a choice of seqan3::field.
Definition: record.hpp:127
A class for writing alignment files, e.g. SAM, BAM, BLAST, ...
Definition: output.hpp:188
iterator begin() noexcept
Returns an iterator to current position in the file.
Definition: output.hpp:473
Provides seqan3::TupleLike.
std::ranges::default_sentinel_t sentinel
The type returned by end().
Definition: output.hpp:251
Provides the seqan3::alignment_file_header class.
alignment_file_output & operator=(rng_t &&range)
Write a range of records (or tuples) to the file.
Definition: output.hpp:641
Provides the seqan3::record template and the seqan3::field enum.
Sequence (SEQ) relative start position (0-based), unsigned value.
void push_back(record_t &&r)
Write a seqan3::record to the file.
Definition: output.hpp:516
The identifier, usually a string.
void size_type
The size type (void).
Definition: output.hpp:243
Adaptations of concepts from the Ranges TS.
The mate pair information given as a std::tuple of reference name, offset and template length...
The identifier of the (reference) sequence that SEQ was aligned to.
The options type defines various option members that influence the behavior of all or some formats...
Definition: output_options.hpp:22
A pointer to the seqan3::alignment_file_header object storing header information. ...
alignment_file_output(stream_type &&stream, ref_ids_type_ &&ref_ids, ref_lengths_type &&ref_lengths, file_format const &format_tag, selected_field_ids const &fields_tag=selected_field_ids{})
Construct from an existing stream and with specified format.
Definition: output.hpp:432
Provides the seqan3::format_sam tag and the seqan3::alignment_file_input_format and seqan3::alignment...
void push_back(tuple_t &&t)
Write a record in form of a std::tuple to the file.
Definition: output.hpp:564
Stream concepts.
Specifies requirements of a Range type for which begin returns a type that models std::InputIterator...
Provides helper data structures for the seqan3::alignment_file_output.
alignment_file_output_options options
The options are public and its members can be set directly.
Definition: output.hpp:702
Provides various type traits on generic types.
Provides the seqan3::detail::out_file_iterator class template.
Provides seqan3::AlignmentFileOutputFormat and auxiliary classes.
Provides the seqan3::alignment_file_format_bam class.
void reference
The reference type (void).
Definition: output.hpp:239
friend alignment_file_output operator|(rng_t &&range, alignment_file_output &&f)
Definition: output.hpp:691
void const_iterator
The const iterator type is void, because files are not const-iterable.
Definition: output.hpp:249
::ranges::default_sentinel_t default_sentinel_t
Alias for ranges::default_sentinel_t. Type of ranges::default_sentinel.
Definition: iterator:351
field
An enumerator for the fields used in file formats. Some of the fields are shared between formats...
Definition: record.hpp:63
The optional tags in the SAM format, stored in a dictionary.
alignment_file_output()=delete
Default constructor is explicitly deleted, you need to give a stream or file name.
Specifies requirements of a Range type for which begin returns a type that models std::ForwardIterato...
The concept std::Same<T, U> is satisfied if and only if T and U denote the same type.
Exposes the value_type of another type.
Definition: pre.hpp:41
T forward(T... args)
The bit score (statistical significance indicator), unsigned value.
stream_char_type_ stream_char_type
Character type of the stream(s), usually char.
Definition: output.hpp:200
The mapping quality of the SEQ alignment, usually a phred-scaled score.
Whether a type behaves like a tuple.
friend alignment_file_output & operator|(rng_t &&range, alignment_file_output &f)
Write a range of records (or tuples) to the file.
Definition: output.hpp:680
The SAM tag dictionary class that stores all optional SAM fields.
Definition: sam_tag_dictionary.hpp:324
This header includes C++17 filesystem support and imports it into namespace seqan3::filesystem (indep...