SeqAn3  3.0.0
The Modern C++ library for sequence analysis.
output.hpp
Go to the documentation of this file.
1 // -----------------------------------------------------------------------------------------------------
2 // Copyright (c) 2006-2019, Knut Reinert & Freie Universität Berlin
3 // Copyright (c) 2016-2019, Knut Reinert & MPI für molekulare Genetik
4 // This file may be used, modified and/or redistributed under the terms of the 3-clause BSD-License
5 // shipped with this file and also available at: https://github.com/seqan/seqan3/blob/master/LICENSE.md
6 // -----------------------------------------------------------------------------------------------------
7 
13 #pragma once
14 
15 #include <cassert>
16 #include <fstream>
17 #include <optional>
18 #include <string>
19 #include <type_traits>
20 #include <variant>
21 #include <vector>
22 
23 #include <range/v3/algorithm/equal.hpp>
24 #include <range/v3/view/zip.hpp>
25 
31 #include <seqan3/io/exception.hpp>
32 #include <seqan3/std/filesystem>
33 #include <seqan3/io/record.hpp>
36 #include <seqan3/io/detail/record.hpp>
42 #include <seqan3/std/ranges>
43 
44 namespace seqan3
45 {
46 
47 // ----------------------------------------------------------------------------
48 // structure_file_output
49 // ----------------------------------------------------------------------------
50 
170 template <detail::Fields selected_field_ids_ = fields<field::SEQ, field::ID, field::STRUCTURE>,
171  detail::TypeListOfStructureFileOutputFormats valid_formats_ = type_list<format_vienna>,
172  Char stream_char_type_ = char>
174 {
175 public:
180  using selected_field_ids = selected_field_ids_;
183  using valid_formats = valid_formats_;
185  using stream_char_type = stream_char_type_;
187 
189  using field_ids = fields<field::SEQ,
190  field::ID,
191  field::BPP,
195  field::REACT,
199 
200  static_assert([] () constexpr
201  {
202  for (field f : selected_field_ids::as_array)
203  if (!field_ids::contains(f))
204  return false;
205  return true;
206  }(),
207  "You selected a field that is not valid for structure files, please refer to the documentation "
208  "of structure_file_output::field_ids for the accepted values.");
209 
210  static_assert([] () constexpr
211  {
212  return !(selected_field_ids::contains(field::STRUCTURED_SEQ) &&
213  (selected_field_ids::contains(field::SEQ) ||
214  (selected_field_ids::contains(field::STRUCTURE))));
215  }(), "You may not select field::STRUCTURED_SEQ and either of field::SEQ and field::STRUCTURE "
216  "at the same time.");
217 
223  using value_type = void;
226  using reference = void;
228  using const_reference = void;
230  using size_type = void;
234  using iterator = detail::out_file_iterator<structure_file_output>;
236  using const_iterator = void;
240 
244  structure_file_output() = delete;
255  ~structure_file_output() = default;
256 
273  selected_field_ids const & SEQAN3_DOXYGEN_ONLY(fields_tag) = selected_field_ids{}) :
274  primary_stream{new std::ofstream{filename, std::ios_base::out | std::ios::binary}, stream_deleter_default}
275  {
276  if (!primary_stream->good())
277  throw file_open_error{"Could not open file " + filename.string() + " for writing."};
278 
279  // possibly add intermediate compression stream
280  secondary_stream = detail::make_secondary_ostream(*primary_stream, filename);
281 
282  // initialise format handler or throw if format is not found
283  detail::set_format(format, filename);
284  }
285 
302  template <OStream2 stream_t, StructureFileOutputFormat file_format>
303  structure_file_output(stream_t & stream,
304  file_format const & SEQAN3_DOXYGEN_ONLY(format_tag),
305  selected_field_ids const & SEQAN3_DOXYGEN_ONLY(fields_tag) = selected_field_ids{}) :
306  primary_stream{&stream, stream_deleter_noop},
307  secondary_stream{&stream, stream_deleter_noop},
308  format{detail::structure_file_output_format<file_format>{}}
309  {
310  static_assert(meta::in<valid_formats, file_format>::value,
311  "You selected a format that is not in the valid_formats of this file.");
312  }
313 
315  template <OStream2 stream_t, StructureFileOutputFormat file_format>
316  structure_file_output(stream_t && stream,
317  file_format const & SEQAN3_DOXYGEN_ONLY(format_tag),
318  selected_field_ids const & SEQAN3_DOXYGEN_ONLY(fields_tag) = selected_field_ids{}) :
319  primary_stream{new stream_t{std::move(stream)}, stream_deleter_default},
320  secondary_stream{&*primary_stream, stream_deleter_noop},
321  format{detail::structure_file_output_format<file_format>{}}
322  {
323  static_assert(meta::in<valid_formats, file_format>::value,
324  "You selected a format that is not in the valid_formats of this file.");
325  }
327 
349  iterator begin() noexcept
350  {
351  return {*this};
352  }
353 
368  sentinel end() noexcept
369  {
370  return {};
371  }
372 
391  template <typename record_t>
392  void push_back(record_t && r)
393  requires TupleLike<record_t> &&
394  requires { requires detail::is_type_specialisation_of_v<remove_cvref_t<record_t>, record>; }
395  {
396  write_record(detail::get_or_ignore<field::SEQ>(r),
397  detail::get_or_ignore<field::ID>(r),
398  detail::get_or_ignore<field::BPP>(r),
399  detail::get_or_ignore<field::STRUCTURE>(r),
400  detail::get_or_ignore<field::STRUCTURED_SEQ>(r),
401  detail::get_or_ignore<field::ENERGY>(r),
402  detail::get_or_ignore<field::REACT>(r),
403  detail::get_or_ignore<field::REACT_ERR>(r),
404  detail::get_or_ignore<field::COMMENT>(r),
405  detail::get_or_ignore<field::OFFSET>(r));
406  }
407 
429  template <typename tuple_t>
430  void push_back(tuple_t && t)
431  requires TupleLike<tuple_t>
432  {
433  // index_of might return npos, but this will be handled well by get_or_ignore (and just return ignore)
434  write_record(detail::get_or_ignore<selected_field_ids::index_of(field::SEQ)>(t),
435  detail::get_or_ignore<selected_field_ids::index_of(field::ID)>(t),
436  detail::get_or_ignore<selected_field_ids::index_of(field::BPP)>(t),
437  detail::get_or_ignore<selected_field_ids::index_of(field::STRUCTURE)>(t),
438  detail::get_or_ignore<selected_field_ids::index_of(field::STRUCTURED_SEQ)>(t),
439  detail::get_or_ignore<selected_field_ids::index_of(field::ENERGY)>(t),
440  detail::get_or_ignore<selected_field_ids::index_of(field::REACT)>(t),
441  detail::get_or_ignore<selected_field_ids::index_of(field::REACT_ERR)>(t),
442  detail::get_or_ignore<selected_field_ids::index_of(field::COMMENT)>(t),
443  detail::get_or_ignore<selected_field_ids::index_of(field::OFFSET)>(t));
444  }
445 
469  template <typename arg_t, typename ... arg_types>
470  void emplace_back(arg_t && arg, arg_types && ... args)
471  {
472  push_back(std::tie(arg, args...));
473  }
474 
496  template <std::ranges::InputRange rng_t>
499  {
500  for (auto && record : range)
501  push_back(std::forward<decltype(record)>(record));
502  return *this;
503  }
504 
532  template <std::ranges::InputRange rng_t>
535  {
536  f = range;
537  return f;
538  }
539 
541  template <std::ranges::InputRange rng_t>
544  {
545  f = range;
546  return std::move(f);
547  }
549 
576  template <typename typelist, typename field_ids>
578  {
579  write_columns(detail::range_wrap_ignore(detail::get_or_ignore<field::SEQ>(r)),
580  detail::range_wrap_ignore(detail::get_or_ignore<field::ID>(r)),
581  detail::range_wrap_ignore(detail::get_or_ignore<field::BPP>(r)),
582  detail::range_wrap_ignore(detail::get_or_ignore<field::STRUCTURE>(r)),
583  detail::range_wrap_ignore(detail::get_or_ignore<field::STRUCTURED_SEQ>(r)),
584  detail::range_wrap_ignore(detail::get_or_ignore<field::ENERGY>(r)),
585  detail::range_wrap_ignore(detail::get_or_ignore<field::REACT>(r)),
586  detail::range_wrap_ignore(detail::get_or_ignore<field::REACT_ERR>(r)),
587  detail::range_wrap_ignore(detail::get_or_ignore<field::COMMENT>(r)),
588  detail::range_wrap_ignore(detail::get_or_ignore<field::OFFSET>(r)));
589  return *this;
590  }
591 
614  template <typename ... arg_types>
616  {
617  // index_of might return npos, but this will be handled well by get_or_ignore (and just return ignore)
618  write_columns(
619  detail::range_wrap_ignore(detail::get_or_ignore<selected_field_ids::index_of(field::SEQ)>(t)),
620  detail::range_wrap_ignore(detail::get_or_ignore<selected_field_ids::index_of(field::ID)>(t)),
621  detail::range_wrap_ignore(detail::get_or_ignore<selected_field_ids::index_of(field::BPP)>(t)),
622  detail::range_wrap_ignore(detail::get_or_ignore<selected_field_ids::index_of(field::STRUCTURE)>(t)),
623  detail::range_wrap_ignore(detail::get_or_ignore<selected_field_ids::index_of(field::STRUCTURED_SEQ)>(t)),
624  detail::range_wrap_ignore(detail::get_or_ignore<selected_field_ids::index_of(field::ENERGY)>(t)),
625  detail::range_wrap_ignore(detail::get_or_ignore<selected_field_ids::index_of(field::REACT)>(t)),
626  detail::range_wrap_ignore(detail::get_or_ignore<selected_field_ids::index_of(field::REACT_ERR)>(t)),
627  detail::range_wrap_ignore(detail::get_or_ignore<selected_field_ids::index_of(field::COMMENT)>(t)),
628  detail::range_wrap_ignore(detail::get_or_ignore<selected_field_ids::index_of(field::OFFSET)>(t)));
629  return *this;
630  }
632 
635 
640  {
641  return *secondary_stream;
642  }
644 protected:
646 
654  static void stream_deleter_noop(std::basic_ostream<stream_char_type> *) {}
656  static void stream_deleter_default(std::basic_ostream<stream_char_type> * ptr) { delete ptr; }
657 
659  stream_ptr_t primary_stream{nullptr, stream_deleter_noop};
661  stream_ptr_t secondary_stream{nullptr, stream_deleter_noop};
662 
664  using format_type = typename detail::variant_from_tags<valid_formats, detail::structure_file_output_format>::type;
666  format_type format;
668 
670  template <typename seq_type,
671  typename id_type,
672  typename bpp_type,
673  typename structure_type,
674  typename structured_seq_type,
675  typename energy_type,
676  typename react_type,
677  typename comment_type,
678  typename offset_type>
679  void write_record(seq_type && seq,
680  id_type && id,
681  bpp_type && bpp,
682  structure_type && structure,
683  structured_seq_type && structured_seq,
684  energy_type && energy,
685  react_type && react,
686  react_type && react_error,
687  comment_type && comment,
688  offset_type && offset)
689  {
690  static_assert(detail::decays_to_ignore_v<structured_seq_type> ||
691  (detail::decays_to_ignore_v<seq_type> && detail::decays_to_ignore_v<structure_type>),
692  "You may not select field::STRUCTURED_SEQ and either of field::SEQ and field::STRUCTURE "
693  "at the same time.");
694 
695  assert(!format.valueless_by_exception());
696  std::visit([&] (auto & f)
697  {
698  if constexpr (!detail::decays_to_ignore_v<structured_seq_type>)
699  {
700  f.write(*secondary_stream,
701  options,
702  structured_seq | view::get<0>,
703  id,
704  bpp,
705  structured_seq | view::get<1>,
706  energy,
707  react,
708  react_error,
709  comment,
710  offset);
711  }
712  else
713  {
714  f.write(*secondary_stream,
715  options,
716  seq,
717  id,
718  bpp,
719  structure,
720  energy,
721  react,
722  react_error,
723  comment,
724  offset);
725  }
726  }, format);
727  }
728 
730  template <std::ranges::InputRange seq_type,
731  std::ranges::InputRange id_type,
732  std::ranges::InputRange bpp_type,
733  std::ranges::InputRange structure_type,
734  std::ranges::InputRange structured_seq_type,
735  std::ranges::InputRange energy_type,
736  std::ranges::InputRange react_type,
737  std::ranges::InputRange comment_type,
738  std::ranges::InputRange offset_type>
739  void write_columns(seq_type && seq,
740  id_type && id,
741  bpp_type && bpp,
742  structure_type && structure,
743  structured_seq_type && structured_seq,
744  energy_type && energy,
745  react_type && react,
746  react_type && react_error,
747  comment_type && comment,
748  offset_type && offset)
749  {
750  static_assert(!(detail::decays_to_ignore_v<reference_t<seq_type>> &&
751  detail::decays_to_ignore_v<reference_t<id_type>> &&
752  detail::decays_to_ignore_v<reference_t<bpp_type>> &&
753  detail::decays_to_ignore_v<reference_t<structure_type>> &&
754  detail::decays_to_ignore_v<reference_t<structured_seq_type>> &&
755  detail::decays_to_ignore_v<reference_t<energy_type>> &&
756  detail::decays_to_ignore_v<reference_t<react_type>> &&
757  detail::decays_to_ignore_v<reference_t<comment_type>> &&
758  detail::decays_to_ignore_v<reference_t<offset_type>>),
759  "At least one of the columns must not be set to std::ignore.");
760 
761  static_assert(detail::decays_to_ignore_v<reference_t<structured_seq_type>> ||
762  (detail::decays_to_ignore_v<reference_t<seq_type>> &&
763  detail::decays_to_ignore_v<reference_t<structure_type>>),
764  "You may not select field::STRUCTURED_SEQ and either of field::SEQ and field::STRUCTURE "
765  "at the same time.");
766 
767  assert(!format.valueless_by_exception());
768  std::visit([&] (auto & f)
769  {
770  if constexpr (!detail::decays_to_ignore_v<reference_t<structured_seq_type>>)
771  {
772  auto zipped = std::view::zip(structured_seq, id, bpp, energy, react, react_error, comment, offset);
773 
774  for (auto && v : zipped)
775  {
776  f.write(*secondary_stream,
777  options,
778  std::get<0>(v) | view::get<0>, // seq
779  std::get<1>(v), // id
780  std::get<2>(v), // bpp
781  std::get<0>(v) | view::get<1>, // structure
782  std::get<3>(v), // energy
783  std::get<4>(v), // react
784  std::get<5>(v), // react_error
785  std::get<6>(v), // comment
786  std::get<7>(v)); // offset
787  }
788  }
789  else
790  {
791  auto zipped = std::view::zip(seq, id, bpp, structure, energy, react, react_error, comment, offset);
792 
793  for (auto && v : zipped)
794  {
795  f.write(*secondary_stream, options, std::get<0>(v), std::get<1>(v), std::get<2>(v), std::get<3>(v),
796  std::get<4>(v), std::get<5>(v), std::get<6>(v), std::get<7>(v), std::get<8>(v));
797  }
798  }
799  }, format);
800  }
801 
803  friend iterator;
804 };
805 
811 template <OStream2 stream_t,
813  StructureFileOutputFormat file_format,
814  detail::Fields selected_field_ids>
815 structure_file_output(stream_t &&, file_format const &, selected_field_ids const &)
816  -> structure_file_output<selected_field_ids,
817  type_list<file_format>,
819 
821 template <OStream2 stream_t,
822  StructureFileOutputFormat file_format,
823  detail::Fields selected_field_ids>
824 structure_file_output(stream_t &, file_format const &, selected_field_ids const &)
825  -> structure_file_output<selected_field_ids,
826  type_list<file_format>,
829 
830 } // namespace seqan3
Provides seqan3::view::get.
structure_file_output & operator=(record< typelist, field_ids > const &r)
Write columns (wrapped in a seqan3::record) to the file.
Definition: output.hpp:577
T visit(T... args)
The "sequence", usually a range of nucleotides or amino acids.
structure_file_output & operator=(rng_t &&range) requires TupleLike< reference_t< rng_t >>
Write a range of records (or tuples) to the file.
Definition: output.hpp:497
constexpr sequenced_policy seq
Global execution policy object for sequenced execution policy.
Definition: execution.hpp:54
Provides exceptions used in the I/O module.
Sequence and fixed interactions combined in one range.
Energy of a folded sequence, represented by one float number.
T tie(T... args)
Provides the seqan3::format_vienna tag and the seqan3::sequencestrucure_file_input_format and seqan3:...
valid_formats_ valid_formats
A seqan3::type_list with the possible formats.
Definition: output.hpp:183
constexpr auto zip
A range adaptor that transforms a tuple of ranges into a range of tuples.
Definition: ranges:948
void emplace_back(arg_t &&arg, arg_types &&... args)
Write a record to the file by passing individual fields.
Definition: output.hpp:470
friend structure_file_output & operator|(rng_t &&range, structure_file_output &f) requires TupleLike< reference_t< rng_t >>
Write a range of records (or tuples) to the file.
Definition: output.hpp:533
A class for writing structured sequence files, e.g. Stockholm, Connect, Vienna, ViennaRNA bpp matrix ...
Definition: output.hpp:173
void const_reference
The const reference type (void).
Definition: output.hpp:228
The class template that file records are based on; behaves like an std::tuple.
Definition: record.hpp:187
Provides various utility functions required only for output.
Provides seqan3::type_list and auxiliary type traits.
Comment field of arbitrary content, usually a string.
Meta-header for the structure module. It includes all headers from alphabet/structure/.
structure_file_output & operator=(std::tuple< arg_types... > const &t)
Write columns (wrapped in a std::tuple) to the file.
Definition: output.hpp:615
Provides seqan3::view::convert.
stream_char_type_ stream_char_type
Character type of the stream(s), usually char.
Definition: output.hpp:185
The main SeqAn3 namespace.
detail::out_file_iterator< structure_file_output > iterator
The iterator type of this view (an output iterator).
Definition: output.hpp:234
Base pair probability matrix of interactions, usually a matrix of float numbers.
A class template that holds a choice of seqan3::field.
Definition: record.hpp:127
Provides seqan3::structure_file_output_options.
friend structure_file_output operator|(rng_t &&range, structure_file_output &&f) requires TupleLike< reference_t< rng_t >>
Definition: output.hpp:542
void size_type
The size type (void).
Definition: output.hpp:230
Provides seqan3::TupleLike.
Reactivity error values given in a vector corresponding to REACT.
Provides the seqan3::record template and the seqan3::field enum.
~structure_file_output()=default
Destructor is defaulted.
Sequence (SEQ) relative start position (0-based), unsigned value.
The identifier, usually a string.
Adaptations of concepts from the Ranges TS.
Provides seqan3::StructureFileOutputFormat and auxiliary classes.
sentinel end() noexcept
Returns a sentinel for comparison with iterator.
Definition: output.hpp:368
Fixed interactions, usually a string of structure alphabet characters.
The options type defines various option members that influence the behaviour of all or some formats...
Definition: output_options.hpp:23
iterator begin() noexcept
Returns an iterator to current position in the file.
Definition: output.hpp:349
void const_iterator
The const iterator type is void, because files are not const-iterable.
Definition: output.hpp:236
Stream concepts.
Specifies requirements of a Range type for which begin returns a type that models std::InputIterator...
structure_file_output(stream_t &&stream, file_format const &format_tag, selected_field_ids const &fields_tag=selected_field_ids{})
Definition: output.hpp:316
Provides various type traits on generic types.
Provides the seqan3::detail::out_file_iterator class template.
selected_field_ids_ selected_field_ids
A seqan3::fields list with the fields selected for the record.
Definition: output.hpp:181
::ranges::default_sentinel_t default_sentinel_t
Alias for ranges::default_sentinel_t. Type of ranges::default_sentinel.
Definition: iterator:351
structure_file_output_options options
The options are public and its members can be set directly.
Definition: output.hpp:634
structure_file_output()=delete
Default constructor is explicitly deleted, you need to give a stream or file name.
field
An enumerator for the fields used in file formats. Some of the fields are shared between formats...
Definition: record.hpp:63
void reference
The reference type (void).
Definition: output.hpp:226
Reactivity values of the sequence characters given in a vector of float numbers.
structure_file_output(std::filesystem::path filename, selected_field_ids const &fields_tag=selected_field_ids{})
Construct from filename.
Definition: output.hpp:272
std::ranges::default_sentinel_t sentinel
The type returned by end().
Definition: output.hpp:238
T forward(T... args)
void push_back(record_t &&r) requires TupleLike< record_t > &&requires
Write a seqan3::record to the file.
Definition: output.hpp:392
void push_back(tuple_t &&t) requires TupleLike< tuple_t >
Write a record in form of a std::tuple to the file.
Definition: output.hpp:430
void value_type
The value type (void).
Definition: output.hpp:224
Whether a type behaves like a tuple.
structure_file_output & operator=(structure_file_output const &)=delete
Copy assignment is explicitly deleted, because you can't have multiple access to the same file...
structure_file_output(stream_t &stream, file_format const &format_tag, selected_field_ids const &fields_tag=selected_field_ids{})
Construct from an existing stream and with specified format.
Definition: output.hpp:303
This header includes C++17 filesystem support and imports it into namespace seqan3::filesystem (indep...