SeqAn3  3.0.0
The Modern C++ library for sequence analysis.
input.hpp
Go to the documentation of this file.
1 // -----------------------------------------------------------------------------------------------------
2 // Copyright (c) 2006-2019, Knut Reinert & Freie Universität Berlin
3 // Copyright (c) 2016-2019, Knut Reinert & MPI für molekulare Genetik
4 // This file may be used, modified and/or redistributed under the terms of the 3-clause BSD-License
5 // shipped with this file and also available at: https://github.com/seqan/seqan3/blob/master/LICENSE.md
6 // -----------------------------------------------------------------------------------------------------
7 
13 #pragma once
14 
15 #include <cassert>
16 #include <fstream>
17 #include <limits>
18 #include <optional>
19 #include <string>
20 #include <type_traits>
21 #include <utility>
22 #include <variant>
23 #include <vector>
24 
25 #include <range/v3/algorithm/equal.hpp>
26 
34 #include <seqan3/io/exception.hpp>
35 #include <seqan3/std/filesystem>
36 #include <seqan3/io/record.hpp>
39 #include <seqan3/io/detail/record.hpp>
44 
45 namespace seqan3
46 {
47 // ----------------------------------------------------------------------------
48 // StructureFileInputTraits
49 // ----------------------------------------------------------------------------
50 
172 template<typename t>
175 SEQAN3_CONCEPT StructureFileInputTraits = requires(t v)
176 {
177  // TODO(joergi-w) The expensive concept checks are currently omitted. Check again when compiler has improved.
178  // sequence
179  requires WritableAlphabet<typename t::seq_alphabet>;
180  requires WritableAlphabet<typename t::seq_legal_alphabet>;
181  requires ExplicitlyConvertibleTo<typename t::seq_legal_alphabet, typename t::seq_alphabet>;
182  requires SequenceContainer<typename t::template seq_container<typename t::seq_alphabet>>;
183 // requires SequenceContainer
184 // <typename t::template seq_container_container
185 // <typename t::template seq_container
186 // <typename t::seq_alphabet>>>;
187 
188  // id
189  requires WritableAlphabet<typename t::id_alphabet>;
190  requires SequenceContainer<typename t::template id_container<typename t::id_alphabet>>;
191 // requires SequenceContainer
192 // <typename t::template id_container_container
193 // <typename t::template id_container
194 // <typename t::id_alphabet>>>;
195 
196  // bpp
197  requires std::is_floating_point_v<typename t::bpp_prob>;
199 
200 // requires Container // TODO check Associative Container Concept when implemented
201 // <typename t::template bpp_queue
202 // <typename t::template bpp_item
203 // <typename t::bpp_prob, typename t::bpp_partner>>>
204 // && requires(typename t::template bpp_queue // TODO maybe implement also a version that allows emplace_back
205 // <typename t::template bpp_item
206 // <typename t::bpp_prob, typename t::bpp_partner>> value) { value.emplace(1.0, 1); };
207 // requires SequenceContainer
208 // <typename t::template bpp_container
209 // <typename t::template bpp_queue
210 // <typename t::template bpp_item
211 // <typename t::bpp_prob, typename t::bpp_partner>>>>;
212 // requires SequenceContainer
213 // <typename t::template bpp_container_container
214 // <typename t::template bpp_container
215 // <typename t::template bpp_queue
216 // <typename t::template bpp_item
217 // <typename t::bpp_prob, typename t::bpp_partner>>>>>;
218 
219  // structure
220  requires std::is_same_v<typename t::structure_alphabet, dssp9> // TODO(joergi-w) add aa_structure_concept
221  || RnaStructureAlphabet<typename t::structure_alphabet>;
222  requires SequenceContainer<typename t::template structure_container<typename t::structure_alphabet>>;
223 // requires SequenceContainer
224 // <typename t::template structure_container_container
225 // <typename t::template structure_container
226 // <typename t::structure_alphabet>>>;
227 
228  // structured sequence: tuple composites of seq and structure
229  requires std::is_base_of_v<alphabet_tuple_base
230  <typename t::template structured_seq_alphabet
231  <typename t::seq_alphabet, typename t::structure_alphabet>,
232  typename t::seq_alphabet, typename t::structure_alphabet>,
233  typename t::template structured_seq_alphabet<typename t::seq_alphabet, typename t::structure_alphabet>>;
234 // requires SequenceContainer
235 // <typename t::template structured_seq_container
236 // <typename t::template structured_seq_alphabet
237 // <typename t::seq_alphabet, typename t::structure_alphabet>>>;
238 // requires SequenceContainer
239 // <typename t::template structured_seq_container_container
240 // <typename t::template structured_seq_container
241 // <typename t::template structured_seq_alphabet
242 // <typename t::seq_alphabet, typename t::structure_alphabet>>>>;
243 
244  // energy: std::optional of floating point number
245  requires std::is_floating_point_v<typename t::energy_type::value_type>;
246  requires SequenceContainer<typename t::template energy_container<typename t::energy_type>>;
247 
248  // reactivity [error]
249  requires std::is_floating_point_v<typename t::react_type>;
250  requires SequenceContainer<typename t::template react_container<typename t::react_type>>;
251 // requires SequenceContainer
252 // <typename t::template react_container_container
253 // <typename t::template react_container
254 // <typename t::react_type>>>;
255 
256  // comment
257  requires WritableAlphabet<typename t::comment_alphabet>;
258  requires SequenceContainer<typename t::template comment_container<typename t::comment_alphabet>>;
259 // requires SequenceContainer
260 // <typename t::template comment_container_container
261 // <typename t::template comment_container
262 // <typename t::comment_alphabet>>>;
263 
264  // offset
266  requires SequenceContainer<typename t::template offset_container<typename t::offset_type>>;
267 };
269 
270 // ----------------------------------------------------------------------------
271 // structure_file_input_default_traits
272 // ----------------------------------------------------------------------------
273 
288 {
294  // sequence
295 
298 
301 
303  template<typename _seq_alphabet>
305 
307  template<typename _seq_container>
309 
310  // id
311 
313  using id_alphabet = char;
314 
316  template<typename _id_alphabet>
318 
320  template<typename _id_container>
322 
323  // base pair probability structure
324 
326  using bpp_prob = double;
327 
329  using bpp_partner = size_t;
330 
332  template<typename _bpp_prob, typename _bpp_partner>
334 
336  template<typename _bpp_item>
338 
340  template<typename _bpp_queue>
342 
344  template<typename _bpp_container>
346 
347  // fixed structure
348 
350  using structure_alphabet = wuss51;
351 
353  template<typename _structure_alphabet>
355 
357  template<typename _structure_container>
359 
360  // combined sequence and structure
361 
363  template<typename _seq_alphabet, typename _structure_alphabet>
365 
367  template<typename _structured_seq_alphabet>
369 
371  template<typename _structured_seq_container>
373 
374  // energy
375 
378 
380  template<typename _energy_type>
382 
383  // reactivity [error]
384 
386  using react_type = double;
387 
389  template<typename _react_type>
391 
393  template<typename _react_container>
395 
396  // comment
397 
399  using comment_alphabet = char;
400 
402  template<typename _comment_alphabet>
404 
406  template<typename _comment_container>
408 
409  // offset
410 
412  using offset_type = size_t;
413 
415  template<typename _offset_type>
418 };
419 
423 {
429  using seq_alphabet = aa27;
436  template<typename _seq_alphabet, typename _structure_alphabet>
439 };
440 
441 // ----------------------------------------------------------------------------
442 // structure_file_input
443 // ----------------------------------------------------------------------------
444 
585  detail::Fields selected_field_ids_ = fields<field::SEQ, field::ID, field::STRUCTURE>,
586  detail::TypeListOfStructureFileInputFormats valid_formats_
588  Char stream_char_type_ = char>
590 {
591 public:
596  using traits_type = traits_type_; // traits type that defines the aliases and templates for storage of the fields
599  using selected_field_ids = selected_field_ids_; // the fields selected for the record
601  using valid_formats = valid_formats_; // list of the formats this file may be read as
603  using stream_char_type = stream_char_type_; // character type of the stream(s)
605 
609  using field_ids = fields<field::SEQ,
610  field::ID,
611  field::BPP,
615  field::REACT,
619 
620  static_assert([]() constexpr
621  {
     // compile-time check: every selected field must be contained in field_ids
622  for (field f : selected_field_ids::as_array)
623  if (!field_ids::contains(f))
624  return false;
625  return true;
626  }(),
627  "You selected a field that is not valid for structure files, please refer to the documentation "
628  "of structure_file_input::field_ids for the accepted values.");
629 
     // compile-time check: STRUCTURED_SEQ must not be combined with SEQ or with STRUCTURE
630  static_assert([]() constexpr
631  {
632  return !(selected_field_ids::contains(field::STRUCTURED_SEQ) &&
633  (selected_field_ids::contains(field::SEQ) ||
634  (selected_field_ids::contains(field::STRUCTURE))));
635  }(), "You may not select field::STRUCTURED_SEQ and either of field::SEQ and field::STRUCTURE "
636  "at the same time.");
637 
643  using seq_type = typename traits_type::template seq_container<typename traits_type::seq_alphabet>; // type of the sequence field
646  using id_type = typename traits_type::template id_container<typename traits_type::id_alphabet>; // type of the ID field
648  using bpp_type = typename traits_type::template bpp_container
649  <typename traits_type::template bpp_queue
650  <typename traits_type::template bpp_item
651  <typename traits_type::bpp_prob, typename traits_type::bpp_partner>>>; // type of the base pair probabilities: bpp_container of bpp_queue of bpp_item<bpp_prob, bpp_partner>
653  using structure_type = typename traits_type::template structure_container
654  <typename traits_type::structure_alphabet>; // type of the structure field
656  using structured_seq_type = typename traits_type::template structured_seq_container
657  <typename traits_type::template structured_seq_alphabet
658  <typename traits_type::seq_alphabet, typename traits_type::structure_alphabet>>; // type of the combined sequence-structure field
660  using energy_type = typename traits_type::energy_type; // type of the energy field, taken from the traits as is
662  using react_type = typename traits_type::template react_container<typename traits_type::react_type>; // type of the reactivity fields: a container of the traits' react_type
664  using comment_type = typename traits_type::template comment_container
665  <typename traits_type::comment_alphabet>; // type of the comment field
667  using offset_type = typename traits_type::offset_type; // type of the offset field, taken from the traits as is
668 
672 
677 
683  using seq_column_type = typename traits_type::template seq_container_container<seq_type>;
686  using id_column_type = typename traits_type::template id_container_container<id_type>;
688  using bpp_column_type = typename traits_type::template bpp_container_container<bpp_type>;
690  using structure_column_type = typename traits_type::template structure_container_container<structure_type>;
692  using structured_seq_column_type = typename traits_type::template structured_seq_container_container
695  using energy_column_type = typename traits_type::template energy_container<energy_type>;
697  using react_column_type = typename traits_type::template react_container_container<react_type>;
699  using comment_column_type = typename traits_type::template comment_container_container<comment_type>;
701  using offset_column_type = typename traits_type::template offset_container<offset_type>;
702 
715  using file_as_tuple_type = record<detail::select_types_with_ids_t<field_column_types, field_ids,
718 
723  using value_type = record_type; // the element type of the file is its record type
728  using const_reference = void; // void, because files are not const-iterable
730  using size_type = size_t; // an unsigned integer type
734  using iterator = detail::in_file_iterator<structure_file_input>; // the iterator type of this file
736  using const_iterator = void; // void, because files are not const-iterable
740 
744  structure_file_input() = delete;
747  structure_file_input(structure_file_input const &) = delete;
755  ~structure_file_input() = default;
756 
774  selected_field_ids const & SEQAN3_DOXYGEN_ONLY(fields_tag) = selected_field_ids{}) :
775  primary_stream{new std::ifstream{filename, std::ios_base::in | std::ios::binary}, stream_deleter_default}
776  {
777  if (!primary_stream->good())
778  throw file_open_error{"Could not open file " + filename.string() + " for reading."};
779 
780  // possibly add intermediate decompression stream
781  secondary_stream = detail::make_secondary_istream(*primary_stream, filename);
782 
783  // initialise format handler
784  detail::set_format(format, filename);
785 
786  // buffer first record
787  read_next_record();
788  }
789 
805  template<IStream2 stream_t, StructureFileInputFormat file_format>
806  structure_file_input(stream_t & stream,
807  file_format const & SEQAN3_DOXYGEN_ONLY(format_tag),
808  selected_field_ids const & SEQAN3_DOXYGEN_ONLY(fields_tag) = selected_field_ids{}) :
809  primary_stream{&stream, stream_deleter_noop}, // only the address is stored; the no-op deleter never deletes the caller's stream
810  format{detail::structure_file_input_format<file_format>{}}
811  {
     // Construct from an existing stream (taken by lvalue reference) and with a specified format.
     // The format and fields arguments act as tags only: their types select the format handler and
     // the fields, the values themselves are not used in this body.
812  static_assert(meta::in<valid_formats, file_format>::value,
813  "You selected a format that is not in the valid_formats of this file.");
814 
815  // possibly add intermediate decompression stream
816  secondary_stream = detail::make_secondary_istream(*primary_stream);
817 
818  // buffer first record
819  read_next_record();
820  }
821 
823  template<IStream2 stream_t, StructureFileInputFormat file_format>
824  structure_file_input(stream_t && stream,
825  file_format const & SEQAN3_DOXYGEN_ONLY(format_tag),
826  selected_field_ids const & SEQAN3_DOXYGEN_ONLY(fields_tag) = selected_field_ids{}) :
827  primary_stream{new stream_t{std::move(stream)}, stream_deleter_default}, // stream is moved into a new object that stream_deleter_default deletes
828  format{detail::structure_file_input_format<file_format>{}}
829  {
     // Construct from a stream passed as `stream_t &&` and with a specified format.
     // The format and fields arguments act as tags only: their types select the format handler and
     // the fields, the values themselves are not used in this body.
830  static_assert(meta::in<valid_formats, file_format>::value,
831  "You selected a format that is not in the valid_formats of this file.");
832 
833  // possibly add intermediate decompression stream
834  secondary_stream = detail::make_secondary_istream(*primary_stream);
835 
836  // buffer first record
837  read_next_record();
838  }
840 
858  iterator begin() noexcept
859  {
     // Returns an iterator to the current position in the file; it is constructed from this file object.
860  return {*this};
861  }
862 
876  sentinel end() noexcept
877  {
     // Returns a default-constructed sentinel for comparison with the iterator.
878  return {};
879  }
880 
904  reference front() noexcept
905  {
     // Returns the record we are currently at in the file, i.e. the buffered record.
906  return record_buffer;
907  }
909 
914  template<field f>
916  friend auto & get(structure_file_input & file)
917  {
     // Column access by field ID: returns a reference to the column of field f in columns_buffer.
918  static_assert(structure_file_input::selected_field_ids::contains(f),
919  "You requested a field via get that was not selected for the file.");
920 
     // moves the records that are still unread into the column buffers first
921  file.read_columns();
922 
923  return seqan3::get<f>(file.columns_buffer);
924  }
925 
927  template<field f>
928  friend auto && get(structure_file_input && file)
929  {
     // Rvalue overload: delegates to the lvalue overload and casts the column to an rvalue via std::move.
930  return std::move(get<f>(file));
931  }
932 
934  template<size_t i>
935  friend auto & get(structure_file_input & file)
936  {
     // Column access by position: i must be smaller than the number of selected fields.
937  static_assert(i < structure_file_input::selected_field_ids::as_array.size(),
938  "You requested a field number larger than the number of selected fields for the file.");
     // moves the records that are still unread into the column buffers first
939  file.read_columns();
940 
941  return std::get<i>(file.columns_buffer);
942  }
943 
945  template<size_t i>
946  friend auto && get(structure_file_input && file)
947  {
     // Rvalue overload: delegates to the lvalue overload and casts the column to an rvalue via std::move.
948  return std::move(get<i>(file));
949  }
950 
952  template<typename t>
953  friend auto & get(structure_file_input & file)
954  {
     // Column access by type: returns the element of columns_buffer that has type t.
     // moves the records that are still unread into the column buffers first
955  file.read_columns();
956 
957  return std::get<t>(file.columns_buffer);
958  }
959 
961  template<typename t>
962  friend auto && get(structure_file_input && file)
963  {
     // Rvalue overload: delegates to the lvalue overload and casts the column to an rvalue via std::move.
964  return std::move(get<t>(file));
965  }
967 
969  structure_file_input_options<typename traits_type::seq_legal_alphabet,
970  selected_field_ids::contains(field::STRUCTURED_SEQ)> options;
971 
972 protected:
974 
977  record_type record_buffer;
980  file_as_tuple_type columns_buffer;
982 
990  static void stream_deleter_noop(std::basic_istream<stream_char_type> *) {} // deleter that does nothing
992  static void stream_deleter_default(std::basic_istream<stream_char_type> * ptr) { delete ptr; } // deleter that deletes the stream
993 
995  stream_ptr_t primary_stream{nullptr, stream_deleter_noop};
997  stream_ptr_t secondary_stream{nullptr, stream_deleter_noop};
998 
1000  bool at_end{false};
1001 
1003  using format_type = typename detail::variant_from_tags<valid_formats, detail::structure_file_input_format>::type;
1005  format_type format;
1007 
1009  void read_next_record()
1010  {
      // Clears record_buffer and fills it with the next record from secondary_stream, or sets at_end.
1011  // clear the record
1012  record_buffer.clear();
1013 
1014  // at end if we could not read further
      // NOTE(review): the right-hand side of this comparison is missing from this listing - restore from the original header
1015  if ((std::istreambuf_iterator<stream_char_type>{*secondary_stream} ==
1017  {
1018  at_end = true;
1019  return;
1020  }
1021 
1022  assert(!format.valueless_by_exception());
      // dispatch to the handler currently stored in the format variant
1023  std::visit([&] (auto & f)
1024  {
1025  // read new record
1026  if constexpr (selected_field_ids::contains(field::STRUCTURED_SEQ))
1027  {
1028  static_assert(!selected_field_ids::contains(field::STRUCTURE),
1029  "You may not select field::STRUCTURED_SEQ and field::STRUCTURE at the same time.");
1030  static_assert(!selected_field_ids::contains(field::SEQ),
1031  "You may not select field::STRUCTURED_SEQ and field::SEQ at the same time.");
      // the STRUCTURED_SEQ buffer is passed twice: in the seq position and in the structure position
1032  f.read(*secondary_stream,
1033  options,
1034  detail::get_or_ignore<field::STRUCTURED_SEQ>(record_buffer), // seq
1035  detail::get_or_ignore<field::ID>(record_buffer),
1036  detail::get_or_ignore<field::BPP>(record_buffer),
1037  detail::get_or_ignore<field::STRUCTURED_SEQ>(record_buffer), // structure
1038  detail::get_or_ignore<field::ENERGY>(record_buffer),
1039  detail::get_or_ignore<field::REACT>(record_buffer),
1040  detail::get_or_ignore<field::REACT_ERR>(record_buffer),
1041  detail::get_or_ignore<field::COMMENT>(record_buffer),
1042  detail::get_or_ignore<field::OFFSET>(record_buffer));
1043  }
1044  else
1045  {
      // SEQ and STRUCTURE are passed as separate targets
      // presumably get_or_ignore yields a placeholder for fields that were not selected - TODO confirm
1046  f.read(*secondary_stream,
1047  options,
1048  detail::get_or_ignore<field::SEQ>(record_buffer),
1049  detail::get_or_ignore<field::ID>(record_buffer),
1050  detail::get_or_ignore<field::BPP>(record_buffer),
1051  detail::get_or_ignore<field::STRUCTURE>(record_buffer),
1052  detail::get_or_ignore<field::ENERGY>(record_buffer),
1053  detail::get_or_ignore<field::REACT>(record_buffer),
1054  detail::get_or_ignore<field::REACT_ERR>(record_buffer),
1055  detail::get_or_ignore<field::COMMENT>(record_buffer),
1056  detail::get_or_ignore<field::OFFSET>(record_buffer));
1057  }
1058  }, format);
1059  }
1060 
1062  void read_columns()
1063  {
      // Iterates over this file and appends the selected fields of every record to the matching
      // column in columns_buffer.
1064  //TODO don't do multiple visits
1065  //TODO create specialised version for concatenated_sequences where we append on the concat
      // one reference per possible field; only those guarded by a matching `if constexpr` below are used
      // presumably get_or_ignore yields a placeholder for fields that were not selected - TODO confirm
1066  auto & seq_column_buffer = detail::get_or_ignore<field::SEQ>(columns_buffer);
1067  auto & id_column_buffer = detail::get_or_ignore<field::ID>(columns_buffer);
1068  auto & bpp_column_buffer = detail::get_or_ignore<field::BPP>(columns_buffer);
1069  auto & structure_column_buffer = detail::get_or_ignore<field::STRUCTURE>(columns_buffer);
1070  auto & structured_seq_column_buffer = detail::get_or_ignore<field::STRUCTURED_SEQ>(columns_buffer);
1071  auto & energy_column_buffer = detail::get_or_ignore<field::ENERGY>(columns_buffer);
1072  auto & react_column_buffer = detail::get_or_ignore<field::REACT>(columns_buffer);
1073  auto & react_err_column_buffer = detail::get_or_ignore<field::REACT_ERR>(columns_buffer);
1074  auto & comment_column_buffer = detail::get_or_ignore<field::COMMENT>(columns_buffer);
1075  auto & offset_column_buffer = detail::get_or_ignore<field::OFFSET>(columns_buffer);
1076 
1077  // read the remaining records and split into column buffers
1078  for (auto & rec : *this)
1079  {
      // each selected field is moved out of the record into its column
1080  if constexpr (selected_field_ids::contains(field::SEQ))
1081  seq_column_buffer.push_back(std::move(seqan3::get<field::SEQ>(rec)));
1082  if constexpr (selected_field_ids::contains(field::ID))
1083  id_column_buffer.push_back(std::move(seqan3::get<field::ID>(rec)));
1084  if constexpr (selected_field_ids::contains(field::BPP))
1085  bpp_column_buffer.push_back(std::move(seqan3::get<field::BPP>(rec)));
1086  if constexpr (selected_field_ids::contains(field::STRUCTURE))
1087  structure_column_buffer.push_back(std::move(seqan3::get<field::STRUCTURE>(rec)));
1088  if constexpr (selected_field_ids::contains(field::STRUCTURED_SEQ))
1089  structured_seq_column_buffer.push_back(std::move(seqan3::get<field::STRUCTURED_SEQ>(rec)));
1090  if constexpr (selected_field_ids::contains(field::ENERGY))
1091  energy_column_buffer.push_back(std::move(seqan3::get<field::ENERGY>(rec)));
1092  if constexpr (selected_field_ids::contains(field::REACT))
1093  react_column_buffer.push_back(std::move(seqan3::get<field::REACT>(rec)));
1094  if constexpr (selected_field_ids::contains(field::REACT_ERR))
1095  react_err_column_buffer.push_back(std::move(seqan3::get<field::REACT_ERR>(rec)));
1096  if constexpr (selected_field_ids::contains(field::COMMENT))
1097  comment_column_buffer.push_back(std::move(seqan3::get<field::COMMENT>(rec)));
1098  if constexpr (selected_field_ids::contains(field::OFFSET))
1099  offset_column_buffer.push_back(std::move(seqan3::get<field::OFFSET>(rec)));
1100  }
1101  }
1102 
1104  friend iterator;
1105 };
1106 
1112 template <IStream2 stream_type,
1114  StructureFileInputFormat file_format,
1115  detail::Fields selected_field_ids>
1116 structure_file_input(stream_type && stream, file_format const &, selected_field_ids const &)
1118  selected_field_ids,
1119  type_list<file_format>,
1121 
1123 template <IStream2 stream_type,
1124  StructureFileInputFormat file_format,
1125  detail::Fields selected_field_ids>
1126 structure_file_input(stream_type & stream, file_format const &, selected_field_ids const &)
1128  selected_field_ids,
1129  type_list<file_format>,
1132 
1133 } // namespace seqan3
1134 
1135 // ------------------------------------------------------------------
1136 // std-overloads for the tuple-like interface
1137 // ------------------------------------------------------------------
1138 
1139 namespace std
1140 {
1141 
1147 template<seqan3::StructureFileInputTraits traits_type,
1148  seqan3::detail::Fields selected_field_ids,
1149  seqan3::detail::TypeListOfStructureFileInputFormats valid_formats,
1150  seqan3::Char stream_char_t>
1151 struct tuple_size<seqan3::structure_file_input<traits_type, selected_field_ids, valid_formats, stream_char_t>>
1152 {
     // std::tuple_size specialisation for structure_file_input: the size is the number of selected fields
1154  static constexpr size_t value = selected_field_ids::as_array.size();
1155 };
1156 
1162 template<size_t elem_no,
1164  seqan3::detail::Fields selected_field_ids,
1165  seqan3::detail::TypeListOfStructureFileInputFormats valid_formats,
1166  seqan3::Char stream_char_t>
1167 struct tuple_element<elem_no,
1168  seqan3::structure_file_input<traits_type, selected_field_ids, valid_formats, stream_char_t>>
1169  : tuple_element<elem_no, typename seqan3::structure_file_input<traits_type,
1170  selected_field_ids,
1171  valid_formats,
1172  stream_char_t>::file_as_tuple_type>
1173 {};
1174 
1175 } // namespace std
structure_file_input(std::filesystem::path filename, selected_field_ids const &fields_tag=selected_field_ids{})
Construct from filename.
Definition: input.hpp:773
typename traits_type::template bpp_container< typename traits_type::template bpp_queue< typename traits_type::template bpp_item< typename traits_type::bpp_prob, typename traits_type::bpp_partner > >> bpp_type
The type of the base pair probabilities (default std::vector of std::set<std::pair<double, size_t>>).
Definition: input.hpp:651
A seqan3::alphabet_tuple_base that joins an aminoacid alphabet with a protein structure alphabet...
Definition: structured_aa.hpp:55
Provides seqan3::StructureFileInputFormat.
T visit(T... args)
The "sequence", usually a range of nucleotides or amino acids.
The protein structure alphabet of the characters "HGIEBTSCX".
Definition: dssp9.hpp:60
Provides exceptions used in the I/O module.
Sequence and fixed interactions combined in one range.
Energy of a folded sequence, represented by one float number.
Provides the seqan3::format_vienna tag and the seqan3::sequencestrucure_file_input_format and seqan3:...
The five letter RNA alphabet of A,C,G,U and the unknown character N.
Definition: rna5.hpp:46
wuss51 structure_alphabet
The alphabet for a structure annotation is seqan3::wuss51.
Definition: input.hpp:350
typename traits_type::template structured_seq_container_container< structured_seq_type > structured_seq_column_type
Column type of field::STRUCTURED_SEQ (seqan3::concatenated_sequences<structured_seq_type> by default)...
Definition: input.hpp:693
size_t offset_type
The type of the offset is size_t.
Definition: input.hpp:412
This concept encompasses exactly the types char, signed char, unsigned char, wchar_t, char16_t and char32_t.
fields< field::SEQ, field::ID, field::BPP, field::STRUCTURE, field::STRUCTURED_SEQ, field::ENERGY, field::REACT, field::REACT_ERR, field::COMMENT, field::OFFSET > field_ids
The subset of seqan3::field IDs that are valid for this file; order corresponds to the types in field...
Definition: input.hpp:618
typename traits_type::template comment_container< typename traits_type::comment_alphabet > comment_type
The type of the comment field (a container of comment_alphabet, which is char by default).
Definition: input.hpp:665
Comment field of arbitrary content, usually a string.
Meta-header for the structure module. It includes all headers from alphabet/structure/.
typename traits_type::energy_type energy_type
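The type of the energy field, taken directly from traits_type::energy_type (a std::optional of a floating point number according to the traits concept).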
Definition: input.hpp:660
Provides seqan3::structure_file_input_options.
structure_file_input(stream_t &&stream, file_format const &format_tag, selected_field_ids const &fields_tag=selected_field_ids{})
Definition: input.hpp:824
SeqAn specific customisations in the standard namespace.
structure_file_input(stream_t &stream, file_format const &format_tag, selected_field_ids const &fields_tag=selected_field_ids{})
Construct from an existing stream and with specified format.
Definition: input.hpp:806
structure_file_input_options< typename traits_type::seq_legal_alphabet, selected_field_ids::contains(field::STRUCTURED_SEQ)> options
The options are public and its members can be set directly.
Definition: input.hpp:970
structure_file_input()=delete
Default constructor is explicitly deleted, you need to give a stream or file name.
record< detail::select_types_with_ids_t< field_types, field_ids, selected_field_ids >, selected_field_ids > record_type
The type of the record, a specialisation of seqan3::record; acts as a tuple of the selected field typ...
Definition: input.hpp:675
traits_type_ traits_type
A traits type that defines aliases and template for storage of the fields.
Definition: input.hpp:597
A class for reading structured sequence files, e.g. Stockholm, Connect, Vienna, ViennaRNA bpp matrix ...
Definition: input.hpp:589
::ranges::size size
Alias for ranges::size. Obtains the size of a range whose size can be calculated in constant time...
Definition: ranges:189
The main SeqAn3 namespace.
typename traits_type::template structured_seq_container< typename traits_type::template structured_seq_alphabet< typename traits_type::seq_alphabet, typename traits_type::structure_alphabet > > structured_seq_type
The type of the sequence-structure field (default std::vector of structured_rna<rna5, wuss51>).
Definition: input.hpp:658
std::ranges::default_sentinel_t sentinel
The type returned by end().
Definition: input.hpp:738
The requirements a traits_type for seqan3::structure_file_input must meet.
typename traits_type::offset_type offset_type
The type of the offset field (default size_t).
Definition: input.hpp:667
Base pair probability matrix of interactions, usually a matrix of float numbers.
Provides seqan3::rna5, container aliases and string literals.
typename traits_type::template seq_container_container< seq_type > seq_column_type
Column type of field::SEQ (seqan3::concatenated_sequences<seq_type> by default).
Definition: input.hpp:684
Provides seqan3::concatenated_sequences.
valid_formats_ valid_formats
A seqan3::type_list with the possible formats.
Definition: input.hpp:601
Provides seqan3::rna15, container aliases and string literals.
The twenty-seven letter amino acid alphabet.
Definition: aa27.hpp:43
The default traits for seqan3::structure_file_input.
Definition: input.hpp:287
A class template that holds a choice of seqan3::field.
Definition: record.hpp:127
Container that stores sequences concatenated internally.
Definition: concatenated_sequences.hpp:89
typename traits_type::template offset_container< offset_type > offset_column_type
Column type of field::OFFSET (std::vector<offset_type> by default).
Definition: input.hpp:701
double bpp_prob
The type for a base pair probability is double.
Definition: input.hpp:326
detail::in_file_iterator< structure_file_input > iterator
The iterator type of this view (an input iterator).
Definition: input.hpp:734
typename traits_type::template structure_container< typename traits_type::structure_alphabet > structure_type
The type of the structure field (default std::vector of seqan3::wuss51).
Definition: input.hpp:654
type_list< seq_column_type, id_column_type, bpp_column_type, structure_column_type, structured_seq_column_type, energy_column_type, react_column_type, react_column_type, comment_column_type, offset_column_type > field_column_types
The previously defined types aggregated in a seqan3::type_list.
Definition: input.hpp:713
Provides alphabet adaptations for standard char types.
typename traits_type::template react_container< typename traits_type::react_type > react_type
The type of the reactivity and reactivity error fields (a container of traits_type::react_type, which is double by default).
Definition: input.hpp:662
Reactivity error values given in a vector corresponding to REACT.
Provides the seqan3::record template and the seqan3::field enum.
Provides various utility functions required only for input.
typename traits_type::template structure_container_container< structure_type > structure_column_type
Column type of field::STRUCTURE (seqan3::concatenated_sequences<structure_type> by default)...
Definition: input.hpp:690
Sequence (SEQ) relative start position (0-based), unsigned value.
~structure_file_input()=default
Destructor is defaulted.
The identifier, usually a string.
The 15 letter RNA alphabet, containing all IUPAC symbols minus the gap.
Definition: rna15.hpp:48
typename traits_type::template id_container_container< id_type > id_column_type
Column type of field::ID (seqan3::concatenated_sequences<id_type> by default).
Definition: input.hpp:686
void const_reference
The const_reference type is void, because files are not const-iterable.
Definition: input.hpp:728
size_t size_type
An unsigned integer type, usually std::size_t.
Definition: input.hpp:730
Fixed interactions, usually a string of structure alphabet characters.
size_t bpp_partner
The type for the partner position of a base pair probability is size_t.
Definition: input.hpp:329
typename traits_type::template comment_container_container< comment_type > comment_column_type
Column type of field::COMMENT (seqan3::concatenated_sequences<comment_type> by default).
Definition: input.hpp:699
structure_file_input & operator=(structure_file_input const &)=delete
Copy assignment is explicitly deleted, because you cannot have multiple access to the same file...
A seqan3::alphabet_tuple_base that joins a nucleotide alphabet with an RNA structure alphabet...
Definition: structured_rna.hpp:57
type_list< seq_type, id_type, bpp_type, structure_type, structured_seq_type, energy_type, react_type, react_type, comment_type, offset_type > field_types
The previously defined types aggregated in a seqan3::type_list.
Definition: input.hpp:671
selected_field_ids_ selected_field_ids
A seqan3::fields list with the fields selected for the record.
Definition: input.hpp:599
Stream concepts.
typename traits_type::template id_container< typename traits_type::id_alphabet > id_type
The type of the ID field (default std::string).
Definition: input.hpp:646
char id_alphabet
The alphabet for an identifier string is char.
Definition: input.hpp:313
void const_iterator
The const iterator type is void, because files are not const-iterable.
Definition: input.hpp:736
typename traits_type::template seq_container< typename traits_type::seq_alphabet > seq_type
The type of the sequence field (default std::vector of seqan3::rna5).
Definition: input.hpp:644
Provides various type traits on generic types.
T is_base_of_v
Meta-header for the aminoacid submodule; includes all headers from alphabet/aminoacid/.
::ranges::default_sentinel_t default_sentinel_t
Alias for ranges::default_sentinel_t. Type of ranges::default_sentinel.
Definition: iterator:351
A traits type that specifies input as amino acids.
Definition: input.hpp:422
meta::list< types... > type_list
Type that contains multiple types, an alias for meta::list.
Definition: type_list.hpp:27
sentinel end() noexcept
Returns a sentinel for comparison with iterator.
Definition: input.hpp:876
field
An enumerator for the fields used in file formats. Some of the fields are shared between formats...
Definition: record.hpp:63
iterator begin() noexcept
Returns an iterator to current position in the file.
Definition: input.hpp:858
Reactivity values of the sequence characters given in a vector of float numbers.
Provides the seqan3::detail::in_file_iterator class template.
char comment_alphabet
The alphabet for a comment string is char.
Definition: input.hpp:399
typename traits_type::template bpp_container_container< bpp_type > bpp_column_type
Column type of field::BPP (std::vector<bpp_type> by default).
Definition: input.hpp:688
reference front() noexcept
Return the record we are currently at in the file.
Definition: input.hpp:904
stream_char_type_ stream_char_type
Character type of the stream(s), usually char.
Definition: input.hpp:603
double react_type
The type of the reactivity and reactivity error is double.
Definition: input.hpp:386
typename traits_type::template energy_container< energy_type > energy_column_type
Column type of field::ENERGY (std::vector<energy_type> by default).
Definition: input.hpp:695
The options type defines various option members that influence the behaviour of all or some formats...
Definition: input_options.hpp:27
This header includes C++17 filesystem support and imports it into namespace seqan3::filesystem (indep...
typename traits_type::template react_container_container< react_type > react_column_type
Column type of field::REACT and field::REACT_ERR (std::vector<react_type> by default).
Definition: input.hpp:697