SeqAn3  3.0.1
The Modern C++ library for sequence analysis.
input.hpp
Go to the documentation of this file.
1 // -----------------------------------------------------------------------------------------------------
2 // Copyright (c) 2006-2020, Knut Reinert & Freie Universität Berlin
3 // Copyright (c) 2016-2020, Knut Reinert & MPI für molekulare Genetik
4 // This file may be used, modified and/or redistributed under the terms of the 3-clause BSD-License
5 // shipped with this file and also available at: https://github.com/seqan/seqan3/blob/master/LICENSE.md
6 // -----------------------------------------------------------------------------------------------------
7 
13 #pragma once
14 
15 #include <cassert>
16 #include <fstream>
17 #include <string>
18 #include <variant>
19 #include <vector>
20 
36 #include <seqan3/io/detail/record.hpp>
37 #include <seqan3/io/exception.hpp>
38 #include <seqan3/io/record.hpp>
44 #include <seqan3/std/concepts>
45 #include <seqan3/std/filesystem>
46 #include <seqan3/std/ranges>
47 
48 namespace seqan3
49 {
50 
51 // ---------------------------------------------------------------------------------------------------------------------
52 // alignment_file_input_traits
53 // ---------------------------------------------------------------------------------------------------------------------
54 
111 template <typename t>
114 SEQAN3_CONCEPT alignment_file_input_traits = requires (t v)
115 {
116  // field::seq
121 
122  // field::id
124 
125  // field::qual
128 
129  // field::ref_seq
130  // either ref_info_not_given or a range over ranges over alphabet (e.g. std::vector<dna4_vector>)
132  (std::ranges::forward_range<typename t::ref_sequences> &&
133  std::ranges::forward_range<detail::transformation_trait_or_t<reference<typename t::ref_sequences>, dna4_vector>> &&
135 
136  // field::ref_id
140  requires std::ranges::forward_range<reference_t<typename t::ref_ids>>;
141  requires std::ranges::forward_range<typename t::ref_ids>;
142 
143  // field::offset is fixed to int32_t
144  // field::ref_offset is fixed to std::optional<int32_t>
145  // field::flag is fixed to seqan3::sam_flag
146  // field::mapq is fixed to uint8_t
147  // field::evalue is fixed to double
148  // field::bitscore is fixed to double
149  // field::mate is fixed to std::tuple<ref_id_container<ref_id_alphabet>, ref_offset_type, int32_t>
150 
151  // field::alignment
152  // the alignment type cannot be configured.
153  // Type of tuple entry 1 (reference) is set to
154  // 1) a std::ranges::subrange over value_type_t<typename t::ref_sequences> if reference information was given
155  // or 2) a "dummy" sequence type:
156  // views::repeat_n(sequence_alphabet{}, size_t{}) | std::views::transform(detail::access_restrictor_fn{})
157  // Type of tuple entry 2 (query) is set to
158  // 1) a std::ranges::subrange over value_type_t<typename t::ref_sequences> if reference information was given
159  // or 2) a "dummy" sequence type:
160 };
162 
163 // ---------------------------------------------------------------------------------------------------------------------
164 // alignment_file_input_default_traits
165 // ---------------------------------------------------------------------------------------------------------------------
166 
182 template <typename ref_sequences_t = ref_info_not_given, typename ref_ids_t = std::deque<std::string>>
184 {
190  using sequence_alphabet = dna5;
192 
195 
197  template <typename _sequence_alphabet>
199 
201  template <typename _id_alphabet>
203 
206 
208  template <typename _quality_alphabet>
210 
212  using ref_sequences = ref_sequences_t;
213 
215  using ref_ids = ref_ids_t;
217 };
218 
219 // ---------------------------------------------------------------------------------------------------------------------
220 // alignment_file_input
221 // ---------------------------------------------------------------------------------------------------------------------
222 
364 template <
366  detail::fields_specialisation selected_field_ids_ = fields<field::seq,
367  field::id,
373  field::mapq,
374  field::qual,
375  field::flag,
376  field::mate,
377  field::tags,
381  detail::type_list_of_alignment_file_input_formats valid_formats_ = type_list<format_sam, format_bam>>
383 {
384 public:
389  using traits_type = traits_type_;
392  using selected_field_ids = selected_field_ids_;
394  using valid_formats = valid_formats_;
396  using stream_char_type = char;
398 
399 private:
401  using dummy_ref_type = decltype(views::repeat_n(typename traits_type::sequence_alphabet{}, size_t{}) |
402  std::views::transform(detail::access_restrictor_fn{}));
403 public:
409  using sequence_type = typename traits_type::template sequence_container<
411  typename traits_type::sequence_alphabet>;
413  using id_type = typename traits_type::template id_container<char>;
415  using offset_type = int32_t;
423  dummy_ref_type,
424  decltype(std::declval<
425  detail::transformation_trait_or_t<
427  dummy_ref_type> /* does not matter as type is not chosen */
428  >() | views::slice(0, 0))>;
445  using mapq_type = uint8_t;
447  using quality_type = typename traits_type::template quality_container<
448  typename traits_type::quality_alphabet>;
456  using e_value_type = double;
458  using bitscore_type = double;
461 
462 private:
467  decltype(std::declval<sequence_type &>() | views::slice(0, 0))>,
468  typename traits_type::template sequence_container<
470 
471 public:
474 
477  id_type,
478  offset_type,
480  ref_id_type,
484  mapq_type,
485  quality_type,
486  flag_type,
487  mate_type,
489  e_value_type,
492 
496  using field_ids = fields<field::seq,
497  field::id,
503  field::cigar,
504  field::mapq,
505  field::qual,
506  field::flag,
507  field::mate,
508  field::tags,
512 
// Compile-time validation of the selected fields: the immediately-invoked constexpr lambda walks
// selected_field_ids::as_array and yields false as soon as one entry is not contained in field_ids.
// NOTE(review): the diagnostic text below spells "aligment" (sic); the literal is left untouched here.
513  static_assert([] () constexpr
514  {
515  for (field f : selected_field_ids::as_array)
516  if (!field_ids::contains(f))
517  return false;
518  return true;
519  }(),
520  "You selected a field that is not valid for aligment files, please refer to the documentation "
521  "of alignment_file_input::field_ids for the accepted values.");
522 
527 
532  using value_type = record_type;
537  using const_reference = void;
539  using size_type = size_t;
543  using iterator = detail::in_file_iterator<alignment_file_input>;
545  using const_iterator = void;
547  using sentinel = std::ranges::default_sentinel_t;
549 
// Default construction is deleted: a stream or a file name has to be supplied.
553  alignment_file_input() = delete;
// Copy construction is deleted.
556  alignment_file_input(alignment_file_input const &) = delete;
// The destructor is defaulted.
564  ~alignment_file_input() = default;
565 
584  selected_field_ids const & SEQAN3_DOXYGEN_ONLY(fields_tag) = selected_field_ids{}) :
585  primary_stream{new std::ifstream{filename, std::ios_base::in | std::ios::binary}, stream_deleter_default}
586  {
587  init(filename);
588  }
589 
// Construct from an existing stream (lvalue) and an explicitly given format.
//  * stream: only its address is stored, paired with stream_deleter_noop, so this object never
//    deletes it; the caller keeps ownership.
//  * format_tag / fields_tag: only their types are used here (init() receives a fresh file_format{}).
//    SEQAN3_DOXYGEN_ONLY presumably drops the parameter names outside documentation builds -- confirm.
// NOTE(review): listing lines 610-612 are not part of this excerpt; anything declared there
// (e.g. a constraint) is not described here.
609  template <input_stream stream_t, alignment_file_input_format file_format>
613  alignment_file_input(stream_t & stream,
614  file_format const & SEQAN3_DOXYGEN_ONLY(format_tag),
615  selected_field_ids const & SEQAN3_DOXYGEN_ONLY(fields_tag) = selected_field_ids{}) :
616  primary_stream{&stream, stream_deleter_noop}
617  {
// Select the format by type and create the secondary stream on top of the primary one.
618  init(file_format{});
619  }
620 
// Overload of the stream constructor for temporaries: the given stream is moved into a new
// heap-allocated stream_t that this object owns (released through stream_deleter_default).
//  * format_tag / fields_tag: only their types are used here (init() receives a fresh file_format{}).
// NOTE(review): listing lines 623-625 are not part of this excerpt; anything declared there
// (e.g. a constraint) is not described here.
622  template <input_stream stream_t, alignment_file_input_format file_format>
626  alignment_file_input(stream_t && stream,
627  file_format const & SEQAN3_DOXYGEN_ONLY(format_tag),
628  selected_field_ids const & SEQAN3_DOXYGEN_ONLY(fields_tag) = selected_field_ids{}) :
629  primary_stream{new stream_t{std::move(stream)}, stream_deleter_default}
630  {
// Select the format by type and create the secondary stream on top of the primary one.
631  init(file_format{});
632  }
633 
658  typename traits_type::ref_ids & ref_ids,
659  typename traits_type::ref_sequences & ref_sequences,
660  selected_field_ids const & SEQAN3_DOXYGEN_ONLY(fields_tag) = selected_field_ids{}) :
661  primary_stream{new std::ifstream{filename, std::ios_base::in | std::ios::binary}, stream_deleter_default}
662  {
663  // initialize reference information
664  set_references(ref_ids, ref_sequences);
665 
666  init(filename);
667  }
668 
// Construct from an existing stream (lvalue), a given format and additional reference information.
//  * stream: only its address is stored, paired with stream_deleter_noop; the caller keeps ownership.
//  * ref_ids: passed to set_references(), which builds a new header object from them.
//  * ref_sequences: passed to set_references(), which stores the address of this object; it
//    therefore has to outlive the reads performed through this file object.
//  * format_tag / fields_tag: only their types are used here.
694  template <input_stream stream_t, alignment_file_input_format file_format>
695  alignment_file_input(stream_t & stream,
696  typename traits_type::ref_ids & ref_ids,
697  typename traits_type::ref_sequences & ref_sequences,
698  file_format const & SEQAN3_DOXYGEN_ONLY(format_tag),
699  selected_field_ids const & SEQAN3_DOXYGEN_ONLY(fields_tag) = selected_field_ids{}) :
700  primary_stream{&stream, stream_deleter_noop}
701  {
702  // initialize reference information
703  set_references(ref_ids, ref_sequences);
704 
// Select the format by type and create the secondary stream on top of the primary one.
705  init(file_format{});
706  }
707 
// Overload for temporary streams with additional reference information: the stream is moved into a
// new heap-allocated stream_t owned by this object (released through stream_deleter_default).
//  * ref_ids: passed to set_references(), which builds a new header object from them.
//  * ref_sequences: passed to set_references(), which stores the address of this object; it
//    therefore has to outlive the reads performed through this file object.
//  * format_tag / fields_tag: only their types are used here.
709  template <input_stream stream_t, alignment_file_input_format file_format>
710  alignment_file_input(stream_t && stream,
711  typename traits_type::ref_ids & ref_ids,
712  typename traits_type::ref_sequences & ref_sequences,
713  file_format const & SEQAN3_DOXYGEN_ONLY(format_tag),
714  selected_field_ids const & SEQAN3_DOXYGEN_ONLY(fields_tag) = selected_field_ids{}) :
715  primary_stream{new stream_t{std::move(stream)}, stream_deleter_default}
716  {
717  // initialize reference information
718  set_references(ref_ids, ref_sequences);
719 
// Select the format by type and create the secondary stream on top of the primary one.
720  init(file_format{});
721  }
722 
724  // explicitly delete rvalues for reference information
726  typename traits_type::ref_ids &&,
727  typename traits_type::ref_sequences &&,
728  selected_field_ids const &) = delete;
729 
// Deleted overload: selected when ref_ids and ref_sequences are both passed as rvalues together with
// a stream and a format, so such calls are rejected at compile time.
// Note that, unlike the non-deleted overloads, the fields tag has no default argument here.
730  template <input_stream stream_t, alignment_file_input_format file_format>
731  alignment_file_input(stream_t &&,
732  typename traits_type::ref_ids &&,
733  typename traits_type::ref_sequences &&,
734  file_format const &,
735  selected_field_ids const &) = delete;
738 
760  {
761  // buffer first record
762  if (!first_record_was_read)
763  {
764  read_next_record();
765  first_record_was_read = true;
766  }
767 
768  return {*this};
769  }
770 
// Returns a sentinel for comparison with the iterator; it is a value-initialised `sentinel`
// and carries no state of this file object.
784  sentinel end() noexcept
785  {
786  return {};
787  }
788 
// Returns the record the file is currently at by dereferencing begin().
// NOTE(review): this function is declared noexcept, but begin() calls read_next_record() when no
// record has been buffered yet; if that read can throw, the exception would terminate the
// program instead of propagating -- confirm whether noexcept is intended.
812  reference front() noexcept
813  {
814  return *begin();
815  }
817 
820 
834  {
835  // make sure header is read
836  if (!first_record_was_read)
837  {
838  read_next_record();
839  first_record_was_read = true;
840  }
841 
842  return *header_ptr;
843  }
844 
845 protected:
847 
// Initialisation for the file-name based constructors.
//  * filename: path the primary stream was opened from; also handed to the two detail helpers below.
// Throws file_open_error if the primary stream is not in a good state.
// NOTE(review): filename is taken by non-const lvalue reference; whether
// detail::make_secondary_istream or detail::set_format modify it is not visible here -- confirm.
849  void init(std::filesystem::path & filename)
850  {
851  // open stream
852  if (!primary_stream->good())
853  throw file_open_error{"Could not open file " + filename.string() + " for reading."};
854 
// Create the stream that records are read from, then determine the format from the file name
// (presumably from its extension -- confirm in detail::set_format).
855  secondary_stream = detail::make_secondary_istream(*primary_stream, filename);
856  detail::set_format(format, filename);
857  }
858 
// Initialisation for the stream based constructors, where the format is given as a tag type.
// The unnamed parameter only carries the type; format_type must be one of valid_formats,
// otherwise compilation fails with the message below.
860  template <typename format_type>
861  void init(format_type const &)
862  {
863  static_assert(list_traits::contains<format_type, valid_formats>,
864  "You selected a format that is not in the valid_formats of this file.");
865 
// Store the matching exposer in the format member and create the stream that records are read from.
866  format = detail::alignment_file_input_format_exposer<format_type>{};
867  secondary_stream = detail::make_secondary_istream(*primary_stream);
868  }
869 
871  std::unique_ptr<header_type> header_ptr{new header_type{}};
872 
876  record_type record_buffer;
879 
// Deleter that does nothing; used for streams this object does not own.
887  static void stream_deleter_noop(std::basic_istream<stream_char_type> *) {}
// Deleter that calls delete on the stream; used for streams this object allocated with new.
889  static void stream_deleter_default(std::basic_istream<stream_char_type> * ptr) { delete ptr; }
890 
// Stream that was handed in or opened by a constructor; starts out as nullptr with the no-op deleter.
892  stream_ptr_t primary_stream{nullptr, stream_deleter_noop};
// Stream that read_next_record() reads from; the init() overloads create it from the primary
// stream via detail::make_secondary_istream.
894  stream_ptr_t secondary_stream{nullptr, stream_deleter_noop};
895 
// Set once the first record has been buffered by begin() or header().
897  bool first_record_was_read{false};
// Set by read_next_record() when no further record could be read.
899  bool at_end{false};
900 
902  using format_type = typename detail::variant_from_tags<valid_formats,
903  detail::alignment_file_input_format_exposer>::type;
904 
906  format_type format;
908 
912  typename traits_type::ref_sequences const * reference_sequences_ptr{nullptr};
914 
// Stores the user-supplied reference information and pre-populates the header with it.
//  * ref_ids: reference identifiers; a new header object is constructed from them.
//  * ref_sequences: reference sequences; only their address is kept in reference_sequences_ptr,
//    so the referenced object has to stay alive while this file object uses it.
// The assert (active only when NDEBUG is not defined) requires both ranges to have equal length.
925  template <std::ranges::forward_range ref_sequences_t>
926  void set_references(typename traits_type::ref_ids & ref_ids, ref_sequences_t && ref_sequences)
927  {
928  assert(std::ranges::distance(ref_ids) == std::ranges::distance(ref_sequences));
929 
// Replace the current header by one built from ref_ids and remember where the sequences live.
930  header_ptr = std::unique_ptr<header_type>{std::make_unique<header_type>(ref_ids)};
931  reference_sequences_ptr = &ref_sequences;
932 
933  // initialise reference map and ref_dict if ref_ids are non-empty
// NOTE(review): std::ranges::distance(ref_ids) is re-evaluated in every loop condition, and
// ref_sequences[idx] needs operator[] although the template only requires a forward_range --
// confirm that all supported range types are sized and random-access.
934  for (int32_t idx = 0; idx < std::ranges::distance(ref_ids); ++idx)
935  {
// Record the length of the idx-th reference sequence together with an empty string.
936  header_ptr->ref_id_info.emplace_back(std::ranges::distance(ref_sequences[idx]), "");
937 
// If a single id is a contiguous, sized, forwarding range, key ref_dict by a std::span over its
// elements; otherwise key it by the id object itself. Either way the mapped value is idx.
938  if constexpr (std::ranges::contiguous_range<reference_t<typename traits_type::ref_ids>> &&
939  std::ranges::sized_range<reference_t<typename traits_type::ref_ids>> &&
940  forwarding_range<reference_t<typename traits_type::ref_ids>>)
941  {
942  auto && id = header_ptr->ref_ids()[idx];
943  header_ptr->ref_dict[std::span{std::ranges::data(id), std::ranges::size(id)}] = idx;
944  }
945  else
946  {
947  header_ptr->ref_dict[header_ptr->ref_ids()[idx]] = idx;
948  }
949  }
950  }
952 
954  void read_next_record()
955  {
956  // clear the record
957  record_buffer.clear();
958  detail::get_or_ignore<field::header_ptr>(record_buffer) = header_ptr.get();
959 
960  // at end if we could not read further
961  if (std::istreambuf_iterator<stream_char_type>{*secondary_stream} ==
963  {
964  at_end = true;
965  return;
966  }
967 
968  auto call_read_func = [this] (auto & ref_seq_info)
969  {
970  std::visit([&] (auto & f)
971  {
972  f.read_alignment_record(*secondary_stream,
973  options,
974  ref_seq_info,
975  *header_ptr,
976  detail::get_or_ignore<field::seq>(record_buffer),
977  detail::get_or_ignore<field::qual>(record_buffer),
978  detail::get_or_ignore<field::id>(record_buffer),
979  detail::get_or_ignore<field::offset>(record_buffer),
980  detail::get_or_ignore<field::ref_seq>(record_buffer),
981  detail::get_or_ignore<field::ref_id>(record_buffer),
982  detail::get_or_ignore<field::ref_offset>(record_buffer),
983  detail::get_or_ignore<field::alignment>(record_buffer),
984  detail::get_or_ignore<field::cigar>(record_buffer),
985  detail::get_or_ignore<field::flag>(record_buffer),
986  detail::get_or_ignore<field::mapq>(record_buffer),
987  detail::get_or_ignore<field::mate>(record_buffer),
988  detail::get_or_ignore<field::tags>(record_buffer),
989  detail::get_or_ignore<field::evalue>(record_buffer),
990  detail::get_or_ignore<field::bit_score>(record_buffer));
991  }, format);
992  };
993 
994  assert(!format.valueless_by_exception());
995 
997  call_read_func(*reference_sequences_ptr);
998  else
999  call_read_func(std::ignore);
1000  }
1001 
1003  friend iterator;
1004 };
1005 
1010 template <input_stream stream_type,
1012  alignment_file_input_format file_format,
1013  detail::fields_specialisation selected_field_ids>
1014 alignment_file_input(stream_type && stream,
1015  file_format const &,
1016  selected_field_ids const &)
1018  selected_field_ids,
1019  type_list<file_format>>;
1020 
1022 template <input_stream stream_type,
1023  alignment_file_input_format file_format,
1024  detail::fields_specialisation selected_field_ids>
1025 alignment_file_input(stream_type & stream,
1026  file_format const &,
1027  selected_field_ids const &)
1029  selected_field_ids,
1030  type_list<file_format>>;
1031 
1033 template <input_stream stream_type,
1034  alignment_file_input_format file_format>
1035 alignment_file_input(stream_type && stream,
1036  file_format const &)
1038  typename alignment_file_input<>::selected_field_ids, // actually use the default
1039  type_list<file_format>>;
1040 
1042 template <input_stream stream_type,
1043  alignment_file_input_format file_format>
1044 alignment_file_input(stream_type & stream,
1045  file_format const &)
1047  typename alignment_file_input<>::selected_field_ids, // actually use the default
1048  type_list<file_format>>;
1049 
1051 template <std::ranges::forward_range ref_ids_t,
1052  std::ranges::forward_range ref_sequences_t,
1053  detail::fields_specialisation selected_field_ids>
1054 alignment_file_input(std::filesystem::path path,
1055  ref_ids_t &,
1056  ref_sequences_t &,
1057  selected_field_ids const &)
1058  -> alignment_file_input<alignment_file_input_default_traits<std::remove_reference_t<ref_sequences_t>,
1060  selected_field_ids,
1061  typename alignment_file_input<>::valid_formats>; // actually use the default
1062 
1064 template <std::ranges::forward_range ref_ids_t,
1065  std::ranges::forward_range ref_sequences_t>
1066 alignment_file_input(std::filesystem::path path,
1067  ref_ids_t &,
1068  ref_sequences_t &)
1069  -> alignment_file_input<alignment_file_input_default_traits<std::remove_reference_t<ref_sequences_t>,
1071  typename alignment_file_input<>::selected_field_ids, // actually use the default
1072  typename alignment_file_input<>::valid_formats>; // actually use the default
1073 
1075 template <input_stream stream_type,
1076  std::ranges::forward_range ref_ids_t,
1077  std::ranges::forward_range ref_sequences_t,
1078  alignment_file_input_format file_format,
1079  detail::fields_specialisation selected_field_ids>
1080 alignment_file_input(stream_type && stream,
1081  ref_ids_t &,
1082  ref_sequences_t &,
1083  file_format const &,
1084  selected_field_ids const &)
1085  -> alignment_file_input<alignment_file_input_default_traits<std::remove_reference_t<ref_sequences_t>,
1087  selected_field_ids,
1088  type_list<file_format>>;
1089 
1091 template <input_stream stream_type,
1092  std::ranges::forward_range ref_ids_t,
1093  std::ranges::forward_range ref_sequences_t,
1094  alignment_file_input_format file_format,
1095  detail::fields_specialisation selected_field_ids>
1096 alignment_file_input(stream_type & stream,
1097  ref_ids_t &,
1098  ref_sequences_t &,
1099  file_format const &,
1100  selected_field_ids const &)
1101  -> alignment_file_input<alignment_file_input_default_traits<std::remove_reference_t<ref_sequences_t>,
1103  selected_field_ids,
1104  type_list<file_format>>;
1105 
1107 template <input_stream stream_type,
1108  std::ranges::forward_range ref_ids_t,
1109  std::ranges::forward_range ref_sequences_t,
1110  alignment_file_input_format file_format>
1111 alignment_file_input(stream_type && stream,
1112  ref_ids_t &,
1113  ref_sequences_t &,
1114  file_format const &)
1115  -> alignment_file_input<alignment_file_input_default_traits<std::remove_reference_t<ref_sequences_t>,
1117  typename alignment_file_input<>::selected_field_ids, // actually use the default
1118  type_list<file_format>>;
1119 
1121 template <input_stream stream_type,
1122  std::ranges::forward_range ref_ids_t,
1123  std::ranges::forward_range ref_sequences_t,
1124  alignment_file_input_format file_format>
1125 alignment_file_input(stream_type & stream,
1126  ref_ids_t &,
1127  ref_sequences_t &,
1128  file_format const &)
1129  -> alignment_file_input<alignment_file_input_default_traits<std::remove_reference_t<ref_sequences_t>,
1131  typename alignment_file_input<>::selected_field_ids, // actually use the default
1132  type_list<file_format>>;
1134 
1135 } // namespace seqan3
1136 
1137 // ------------------------------------------------------------------
1138 // std-overloads for the tuple-like interface
1139 // ------------------------------------------------------------------
1140 
1141 namespace std
1142 {
1143 
// Specialisation of std::tuple_size for seqan3::alignment_file_input (part of the std overloads
// for the tuple-like interface).
1149 template <seqan3::alignment_file_input_traits traits_type,
1150  seqan3::detail::fields_specialisation selected_field_ids,
1151  seqan3::detail::type_list_of_alignment_file_input_formats valid_formats>
1152 struct tuple_size<seqan3::alignment_file_input<traits_type, selected_field_ids, valid_formats>>
1153 {
// The size is the number of entries in selected_field_ids.
1155  static constexpr size_t value = selected_field_ids::as_array.size();
1156 };
1157 
1163 template <size_t elem_no,
1165  seqan3::detail::fields_specialisation selected_field_ids,
1166  seqan3::detail::type_list_of_alignment_file_input_formats valid_formats>
1167 struct tuple_element<elem_no, seqan3::alignment_file_input<traits_type, selected_field_ids, valid_formats>>
1168  : tuple_element<elem_no, typename seqan3::alignment_file_input<traits_type,
1169  selected_field_ids,
1170  valid_formats>::file_as_tuple_type>
1171 {};
1172 
1173 } // namespace std
seqan3::alignment_file_input::mapq_type
uint8_t mapq_type
The type of field::mapq is fixed to uint8_t.
Definition: input.hpp:445
seqan3::alignment_file_input::begin
iterator begin()
Returns an iterator to current position in the file.
Definition: input.hpp:759
misc.hpp
Provides helper data structures for the seqan3::alignment_file_output.
seqan3::alignment_file_input::front
reference front() noexcept
Return the record we are currently at in the file.
Definition: input.hpp:812
seqan3::field::seq
The "sequence", usually a range of nucleotides or amino acids.
size
qualified.hpp
Provides quality alphabet composites.
seqan3::alignment_file_input::bitscore_type
double bitscore_type
The type of field::bitscore is fixed to double.
Definition: input.hpp:458
std::span
fstream
std::basic_string
sequence_container
A more refined container concept than seqan3::container.
seqan3::alignment_file_input::size_type
size_t size_type
An unsigned integer type, usually std::size_t.
Definition: input.hpp:539
seqan3::alignment_file_input::alignment_type
std::tuple< gap_decorator< ref_sequence_type >, alignment_query_type > alignment_type
The type of field::alignment (default: std::pair<std::vector<gapped<dna5>>, std::vector<gapped<dna5>>...
Definition: input.hpp:473
seqan3::type_list
meta::list< types... > type_list
Type that contains multiple types, an alias for meta::list.
Definition: type_list.hpp:31
tuple.hpp
Provides seqan3::tuple_like.
seqan3::field::offset
Sequence (SEQ) relative start position (0-based), unsigned value.
concept.hpp
Stream concepts.
seqan3::alignment_file_input::alignment_file_input
alignment_file_input(stream_t &&stream, file_format const &format_tag, selected_field_ids const &fields_tag=selected_field_ids{})
This is an overloaded member function, provided for convenience. It differs from the above function o...
Definition: input.hpp:626
seqan3::alignment_file_input::end
sentinel end() noexcept
Returns a sentinel for comparison with iterator.
Definition: input.hpp:784
seqan3::alignment_file_input::options
alignment_file_input_options< typename traits_type::sequence_legal_alphabet > options
The options are public and its members can be set directly.
Definition: input.hpp:819
gap_decorator.hpp
Provides seqan3::gap_decorator.
vector
explicitly_convertible_to
Resolves to std::ranges::explicitly_convertible_to<type1, type2>().
seqan3::alignment_file_input::const_reference
void const_reference
The const_reference type is void because files are not const-iterable.
Definition: input.hpp:537
seqan3::alignment_file_input::header_type
alignment_file_header< typename traits_type::ref_ids > header_type
The type of field::header_ptr (default: alignment_file_header<typename traits_type::ref_ids>).
Definition: input.hpp:460
seqan3::field::id
The identifier, usually a string.
format_sam.hpp
Provides the seqan3::format_sam.
seqan3::views::move
const auto move
A view that turns lvalue-references into rvalue-references.
Definition: move.hpp:68
seqan3::alignment_file_input::alignment_file_input
alignment_file_input(std::filesystem::path filename, selected_field_ids const &fields_tag=selected_field_ids{})
Construct from filename.
Definition: input.hpp:583
seqan3::dna15
The 15 letter DNA alphabet, containing all IUPAC symbols minus the gap.
Definition: dna15.hpp:48
seqan3::sam_flag
sam_flag
An enum flag that describes the properties of an aligned read (given as a SAM record).
Definition: misc.hpp:70
std::tuple
seqan3::alignment_file_input_options< typename traits_type::sequence_legal_alphabet >
seqan3::alignment_file_input::alignment_file_input
alignment_file_input(stream_t &&stream, typename traits_type::ref_ids &ref_ids, typename traits_type::ref_sequences &ref_sequences, file_format const &format_tag, selected_field_ids const &fields_tag=selected_field_ids{})
This is an overloaded member function, provided for convenience. It differs from the above function o...
Definition: input.hpp:710
record.hpp
Provides the seqan3::record template and the seqan3::field enum.
std::function
seqan3::alignment_file_input::ref_offset_type
std::optional< int32_t > ref_offset_type
The type of field::ref_offset is fixed to an std::optional<int32_t>.
Definition: input.hpp:443
filesystem
This header includes C++17 filesystem support and imports it into namespace seqan3::filesystem (indep...
seqan3::field::bit_score
The bit score (statistical significance indicator), unsigned value.
seqan3::alignment_file_input::sentinel
std::ranges::default_sentinel_t sentinel
The type returned by end().
Definition: input.hpp:547
seqan3::alignment_file_input::quality_type
typename traits_type::template quality_container< typename traits_type::quality_alphabet > quality_type
The type of field::qual (default std::vector<seqan3::phred42>).
Definition: input.hpp:448
alignment_file_input_format
The generic concept for alignment file input formats.
seqan3::field::ref_seq
The (reference) "sequence" information, usually a range of nucleotides or amino acids.
seqan3::field::ref_offset
Sequence (REF_SEQ) relative start position (0-based), unsigned value.
std::filesystem::path
seqan3::pack_traits::contains
constexpr bool contains
Whether a type occurs in a pack or not.
Definition: traits.hpp:193
seqan3::alignment_file_input::header
header_type & header()
Access the file's header.
Definition: input.hpp:833
seqan3::alignment_file_input::field_types
type_list< sequence_type, id_type, offset_type, ref_sequence_type, ref_id_type, ref_offset_type, alignment_type, std::vector< cigar >, mapq_type, quality_type, flag_type, mate_type, sam_tag_dictionary, e_value_type, bitscore_type, header_type * > field_types
The previously defined types aggregated in a seqan3::type_list.
Definition: input.hpp:491
concepts
The Concepts library.
seqan3::fields
A class template that holds a choice of seqan3::field.
Definition: record.hpp:165
input_format_concept.hpp
Provides seqan3::alignment_file_input_format and auxiliary classes.
seqan3::alignment_file_input::e_value_type
double e_value_type
The type of field::evalue is fixed to double.
Definition: input.hpp:456
seqan3::alignment_file_input::~alignment_file_input
~alignment_file_input()=default
Destructor is defaulted.
seqan3::alignment_file_input::operator=
alignment_file_input & operator=(alignment_file_input const &)=delete
Copy assignment is explicitly deleted because you cannot have multiple access to the same file.
seqan3::alphabet_variant
A combined alphabet that can hold values of either of its alternatives.
Definition: alphabet_variant.hpp:129
same_as
The concept std::same_as<T, U> is satisfied if and only if T and U denote the same type.
seqan3::alignment_file_input::valid_formats
valid_formats_ valid_formats
A seqan3::type_list with the possible formats.
Definition: input.hpp:394
seqan3::alignment_file_input::offset_type
int32_t offset_type
The type of field::offset is fixed to int32_t.
Definition: input.hpp:415
seqan3::alignment_file_input::const_iterator
void const_iterator
The const iterator type is void because files are not const-iterable.
Definition: input.hpp:545
seqan3::alignment_file_input
A class for reading alignment files, e.g. SAM, BAM, BLAST ...
Definition: input.hpp:382
seqan3::alignment_file_input::ref_id_type
std::optional< int32_t > ref_id_type
The type of field::ref_id is fixed to std::optional<int32_t>.
Definition: input.hpp:436
all.hpp
Meta-header for the nucleotide submodule; includes all headers from alphabet/nucleotide/.
slice.hpp
Provides seqan3::views::slice.
repeat_n.hpp
Provides seqan3::views::repeat_n.
aa27.hpp
Provides seqan3::aa27, container aliases and string literals.
seqan3::field::cigar
The cigar vector (std::vector<seqan3::cigar>) representing the alignment in SAM/BAM format.
seqan3::alignment_file_input::alignment_file_input
alignment_file_input(stream_t &stream, file_format const &format_tag, selected_field_ids const &fields_tag=selected_field_ids{})
Construct from an existing stream and with specified format.
Definition: input.hpp:613
seqan3::alignment_file_header
Stores the header information of alignment files.
Definition: header.hpp:32
seqan3::field::mapq
The mapping quality of the SEQ alignment, usually a phred-scaled score.
seqan3::alignment_file_input::alignment_file_input
alignment_file_input(std::filesystem::path filename, typename traits_type::ref_ids &ref_ids, typename traits_type::ref_sequences &ref_sequences, selected_field_ids const &fields_tag=selected_field_ids{})
Construct from filename and given additional reference information.
Definition: input.hpp:657
transformation_trait_or.hpp
Provides seqan3::detail::transformation_trait_or.
exception.hpp
Provides exceptions used in the I/O module.
seqan3
The main SeqAn3 namespace.
Definition: aligned_sequence_concept.hpp:36
seqan3::alignment_file_input::iterator
detail::in_file_iterator< alignment_file_input > iterator
The iterator type of this view (an input iterator).
Definition: input.hpp:543
seqan3::alignment_file_input::sequence_type
typename traits_type::template sequence_container< typename traits_type::sequence_alphabet > sequence_type
The type of field::seq (default std::vector<seqan3::dna5>).
Definition: input.hpp:411
seqan3::alignment_file_input::ref_sequence_type
std::conditional_t< std::same_as< typename traits_type::ref_sequences, ref_info_not_given >, dummy_ref_type, decltype(std::declval< detail::transformation_trait_or_t< seqan3::reference< typename traits_type::ref_sequences const >, dummy_ref_type > >()|views::slice(0, 0))> ref_sequence_type
The type of field::ref_seq (default depends on construction).
Definition: input.hpp:428
seqan3::alignment_file_input_default_traits::ref_ids
ref_ids_t ref_ids
The type of the reference identifiers is deduced on construction.
Definition: input.hpp:215
seqan3::field::tags
The optional tags in the SAM format, stored in a dictionary.
std::istreambuf_iterator
std::make_signed_t
seqan3::phred42
Quality type for traditional Sanger and modern Illumina Phred scores (typical range).
Definition: phred42.hpp:43
seqan3::alignment_file_input::selected_field_ids
selected_field_ids_ selected_field_ids
A seqan3::fields list with the fields selected for the record.
Definition: input.hpp:392
seqan3::pack_traits::size
constexpr size_t size
The size of a type pack.
Definition: traits.hpp:116
seqan3::reference
Exposes the reference of another type.
Definition: pre.hpp:70
misc_input.hpp
Provides various utility functions required only for input.
seqan3::alignment_file_input::traits_type
traits_type_ traits_type
A traits type that defines aliases and template for storage of the fields.
Definition: input.hpp:390
seqan3::alignment_file_input::stream_char_type
char stream_char_type
Character type of the stream(s).
Definition: input.hpp:396
char.hpp
Provides alphabet adaptations for standard char types.
concatenated_sequences.hpp
Provides seqan3::concatenated_sequences.
ranges
Adaptations of concepts from the Ranges TS.
seqan3::sam_tag_dictionary
The SAM tag dictionary class that stores all optional SAM fields.
Definition: sam_tag_dictionary.hpp:324
seqan3::alignment_file_input_default_traits
The default traits for seqan3::alignment_file_input.
Definition: input.hpp:183
format_bam.hpp
Provides the seqan3::format_bam.
std::remove_reference_t
alphabet
The generic alphabet concept that covers most data types used in ranges.
std
SeqAn specific customisations in the standard namespace.
cassert
cigar.hpp
Provides the seqan3::cigar alphabet.
seqan3::field::ref_id
The identifier of the (reference) sequence that SEQ was aligned to.
seqan3::field
field
An enumerator for the fields used in file formats.
Definition: record.hpp:64
alignment_file_input_traits
The requirements a traits_type for seqan3::alignment_file_input must meet.
seqan3::record
The class template that file records are based on; behaves like an std::tuple.
Definition: record.hpp:225
seqan3::views::slice
constexpr auto slice
A view adaptor that returns a half-open interval on the underlying range.
Definition: slice.hpp:141
std::visit
T visit(T... args)
seqan3::field::qual
The qualities, usually in phred-score notation.
std::optional
seqan3::views::repeat_n
constexpr auto repeat_n
A view factory that repeats a given value n times.
Definition: repeat_n.hpp:94
seqan3::gap_decorator
A gap decorator allows the annotation of sequences with gap symbols while leaving the underlying sequ...
Definition: gap_decorator.hpp:84
seqan3::dna5
The five letter DNA alphabet of A,C,G,T and the unknown character N.
Definition: dna5.hpp:48
seqan3::alignment_file_input_default_traits::ref_sequences
ref_sequences_t ref_sequences
The type of the reference sequences is deduced on construction.
Definition: input.hpp:212
seqan3::pack_traits::transform
seqan3::type_list< trait_t< pack_t >... > transform
Apply a transformation trait to every type in the pack and return a seqan3::type_list of the results.
Definition: traits.hpp:307
in_file_iterator.hpp
Provides the seqan3::detail::in_file_iterator class template.
std::conditional_t
seqan3::field::flag
The alignment flag (bit information), uint16_t value.
seqan3::file_open_error
Thrown if there is an unspecified filesystem or stream error while opening, e.g. permission problem.
Definition: exception.hpp:39
traits.hpp
Provides traits for seqan3::type_list.
seqan3::alignment_file_input::id_type
typename traits_type::template id_container< char > id_type
The type of field::id (std::string by default).
Definition: input.hpp:413
seqan3::alignment_file_input::alignment_file_input
alignment_file_input()=delete
Default constructor is explicitly deleted, you need to give a stream or file name.
seqan3::alignment_file_input::mate_type
std::tuple< ref_id_type, ref_offset_type, int32_t > mate_type
The type of field::mate is fixed to std::tuple<ref_id_type, ref_offset_type, int32_t>.
Definition: input.hpp:454
std::basic_istream
writable_alphabet
Refines seqan3::alphabet and adds assignability.
std::unique_ptr
writable_quality_alphabet
A concept that indicates whether a writable alphabet represents quality scores.
forwarding_range
Specifies a range whose iterators may outlive the range and remain valid.
seqan3::alignment_file_input::alignment_file_input
alignment_file_input(stream_t &stream, typename traits_type::ref_ids &ref_ids, typename traits_type::ref_sequences &ref_sequences, file_format const &format_tag, selected_field_ids const &fields_tag=selected_field_ids{})
Construct from an existing stream and with specified format.
Definition: input.hpp:695
seqan3::alignment_file_input::record_type
record< detail::select_types_with_ids_t< field_types, field_ids, selected_field_ids >, selected_field_ids > record_type
The type of the record, a specialisation of seqan3::record; acts as a tuple of the selected field typ...
Definition: input.hpp:525
phred42.hpp
Provides seqan3::phred42 quality scores.
seqan3::alignment_file_input::flag_type
sam_flag flag_type
The type of field::flag is fixed to seqan3::sam_flag.
Definition: input.hpp:450
std::declval
T declval(T... args)
seqan3::field::header_ptr
A pointer to the seqan3::alignment_file_header object storing header information.
seqan3::field::evalue
The e-value (length normalized bit score), double value.
seqan3::field::alignment
The (pairwise) alignment stored in an seqan3::alignment object.
variant
std::ifstream
seqan3::field::mate
The mate pair information given as a std::tuple of reference name, offset and template length.
string