SeqAn3  3.0.0
The Modern C++ library for sequence analysis.
input.hpp
Go to the documentation of this file.
1 // -----------------------------------------------------------------------------------------------------
2 // Copyright (c) 2006-2019, Knut Reinert & Freie Universität Berlin
3 // Copyright (c) 2016-2019, Knut Reinert & MPI für molekulare Genetik
4 // This file may be used, modified and/or redistributed under the terms of the 3-clause BSD-License
5 // shipped with this file and also available at: https://github.com/seqan/seqan3/blob/master/LICENSE.md
6 // -----------------------------------------------------------------------------------------------------
7 
13 #pragma once
14 
15 #include <cassert>
16 #include <fstream>
17 #include <string>
18 #include <variant>
19 #include <vector>
20 
35 #include <seqan3/io/detail/record.hpp>
36 #include <seqan3/io/exception.hpp>
37 #include <seqan3/io/record.hpp>
43 #include <seqan3/std/concepts>
44 #include <seqan3/std/filesystem>
45 #include <seqan3/std/ranges>
46 
47 namespace seqan3
48 {
49 
50 // ---------------------------------------------------------------------------------------------------------------------
51 // AlignmentFileInputTraits
52 // ---------------------------------------------------------------------------------------------------------------------
53 
113 template <typename t>
116 SEQAN3_CONCEPT AlignmentFileInputTraits = requires (t v)
117 {
118  // field::SEQ
119  requires WritableAlphabet<typename t::sequence_alphabet>;
120  requires WritableAlphabet<typename t::sequence_legal_alphabet>;
121  requires ExplicitlyConvertibleTo<typename t::sequence_legal_alphabet, typename t::sequence_alphabet>;
122  requires SequenceContainer<typename t::template sequence_container<typename t::sequence_alphabet>>;
123 
124  // field::ID
125  requires WritableAlphabet<typename t::id_alphabet>;
126  requires SequenceContainer<typename t::template id_container<typename t::id_alphabet>>;
127 
128  // field::QUAL
129  requires WritableQualityAlphabet<typename t::quality_alphabet>;
130  requires SequenceContainer<typename t::template quality_container<typename t::quality_alphabet>>;
131 
132  // field::REF_SEQ
133  // either ref_info_not_given or a range over ranges over Alphabet (e.g. std::vector<dna4_vector>)
137  Alphabet<reference_t<detail::transformation_trait_or_t<reference<typename t::ref_sequences>, dna4_vector>>>);
138 
139  // field::REF_ID
140  requires Alphabet<reference_t<reference_t<typename t::ref_ids>>> &&
142  WritableAlphabet<reference_t<reference_t<typename t::ref_ids>>>);
145 
146  // field::OFFSET is fixed to int32_t
147  // field::REF_OFFSET is fixed to std::optional<int32_t>
148  // field::FLAG is fixed to uint16_t
149  // field::MAPQ is fixed to uint8_t
150  // field::EVALUE is fixed to double
151  // field::BITSCORE is fixed to double
152  // field::MATE is fixed to std::tuple<ref_id_container<ref_id_alphabet>, ref_offset_type, int32_t>
153 
154  // field::ALIGNMENT
155  // the alignment type cannot be configured.
156  // Type of tuple entry 1 (reference) is set to
157  // 1) a std::ranges::subrange over value_type_t<typename t::ref_sequences> if reference information was given
158  // or 2) a "dummy" sequence type:
159  // view::repeat_n(sequence_alphabet{}, size_t{}) | std::view::transform(detail::access_restrictor_fn{})
160  // Type of tuple entry 2 (query) is set to
161  // 1) a std::ranges::subrange over value_type_t<typename t::ref_sequences> if reference information was given
162  // or 2) a "dummy" sequence type:
163 };
165 
166 // ---------------------------------------------------------------------------------------------------------------------
167 // alignment_file_input_default_traits
168 // ---------------------------------------------------------------------------------------------------------------------
169 
185 template <typename ref_sequences_t = ref_info_not_given, typename ref_ids_t = std::deque<std::string>>
187 {
193  using sequence_alphabet = dna5;
195 
198 
200  template <typename _sequence_alphabet>
202 
204  using id_alphabet = char;
205 
207  template <typename _id_alphabet>
209 
212 
214  template <typename _quality_alphabet>
216 
218  using ref_sequences = ref_sequences_t;
219 
221  using ref_ids = ref_ids_t;
223 };
224 
225 // ---------------------------------------------------------------------------------------------------------------------
226 // alignment_file_input
227 // ---------------------------------------------------------------------------------------------------------------------
228 
370 template <
372  detail::Fields selected_field_ids_ = fields<field::SEQ,
373  field::ID,
379  field::MAPQ,
380  field::QUAL,
381  field::FLAG,
382  field::MATE,
383  field::TAGS,
387  detail::TypeListOfAlignmentFileInputFormats valid_formats_ = type_list<format_sam, format_bam>,
388  std::Integral stream_char_type_ = char>
390 {
391 public:
396  using traits_type = traits_type_;
399  using selected_field_ids = selected_field_ids_;
401  using valid_formats = valid_formats_;
403  using stream_char_type = stream_char_type_;
405 
406 private:
408  using dummy_ref_type = decltype(view::repeat_n(typename traits_type::sequence_alphabet{}, size_t{}) |
409  std::view::transform(detail::access_restrictor_fn{}));
410 public:
416  using sequence_type = typename traits_type::template sequence_container<
418  typename traits_type::sequence_alphabet>;
420  using id_type = typename traits_type::template id_container<
421  typename traits_type::id_alphabet>;
423  using offset_type = int32_t;
431  dummy_ref_type,
432  decltype(std::declval<
433  detail::transformation_trait_or_t<
435  dummy_ref_type> /* does not matter as type is not chosen */
436  >() | view::slice(0, 0))>;
453  using mapq_type = uint8_t;
455  using quality_type = typename traits_type::template quality_container<
456  typename traits_type::quality_alphabet>;
458  using flag_type = uint16_t;
462  using e_value_type = double;
464  using bitscore_type = double;
467 
468 private:
471  selected_field_ids::contains(field::SEQ),
473  decltype(std::declval<sequence_type &>() | view::slice(0, 0))>,
474  typename traits_type::template sequence_container<
476 
477 public:
480 
483  id_type,
484  offset_type,
486  ref_id_type,
489  mapq_type,
490  quality_type,
491  flag_type,
492  mate_type,
494  e_value_type,
497 
501  using field_ids = fields<field::SEQ,
502  field::ID,
508  field::MAPQ,
509  field::QUAL,
510  field::FLAG,
511  field::MATE,
512  field::TAGS,
516 
// Compile-time validation of the user's field selection: the immediately-invoked
// constexpr lambda walks selected_field_ids::as_array and yields false as soon as it
// meets a field that field_ids does not contain, which triggers the message below.
517  static_assert([] () constexpr
518  {
519  for (field f : selected_field_ids::as_array)
520  if (!field_ids::contains(f))
521  return false;
522  return true;
523  }(),
524  "You selected a field that is not valid for aligment files, please refer to the documentation "
525  "of alignment_file_input::field_ids for the accepted values.");
526 
531 
536  using value_type = record_type;
541  using const_reference = void;
543  using size_type = size_t;
547  using iterator = detail::in_file_iterator<alignment_file_input>;
549  using const_iterator = void;
553 
557  alignment_file_input() = delete;
560  alignment_file_input(alignment_file_input const &) = delete;
568  ~alignment_file_input() = default;
569 
588  selected_field_ids const & SEQAN3_DOXYGEN_ONLY(fields_tag) = selected_field_ids{}) :
589  primary_stream{new std::ifstream{filename, std::ios_base::in | std::ios::binary}, stream_deleter_default}
590  {
591  init(filename);
592  }
593 
// Construct from an existing (lvalue) stream and an explicit format tag.
// The stream is borrowed: its address is stored in primary_stream together with
// stream_deleter_noop, so this object never deletes it. The body does not read
// format_tag or fields_tag; only their types are used (a fresh file_format{} is
// passed to init()). init() selects the format and buffers the first record.
613  template <IStream2 stream_t, AlignmentFileInputFormat file_format>
614  alignment_file_input(stream_t & stream,
615  file_format const & SEQAN3_DOXYGEN_ONLY(format_tag),
616  selected_field_ids const & SEQAN3_DOXYGEN_ONLY(fields_tag) = selected_field_ids{}) :
617  primary_stream{&stream, stream_deleter_noop}
618  {
619  init(file_format{});
620  }
621 
// Construct from a temporary (rvalue) stream and an explicit format tag.
// The stream is moved into a heap-allocated stream_t that primary_stream owns;
// stream_deleter_default deletes it again. The body does not read format_tag or
// fields_tag; only their types are used. init() selects the format and buffers
// the first record.
623  template <IStream2 stream_t, AlignmentFileInputFormat file_format>
624  alignment_file_input(stream_t && stream,
625  file_format const & SEQAN3_DOXYGEN_ONLY(format_tag),
626  selected_field_ids const & SEQAN3_DOXYGEN_ONLY(fields_tag) = selected_field_ids{}) :
627  primary_stream{new stream_t{std::move(stream)}, stream_deleter_default}
628  {
629  init(file_format{});
630  }
631 
656  typename traits_type::ref_ids & ref_ids,
657  typename traits_type::ref_sequences & ref_sequences,
658  selected_field_ids const & SEQAN3_DOXYGEN_ONLY(fields_tag) = selected_field_ids{}) :
659  primary_stream{new std::ifstream{filename, std::ios_base::in | std::ios::binary}, stream_deleter_default}
660  {
661  // initialize reference information
662  set_references(ref_ids, ref_sequences);
663 
664  init(filename);
665  }
666 
// Construct from an existing (lvalue) stream, reference information and a format tag.
// The stream is borrowed (stored with stream_deleter_noop, never deleted here).
// set_references() runs before init() because init() already reads the first
// record, and reading uses the header and the reference sequences set up here.
// ref_sequences is taken by lvalue reference because set_references() keeps its
// address; the caller's object has to stay alive while this file is in use.
692  template <IStream2 stream_t, AlignmentFileInputFormat file_format>
693  alignment_file_input(stream_t & stream,
694  typename traits_type::ref_ids & ref_ids,
695  typename traits_type::ref_sequences & ref_sequences,
696  file_format const & SEQAN3_DOXYGEN_ONLY(format_tag),
697  selected_field_ids const & SEQAN3_DOXYGEN_ONLY(fields_tag) = selected_field_ids{}) :
698  primary_stream{&stream, stream_deleter_noop}
699  {
700  // initialize reference information
701  set_references(ref_ids, ref_sequences);
702 
703  init(file_format{});
704  }
705 
// Construct from a temporary (rvalue) stream, reference information and a format tag.
// The stream is moved into a heap-allocated stream_t owned by primary_stream
// (released via stream_deleter_default). set_references() runs before init()
// because init() already reads the first record. ref_sequences is taken by lvalue
// reference because set_references() keeps its address; the caller's object has
// to stay alive while this file is in use.
707  template <IStream2 stream_t, AlignmentFileInputFormat file_format>
708  alignment_file_input(stream_t && stream,
709  typename traits_type::ref_ids & ref_ids,
710  typename traits_type::ref_sequences & ref_sequences,
711  file_format const & SEQAN3_DOXYGEN_ONLY(format_tag),
712  selected_field_ids const & SEQAN3_DOXYGEN_ONLY(fields_tag) = selected_field_ids{}) :
713  primary_stream{new stream_t{std::move(stream)}, stream_deleter_default}
714  {
715  // initialize reference information
716  set_references(ref_ids, ref_sequences);
717 
718  init(file_format{});
719  }
720 
722  // explicitly delete rvalues for reference information
724  typename traits_type::ref_ids &&,
725  typename traits_type::ref_sequences &&,
726  selected_field_ids const &) = delete;
727 
// Deleted overload: rejects reference ids / reference sequences passed as rvalues
// when constructing from a stream. set_references() stores the address of the
// reference sequences, so a temporary would leave a dangling pointer behind.
728  template <IStream2 stream_t, AlignmentFileInputFormat file_format>
729  alignment_file_input(stream_t &&,
730  typename traits_type::ref_ids &&,
731  typename traits_type::ref_sequences &&,
732  file_format const &,
733  selected_field_ids const &) = delete;
736 
// Returns an iterator (detail::in_file_iterator<alignment_file_input>) constructed
// from *this. This function itself performs no read; the first record was already
// buffered by init() during construction.
756  iterator begin() noexcept
757  {
758  return {*this};
759  }
760 
// Returns a value-initialised sentinel for comparison with the iterator from begin().
// The sentinel carries no state taken from this file object.
774  sentinel end() noexcept
775  {
776  return {};
777  }
778 
// Returns the record currently held in record_buffer, i.e. the one most recently
// filled by read_next_record(). No end-of-file check (at_end) is done here.
802  reference front() noexcept
803  {
804  return record_buffer;
805  }
807 
810 
826  {
827  return *header_ptr;
828  }
829 
830 protected:
832 
// Initialisation for the filename-based constructors.
// Throws file_open_error if primary_stream (opened by the constructor) is not good().
// Otherwise creates the secondary stream on top of the primary one, sets `format`
// from the filename, and buffers the first record.
// NOTE(review): filename is a non-const reference that is handed to
// detail::make_secondary_istream and detail::set_format; presumably one of them may
// modify it (e.g. strip a compression extension) - confirm against those helpers.
834  void init(std::filesystem::path & filename)
835  {
836  // open stream
837  if (!primary_stream->good())
838  throw file_open_error{"Could not open file " + filename.string() + " for reading."};
839 
840  secondary_stream = detail::make_secondary_istream(*primary_stream, filename);
841  detail::set_format(format, filename);
842 
843  // buffer first record
844  read_next_record();
845  }
846 
// Initialisation for the stream-based constructors, where the format is given as a tag.
// Rejects at compile time any format_type that is not listed in valid_formats, then
// stores the matching detail::alignment_file_input_format in the `format` variant,
// creates the secondary stream on top of the primary one (no filename available
// here) and buffers the first record. The tag argument itself is not read.
848  template <typename format_type>
849  void init(format_type const &)
850  {
851  static_assert(meta::in<valid_formats, format_type>::value,
852  "You selected a format that is not in the valid_formats of this file.");
853 
854  format = detail::alignment_file_input_format<format_type>{};
855  secondary_stream = detail::make_secondary_istream(*primary_stream);
856 
857  // buffer first record
858  read_next_record();
859  }
860 
862  std::unique_ptr<header_type> header_ptr{new header_type{}};
863 
867  record_type record_buffer;
870 
// Deleter that does nothing: used for streams this object only borrows (constructed
// from `&stream`) and as the initial deleter of the empty stream pointers.
878  static void stream_deleter_noop(std::basic_istream<stream_char_type> *) {}
// Deleter that calls `delete`: used for streams this object allocated itself with
// `new` (the std::ifstream of the filename constructors, the moved-in rvalue streams).
880  static void stream_deleter_default(std::basic_istream<stream_char_type> * ptr) { delete ptr; }
881 
883  stream_ptr_t primary_stream{nullptr, stream_deleter_noop};
885  stream_ptr_t secondary_stream{nullptr, stream_deleter_noop};
886 
888  bool at_end{false};
889 
891  using format_type = typename detail::variant_from_tags<valid_formats, detail::alignment_file_input_format>::type;
892 
894  format_type format;
896 
900  typename traits_type::ref_sequences const * reference_sequences_ptr{nullptr};
902 
// Installs user-supplied reference information.
//  - ref_ids:       reference identifiers; a new header is built from them and
//                   replaces the current header_ptr.
//  - ref_sequences: reference sequences; only their ADDRESS is stored in
//                   reference_sequences_ptr (non-owning), so the caller's object
//                   must outlive every later read.
// Debug builds assert that both ranges have the same size. For every index the
// header receives an entry in ref_id_info (sequence length plus an empty string)
// and a ref_dict mapping from the identifier to its index.
// NOTE(review): the template only requires ForwardRange, but the body uses
// ref_sequences[idx] and ref_ids.size(); presumably all supported types are
// random-access and sized - confirm or tighten the constraint.
913  template <std::ranges::ForwardRange ref_sequences_t>
914  void set_references(typename traits_type::ref_ids & ref_ids, ref_sequences_t && ref_sequences)
915  {
916  assert(std::ranges::size(ref_ids) == std::ranges::size(ref_sequences));
917 
918  header_ptr = std::unique_ptr<header_type>{std::make_unique<header_type>(ref_ids)};
919  reference_sequences_ptr = &ref_sequences;
920 
921  // initialise reference map and ref_dict if ref_ids are non-empty
922  for (size_t idx = 0; idx < ref_ids.size(); ++idx)
923  {
924  header_ptr->ref_id_info.emplace_back(std::ranges::size(ref_sequences[idx]), "");
925  header_ptr->ref_dict[header_ptr->ref_ids()[idx]] = idx;
926  }
927  }
929 
931  void read_next_record()
932  {
933  // clear the record
934  record_buffer.clear();
935  detail::get_or_ignore<field::HEADER_PTR>(record_buffer) = header_ptr.get();
936 
937  // at end if we could not read further
938  if (std::istreambuf_iterator<stream_char_type>{*secondary_stream} ==
940  {
941  at_end = true;
942  return;
943  }
944 
945  auto call_read_func = [this] (auto & ref_seq_info)
946  {
947  std::visit([&] (auto & f)
948  {
949  f.read(*secondary_stream,
950  options,
951  ref_seq_info,
952  *header_ptr,
953  detail::get_or_ignore<field::SEQ>(record_buffer),
954  detail::get_or_ignore<field::QUAL>(record_buffer),
955  detail::get_or_ignore<field::ID>(record_buffer),
956  detail::get_or_ignore<field::OFFSET>(record_buffer),
957  detail::get_or_ignore<field::REF_SEQ>(record_buffer),
958  detail::get_or_ignore<field::REF_ID>(record_buffer),
959  detail::get_or_ignore<field::REF_OFFSET>(record_buffer),
960  detail::get_or_ignore<field::ALIGNMENT>(record_buffer),
961  detail::get_or_ignore<field::FLAG>(record_buffer),
962  detail::get_or_ignore<field::MAPQ>(record_buffer),
963  detail::get_or_ignore<field::MATE>(record_buffer),
964  detail::get_or_ignore<field::TAGS>(record_buffer),
965  detail::get_or_ignore<field::EVALUE>(record_buffer),
966  detail::get_or_ignore<field::BIT_SCORE>(record_buffer));
967 
968  }, format);
969  };
970 
971  assert(!format.valueless_by_exception());
972 
974  call_read_func(*reference_sequences_ptr);
975  else
976  call_read_func(std::ignore);
977  }
978 
980  friend iterator;
981 };
982 
987 template <IStream2 stream_type,
989  AlignmentFileInputFormat file_format,
990  detail::Fields selected_field_ids>
991 alignment_file_input(stream_type && stream,
992  file_format const &,
993  selected_field_ids const &)
995  selected_field_ids,
996  type_list<file_format>,
998 
1000 template <IStream2 stream_type,
1001  AlignmentFileInputFormat file_format,
1002  detail::Fields selected_field_ids>
1003 alignment_file_input(stream_type & stream,
1004  file_format const &,
1005  selected_field_ids const &)
1007  selected_field_ids,
1008  type_list<file_format>,
1010 
1012 template <IStream2 stream_type,
1013  AlignmentFileInputFormat file_format>
1014 alignment_file_input(stream_type && stream,
1015  file_format const &)
1017  typename alignment_file_input<>::selected_field_ids, // actually use the default
1018  type_list<file_format>,
1020 
1022 template <IStream2 stream_type,
1023  AlignmentFileInputFormat file_format>
1024 alignment_file_input(stream_type & stream,
1025  file_format const &)
1027  typename alignment_file_input<>::selected_field_ids, // actually use the default
1028  type_list<file_format>,
1030 
1032 template <std::ranges::ForwardRange ref_ids_t,
1033  std::ranges::ForwardRange ref_sequences_t,
1034  detail::Fields selected_field_ids>
1035 alignment_file_input(std::filesystem::path path,
1036  ref_ids_t &,
1037  ref_sequences_t &,
1038  selected_field_ids const &)
1039  -> alignment_file_input<alignment_file_input_default_traits<std::remove_reference_t<ref_sequences_t>,
1041  selected_field_ids,
1042  typename alignment_file_input<>::valid_formats, // actually use the default
1043  typename alignment_file_input<>::stream_char_type>; // actually use the default
1044 
1046 template <std::ranges::ForwardRange ref_ids_t,
1047  std::ranges::ForwardRange ref_sequences_t>
1048 alignment_file_input(std::filesystem::path path,
1049  ref_ids_t &,
1050  ref_sequences_t &)
1051  -> alignment_file_input<alignment_file_input_default_traits<std::remove_reference_t<ref_sequences_t>,
1053  typename alignment_file_input<>::selected_field_ids, // actually use the default
1054  typename alignment_file_input<>::valid_formats, // actually use the default
1055  typename alignment_file_input<>::stream_char_type>; // actually use the default
1056 
1058 template <IStream2 stream_type,
1059  std::ranges::ForwardRange ref_ids_t,
1060  std::ranges::ForwardRange ref_sequences_t,
1061  AlignmentFileInputFormat file_format,
1062  detail::Fields selected_field_ids>
1063 alignment_file_input(stream_type && stream,
1064  ref_ids_t &,
1065  ref_sequences_t &,
1066  file_format const &,
1067  selected_field_ids const &)
1068  -> alignment_file_input<alignment_file_input_default_traits<std::remove_reference_t<ref_sequences_t>,
1070  selected_field_ids,
1071  type_list<file_format>,
1073 
1075 template <IStream2 stream_type,
1076  std::ranges::ForwardRange ref_ids_t,
1077  std::ranges::ForwardRange ref_sequences_t,
1078  AlignmentFileInputFormat file_format,
1079  detail::Fields selected_field_ids>
1080 alignment_file_input(stream_type & stream,
1081  ref_ids_t &,
1082  ref_sequences_t &,
1083  file_format const &,
1084  selected_field_ids const &)
1085  -> alignment_file_input<alignment_file_input_default_traits<std::remove_reference_t<ref_sequences_t>,
1087  selected_field_ids,
1088  type_list<file_format>,
1090 
1092 template <IStream2 stream_type,
1093  std::ranges::ForwardRange ref_ids_t,
1094  std::ranges::ForwardRange ref_sequences_t,
1095  AlignmentFileInputFormat file_format>
1096 alignment_file_input(stream_type && stream,
1097  ref_ids_t &,
1098  ref_sequences_t &,
1099  file_format const &)
1100  -> alignment_file_input<alignment_file_input_default_traits<std::remove_reference_t<ref_sequences_t>,
1102  typename alignment_file_input<>::selected_field_ids, // actually use the default
1103  type_list<file_format>,
1105 
1107 template <IStream2 stream_type,
1108  std::ranges::ForwardRange ref_ids_t,
1109  std::ranges::ForwardRange ref_sequences_t,
1110  AlignmentFileInputFormat file_format>
1111 alignment_file_input(stream_type & stream,
1112  ref_ids_t &,
1113  ref_sequences_t &,
1114  file_format const &)
1115  -> alignment_file_input<alignment_file_input_default_traits<std::remove_reference_t<ref_sequences_t>,
1117  typename alignment_file_input<>::selected_field_ids, // actually use the default
1118  type_list<file_format>,
1121 
1122 } // namespace seqan3
1123 
1124 // ------------------------------------------------------------------
1125 // std-overloads for the tuple-like interface
1126 // ------------------------------------------------------------------
1127 
1128 namespace std
1129 {
1130 
// std::tuple_size specialisation for seqan3::alignment_file_input (part of the
// tuple-like interface): `value` is the number of fields selected for the file,
// taken from selected_field_ids::as_array.
1136 template <seqan3::AlignmentFileInputTraits traits_type,
1137  seqan3::detail::Fields selected_field_ids,
1138  seqan3::detail::TypeListOfAlignmentFileInputFormats valid_formats,
1139  std::Integral stream_char_t>
1140 struct tuple_size<seqan3::alignment_file_input<traits_type, selected_field_ids, valid_formats, stream_char_t>>
1141 {
1143  static constexpr size_t value = selected_field_ids::as_array.size();
1144 };
1145 
1151 template <size_t elem_no,
1153  seqan3::detail::Fields selected_field_ids,
1154  seqan3::detail::TypeListOfAlignmentFileInputFormats valid_formats,
1155  std::Integral stream_char_t>
1156 struct tuple_element<elem_no, seqan3::alignment_file_input<traits_type, selected_field_ids, valid_formats, stream_char_t>>
1157  : tuple_element<elem_no, typename seqan3::alignment_file_input<traits_type,
1158  selected_field_ids,
1159  valid_formats,
1160  stream_char_t>::file_as_tuple_type>
1161 {};
1162 
1163 } // namespace std
Provides quality alphabet composites.
A combined alphabet that can hold values of either of its alternatives.
Definition: alphabet_variant.hpp:205
valid_formats_ valid_formats
A seqan3::type_list with the possible formats.
Definition: input.hpp:401
alignment_file_header< typename traits_type::ref_ids > header_type
The type of field::HEADER_PTR (default: alignment_file_header<typename traits_type::ref_ids>).
Definition: input.hpp:466
T visit(T... args)
alignment_file_input(std::filesystem::path filename, selected_field_ids const &fields_tag=selected_field_ids{})
Construct from filename.
Definition: input.hpp:587
The (reference) "sequence" information, usually a range of nucleotides or amino acids.
The "sequence", usually a range of nucleotides or amino acids.
header_type & header()
Access the file's header.
Definition: input.hpp:825
char id_alphabet
The alphabet for an identifier string is char.
Definition: input.hpp:204
alignment_file_input & operator=(alignment_file_input const &)=delete
Copy assignment is explicitly deleted because you cannot have multiple access to the same file...
Provides exceptions used in the I/O module.
std::optional< int32_t > ref_id_type
The type of field::REF_ID is fixed to std::optional<int32_t>.
Definition: input.hpp:444
std::tuple< ref_id_type, ref_offset_type, int32_t > mate_type
The type of field::MATE is fixed to std::tuple<ref_id_type, ref_offset_type, int32_t>.
Definition: input.hpp:460
The (pairwise) alignment stored in a seqan3::alignment object.
The alignment flag (bit information), uint16_t value.
void const_iterator
The const iterator type is void because files are not const-iterable.
Definition: input.hpp:549
Provides seqan3::detail::transformation_trait_or.
selected_field_ids_ selected_field_ids
A seqan3::fields list with the fields selected for the record.
Definition: input.hpp:399
The class template that file records are based on; behaves like an std::tuple.
Definition: record.hpp:187
double e_value_type
The type of field::EVALUE is fixed to double.
Definition: input.hpp:462
sentinel end() noexcept
Returns a sentinel for comparison with iterator.
Definition: input.hpp:774
Provides seqan3::aa27, container aliases and string literals.
SeqAn specific customisations in the standard namespace.
int32_t offset_type
The type of field::OFFSET is fixed to int32_t.
Definition: input.hpp:423
alignment_file_input()=delete
Default constructor is explicitly deleted, you need to give a stream or file name.
The 15 letter DNA alphabet, containing all IUPAC symbols minus the gap.
Definition: dna15.hpp:48
::ranges::size size
Alias for ranges::size. Obtains the size of a range whose size can be calculated in constant time...
Definition: ranges:189
The main SeqAn3 namespace.
std::conditional_t< std::Same< typename traits_type::ref_sequences, ref_info_not_given >, dummy_ref_type, decltype(std::declval< detail::transformation_trait_or_t< seqan3::reference< typename traits_type::ref_sequences const >, dummy_ref_type > >()|view::slice(0, 0))> ref_sequence_type
The type of field::REF_SEQ (default depends on construction).
Definition: input.hpp:436
The qualities, usually in phred-score notation.
The e-value (length normalized bit score), double value.
constexpr auto slice
A view adaptor that returns a half-open interval on the underlying range.
Definition: slice.hpp:144
stream_char_type_ stream_char_type
Character type of the stream(s), usually char.
Definition: input.hpp:403
Provides seqan3::concatenated_sequences.
iterator begin() noexcept
Returns an iterator to current position in the file.
Definition: input.hpp:756
Sequence (REF_SEQ) relative start position (0-based), unsigned value.
std::ranges::default_sentinel_t sentinel
The type returned by end().
Definition: input.hpp:551
ref_ids_t ref_ids
The type of the reference identifiers is deduced on construction.
Definition: input.hpp:221
Thrown if there is an unspecified filesystem or stream error while opening, e.g. permission problem...
Definition: exception.hpp:39
A class template that holds a choice of seqan3::field.
Definition: record.hpp:127
The five letter DNA alphabet of A,C,G,T and the unknown character N.
Definition: dna5.hpp:48
Provides seqan3::AlignmentFileInputFormat and auxiliary classes.
Stores the header information of alignment files.
Definition: header.hpp:29
type_list< sequence_type, id_type, offset_type, ref_sequence_type, ref_id_type, ref_offset_type, alignment_type, mapq_type, quality_type, flag_type, mate_type, sam_tag_dictionary, e_value_type, bitscore_type, header_type * > field_types
The previously defined types aggregated in a seqan3::type_list.
Definition: input.hpp:496
uint16_t flag_type
The type of field::FLAG is fixed to uint16_t.
Definition: input.hpp:458
Provides alphabet adaptations for standard char types.
typename traits_type::template sequence_container< typename traits_type::sequence_alphabet > sequence_type
The type of field::SEQ (default std::vector<seqan3::dna5>).
Definition: input.hpp:418
Provides seqan3::TupleLike.
std::tuple< gap_decorator< ref_sequence_type >, alignment_query_type > alignment_type
The type of field::ALIGNMENT (default: std::pair<std::vector<gapped<dna5>>, std::vector<gapped<dna5>>...
Definition: input.hpp:479
Provides seqan3::view::repeat_n.
Provides the seqan3::record template and the seqan3::field enum.
Provides various utility functions required only for input.
The requirements a traits_type for seqan3::alignment_file_input must meet.
~alignment_file_input()=default
Destructor is defaulted.
Sequence (SEQ) relative start position (0-based), unsigned value.
The Concepts library.
The identifier, usually a string.
T declval(T... args)
record< detail::select_types_with_ids_t< field_types, field_ids, selected_field_ids >, selected_field_ids > record_type
The type of the record, a specialisation of seqan3::record; acts as a tuple of the selected field typ...
Definition: input.hpp:529
Adaptations of concepts from the Ranges TS.
size_t size_type
An unsigned integer type, usually std::size_t.
Definition: input.hpp:543
traits_type_ traits_type
A traits type that defines aliases and template for storage of the fields.
Definition: input.hpp:397
The mate pair information given as a std::tuple of reference name, offset and template length...
The identifier of the (reference) sequence that SEQ was aligned to.
alignment_file_input_options< typename traits_type::sequence_legal_alphabet > options
The options are public and its members can be set directly.
Definition: input.hpp:809
A pointer to the seqan3::alignment_file_header object storing header information. ...
Provides the seqan3::format_sam tag and the seqan3::alignment_file_input_format and seqan3::alignment...
Stream concepts.
ref_sequences_t ref_sequences
The type of the reference sequences is deduced on construction.
Definition: input.hpp:218
alignment_file_input(std::filesystem::path filename, typename traits_type::ref_ids &ref_ids, typename traits_type::ref_sequences &ref_sequences, selected_field_ids const &fields_tag=selected_field_ids{})
Construct from filename and given additional reference information.
Definition: input.hpp:655
Provides seqan3::phred42 quality scores.
Provides helper data structures for the seqan3::alignment_file_output.
detail::in_file_iterator< alignment_file_input > iterator
The iterator type of this view (an input iterator).
Definition: input.hpp:547
Provides various type traits on generic types.
Provides the seqan3::alignment_file_format_bam class.
typename traits_type::template quality_container< typename traits_type::quality_alphabet > quality_type
The type of field::QUAL (default std::vector<seqan3::phred42>).
Definition: input.hpp:456
A class for reading alignment files, e.g. SAM, BAM, BLAST ...
Definition: input.hpp:389
Provides seqan3::view::slice.
::ranges::default_sentinel_t default_sentinel_t
Alias for ranges::default_sentinel_t. Type of ranges::default_sentinel.
Definition: iterator:351
Meta-header for the nucleotide submodule; includes all headers from alphabet/nucleotide/.
meta::list< types... > type_list
Type that contains multiple types, an alias for meta::list.
Definition: type_list.hpp:27
typename traits_type::template id_container< typename traits_type::id_alphabet > id_type
The type of field::ID (default: std::string).
Definition: input.hpp:421
The default traits for seqan3::alignment_file_input.
Definition: input.hpp:186
Exposes the reference of another type.
Definition: pre.hpp:70
constexpr auto repeat_n
A view factory that repeats a given value n times.
Definition: repeat_n.hpp:97
void const_reference
The const_reference type is void because files are not const-iterable.
Definition: input.hpp:541
field
An enumerator for the fields used in file formats. Some of the fields are shared between formats...
Definition: record.hpp:63
double bitscore_type
The type of field::BITSCORE is fixed to double.
Definition: input.hpp:464
alignment_file_input(stream_t &stream, file_format const &format_tag, selected_field_ids const &fields_tag=selected_field_ids{})
Construct from an existing stream and with specified format.
Definition: input.hpp:614
The optional tags in the SAM format, stored in a dictionary.
reference front() noexcept
Return the record we are currently at in the file.
Definition: input.hpp:802
Specifies requirements of a Range type for which begin returns a type that models std::ForwardIterato...
alignment_file_input(stream_t &&stream, typename traits_type::ref_ids &ref_ids, typename traits_type::ref_sequences &ref_sequences, file_format const &format_tag, selected_field_ids const &fields_tag=selected_field_ids{})
Definition: input.hpp:708
uint8_t mapq_type
The type of field::MAPQ is fixed to uint8_t.
Definition: input.hpp:453
The concept std::Same<T, U> is satisfied if and only if T and U denote the same type.
Quality type for traditional Sanger and modern Illumina Phred scores (typical range).
Definition: phred42.hpp:43
Provides the seqan3::detail::in_file_iterator class template.
Provides seqan3::gap_decorator.
constexpr auto transform
A range adaptor that takes an invocable and returns a view of the elements with the invocable applied...
Definition: ranges:911
alignment_file_input(stream_t &&stream, file_format const &format_tag, selected_field_ids const &fields_tag=selected_field_ids{})
Definition: input.hpp:624
The bit score (statistical significance indicator), unsigned value.
The mapping quality of the SEQ alignment, usually a phred-scaled score.
The concept Integral is satisfied if and only if T is an integral type.
A gap decorator allows the annotation of sequences with gap symbols while leaving the underlying sequ...
Definition: gap_decorator.hpp:83
std::optional< int32_t > ref_offset_type
The type of field::REF_OFFSET is fixed to an std::optional<int32_t>.
Definition: input.hpp:451
The SAM tag dictionary class that stores all optional SAM fields.
Definition: sam_tag_dictionary.hpp:324
This header includes C++17 filesystem support and imports it into namespace seqan3::filesystem (indep...
alignment_file_input(stream_t &stream, typename traits_type::ref_ids &ref_ids, typename traits_type::ref_sequences &ref_sequences, file_format const &format_tag, selected_field_ids const &fields_tag=selected_field_ids{})
Construct from an existing stream and with specified format.
Definition: input.hpp:693