SeqAn3  3.0.3
The Modern C++ library for sequence analysis.
input.hpp
Go to the documentation of this file.
1 // -----------------------------------------------------------------------------------------------------
2 // Copyright (c) 2006-2021, Knut Reinert & Freie Universität Berlin
3 // Copyright (c) 2016-2021, Knut Reinert & MPI für molekulare Genetik
4 // This file may be used, modified and/or redistributed under the terms of the 3-clause BSD-License
5 // shipped with this file and also available at: https://github.com/seqan/seqan3/blob/master/LICENSE.md
6 // -----------------------------------------------------------------------------------------------------
7 
13 #pragma once
14 
15 #include <cassert>
16 #include <seqan3/std/concepts>
17 #include <seqan3/std/filesystem>
18 #include <fstream>
19 #include <seqan3/std/ranges>
20 #include <string>
21 #include <variant>
22 #include <vector>
23 
34 #include <seqan3/io/detail/record.hpp>
35 #include <seqan3/io/exception.hpp>
47 
48 namespace seqan3
49 {
50 
51 // ---------------------------------------------------------------------------------------------------------------------
52 // sam_file_input_traits
53 // ---------------------------------------------------------------------------------------------------------------------
54 
113 template <typename t>
114 SEQAN3_CONCEPT sam_file_input_traits = requires (t v)
115 {
116  // field::seq
121 
122  // field::id
124 
125  // field::qual
128 
129  // field::ref_seq
130  // either ref_info_not_given or a range over ranges over alphabet (e.g. std::vector<dna4_vector>)
131  requires std::same_as<typename t::ref_sequences, ref_info_not_given> || requires ()
132  {
134  };
135 
136  // field::ref_id
138  (!std::same_as<typename t::ref_sequences, ref_info_not_given> ||
140  requires std::ranges::forward_range<std::ranges::range_reference_t<typename t::ref_ids>>;
141  requires std::ranges::forward_range<typename t::ref_ids>;
142 
143  // field::offset is fixed to int32_t
144  // field::ref_offset is fixed to std::optional<int32_t>
145  // field::flag is fixed to seqan3::sam_flag
146  // field::mapq is fixed to uint8_t
147  // field::evalue is fixed to double
148  // field::bitscore is fixed to double
149  // field::mate is fixed to std::tuple<ref_id_container<ref_id_alphabet>, ref_offset_type, int32_t>
150 
151  // field::alignment
152  // the alignment type cannot be configured.
153  // Type of tuple entry 1 (reference) is set to
154  // 1) a std::ranges::subrange over std::ranges::range_value_t<typename t::ref_sequences> if reference information was given
155  // or 2) a "dummy" sequence type:
156  // views::repeat_n(sequence_alphabet{}, size_t{}) | std::views::transform(detail::access_restrictor_fn{})
157  // Type of tuple entry 2 (query) is set to
158  // 1) a std::ranges::subrange over std::ranges::range_value_t<typename t::ref_sequences> if reference information was given
159  // or 2) a "dummy" sequence type:
160 };
162 
163 // ---------------------------------------------------------------------------------------------------------------------
164 // sam_file_input_default_traits
165 // ---------------------------------------------------------------------------------------------------------------------
166 
182 template <typename ref_sequences_t = ref_info_not_given, typename ref_ids_t = std::deque<std::string>>
184 {
192 
195 
197  template <typename _sequence_alphabet>
199 
201  template <typename _id_alphabet>
203 
206 
208  template <typename _quality_alphabet>
210 
212  using ref_sequences = ref_sequences_t;
213 
215  using ref_ids = ref_ids_t;
217 };
218 
219 // ---------------------------------------------------------------------------------------------------------------------
220 // sam_file_input
221 // ---------------------------------------------------------------------------------------------------------------------
222 
333 template <
335  detail::fields_specialisation selected_field_ids_ = fields<field::seq,
336  field::id,
338 #ifdef SEQAN3_DEPRECATED_310
340 #endif // SEQAN3_DEPRECATED_310
344  field::cigar,
345  field::mapq,
346  field::qual,
347  field::flag,
348  field::mate,
349  field::tags,
350 #ifdef SEQAN3_DEPRECATED_310
353 #endif // SEQAN3_DEPRECATED_310
355  detail::type_list_of_sam_file_input_formats valid_formats_ = type_list<format_sam, format_bam>>
357 {
358 public:
364  using traits_type = traits_type_;
366  using selected_field_ids = selected_field_ids_;
368  using valid_formats = valid_formats_;
370  using stream_char_type = char;
372 
373 private:
375  using dummy_ref_type = decltype(views::repeat_n(typename traits_type::sequence_alphabet{}, size_t{}) |
376  std::views::transform(detail::access_restrictor_fn{}));
377 
379  using ref_sequence_unsliced_type =
380  detail::lazy_conditional_t<std::ranges::range<typename traits_type::ref_sequences const>,
381  detail::lazy<std::ranges::range_reference_t,
382  typename traits_type::ref_sequences const>,
383  dummy_ref_type>;
384 
386  using ref_sequence_sliced_type = decltype(std::declval<ref_sequence_unsliced_type>() | views::slice(0, 0));
387 public:
394  using sequence_type = typename traits_type::template sequence_container<
395  typename traits_type::sequence_alphabet>;
397  using id_type = typename traits_type::template id_container<char>;
399  using offset_type = int32_t;
407  dummy_ref_type,
408  ref_sequence_sliced_type>;
425  using mapq_type = uint8_t;
427  using quality_type = typename traits_type::template quality_container<
428  typename traits_type::quality_alphabet>;
435 #ifdef SEQAN3_DEPRECATED_310
437  using e_value_type = double;
439  using bitscore_type = double;
440 #endif // SEQAN3_DEPRECATED_310
442  using header_type = sam_file_header<typename traits_type::ref_ids>;
443 
444 private:
449  decltype(std::declval<sequence_type &>() | views::slice(0, 0))>,
450  typename traits_type::template sequence_container<
452 
453 public:
456 
459  id_type,
460  offset_type,
461 #ifdef SEQAN3_DEPRECATED_310
463 #endif // SEQAN3_DEPRECATED_310
464  ref_id_type,
468  mapq_type,
469  quality_type,
470  flag_type,
471  mate_type,
473 #ifdef SEQAN3_DEPRECATED_310
474  e_value_type,
476 #endif // SEQAN3_DEPRECATED_310
477  header_type *>;
478 
501  field::id,
503 #ifdef SEQAN3_DEPRECATED_310
505 #endif // SEQAN3_DEPRECATED_310
509  field::cigar,
510  field::mapq,
511  field::qual,
512  field::flag,
513  field::mate,
514  field::tags,
515 #ifdef SEQAN3_DEPRECATED_310
518 #endif // SEQAN3_DEPRECATED_310
520 
521 #ifdef SEQAN3_DEPRECATED_310
523  static constexpr bool is_default_selected_field_ids = selected_field_ids::size == field_ids::size;
524 
526  "You selected the deprecated seqan3::field::ref_seq. It will not be available in the record.");
528  "You selected the deprecated seqan3::field::evalue. It will not be available in the record.");
530  "You selected the deprecated seqan3::field::bit_score. It will not be available in the record.");
531 #endif // SEQAN3_DEPRECATED_310
532 
// Compile-time validation of the selected fields: the immediately invoked constexpr lambda
// walks selected_field_ids::as_array and yields false as soon as one field is not contained
// in field_ids, which makes the static_assert fail with the message below.
533  static_assert([] () constexpr
534  {
535  for (field f : selected_field_ids::as_array)
536  if (!field_ids::contains(f))
537  return false;
538  return true;
539  }(),
540  "You selected a field that is not valid for alignment files, please refer to the documentation "
541  "of sam_file_input::field_ids for the accepted values.");
542 
547 
557  using const_reference = void;
559  using size_type = size_t;
563  using iterator = detail::in_file_iterator<sam_file_input>;
565  using const_iterator = void;
567  using sentinel = std::default_sentinel_t;
569 
574  sam_file_input() = delete;
576  sam_file_input(sam_file_input const &) = delete;
584  ~sam_file_input() = default;
585 
604  selected_field_ids const & SEQAN3_DOXYGEN_ONLY(fields_tag) = selected_field_ids{}) :
605  primary_stream{new std::ifstream{}, stream_deleter_default}
606  {
607  init_by_filename(std::move(filename));
608  }
609 
// Constructs the file from an lvalue stream and an explicit format.
// The stream is referenced, not owned: primary_stream stores its address together with
// stream_deleter_noop, so this object never deletes the caller's stream.
// The requires-clause restricts the stream's char_type to stream_char_type.
// Neither tag argument is referenced in the body; only the type file_format is used,
// to select the format via init_by_format<file_format>().
629  template <input_stream stream_t, sam_file_input_format file_format>
631  requires std::same_as<typename std::remove_reference_t<stream_t>::char_type, stream_char_type>
633  sam_file_input(stream_t & stream,
634  file_format const & SEQAN3_DOXYGEN_ONLY(format_tag),
635  selected_field_ids const & SEQAN3_DOXYGEN_ONLY(fields_tag) = selected_field_ids{}) :
636  primary_stream{&stream, stream_deleter_noop}
637  {
638  init_by_format<file_format>();
639  }
640 
// Constructs the file from a stream passed as stream_t && and an explicit format.
// The stream is moved into a newly allocated stream_t that this object owns:
// primary_stream is paired with stream_deleter_default, which deletes the pointer.
// The requires-clause restricts the stream's char_type to stream_char_type.
// Neither tag argument is referenced in the body; only the type file_format is used.
642  template <input_stream stream_t, sam_file_input_format file_format>
644  requires std::same_as<typename std::remove_reference_t<stream_t>::char_type, stream_char_type>
646  sam_file_input(stream_t && stream,
647  file_format const & SEQAN3_DOXYGEN_ONLY(format_tag),
648  selected_field_ids const & SEQAN3_DOXYGEN_ONLY(fields_tag) = selected_field_ids{}) :
649  primary_stream{new stream_t{std::move(stream)}, stream_deleter_default}
650  {
651  init_by_format<file_format>();
652  }
653 
678  typename traits_type::ref_ids & ref_ids,
679  typename traits_type::ref_sequences & ref_sequences,
680  selected_field_ids const & SEQAN3_DOXYGEN_ONLY(fields_tag) = selected_field_ids{}) :
681  primary_stream{new std::ifstream{}, stream_deleter_default}
682  {
683  // initialize reference information
684  set_references(ref_ids, ref_sequences);
685 
686  init_by_filename(std::move(filename));
687  }
688 
// Constructs the file from an lvalue stream, reference information and an explicit format.
// The stream is referenced, not owned (stream_deleter_noop).
// ref_ids and ref_sequences are taken by lvalue reference and handed to set_references(),
// which stores the address of ref_sequences; the referenced objects must therefore stay
// alive for as long as this file object uses them.
// NOTE(review): no requires-clause on the stream's char_type is visible on this overload,
// unlike the overloads that take no reference information - confirm this is intended.
714  template <input_stream stream_t, sam_file_input_format file_format>
715  sam_file_input(stream_t & stream,
716  typename traits_type::ref_ids & ref_ids,
717  typename traits_type::ref_sequences & ref_sequences,
718  file_format const & SEQAN3_DOXYGEN_ONLY(format_tag),
719  selected_field_ids const & SEQAN3_DOXYGEN_ONLY(fields_tag) = selected_field_ids{}) :
720  primary_stream{&stream, stream_deleter_noop}
721  {
722  // initialize reference information
723  set_references(ref_ids, ref_sequences);
724 
725  init_by_format<file_format>();
726  }
727 
// Constructs the file from a stream passed as stream_t &&, reference information and an
// explicit format. The stream is moved into a newly allocated stream_t owned by this
// object (stream_deleter_default deletes it).
// ref_ids and ref_sequences are taken by lvalue reference and handed to set_references(),
// which stores the address of ref_sequences; the referenced objects must therefore stay
// alive for as long as this file object uses them.
// NOTE(review): no requires-clause on the stream's char_type is visible on this overload,
// unlike the overloads that take no reference information - confirm this is intended.
729  template <input_stream stream_t, sam_file_input_format file_format>
730  sam_file_input(stream_t && stream,
731  typename traits_type::ref_ids & ref_ids,
732  typename traits_type::ref_sequences & ref_sequences,
733  file_format const & SEQAN3_DOXYGEN_ONLY(format_tag),
734  selected_field_ids const & SEQAN3_DOXYGEN_ONLY(fields_tag) = selected_field_ids{}) :
735  primary_stream{new stream_t{std::move(stream)}, stream_deleter_default}
736  {
737  // initialize reference information
738  set_references(ref_ids, ref_sequences);
739 
740  init_by_format<file_format>();
741  }
742 
744  // explicitly delete rvalues for reference information
746  typename traits_type::ref_ids &&,
747  typename traits_type::ref_sequences &&,
748  selected_field_ids const &) = delete;
749 
750  template <input_stream stream_t, sam_file_input_format file_format>
751  sam_file_input(stream_t &&,
752  typename traits_type::ref_ids &&,
753  typename traits_type::ref_sequences &&,
754  file_format const &,
755  selected_field_ids const &) = delete;
758 
780  {
781  // buffer first record
782  if (!first_record_was_read)
783  {
784  read_next_record();
785  first_record_was_read = true;
786  }
787 
788  return {*this};
789  }
790 
// Returns a value-initialised sentinel (sentinel is std::default_sentinel_t).
// Reads nothing from the stream and does not touch any member.
804  sentinel end() noexcept
805  {
806  return {};
807  }
808 
// Returns the result of dereferencing begin().
// NOTE(review): this function is declared noexcept, but begin() appears to call
// read_next_record() when no record has been buffered yet; if that read can throw,
// the exception would hit the noexcept boundary - confirm.
832  reference front() noexcept
833  {
834  return *begin();
835  }
837 
840 
854  {
855  // make sure header is read
856  if (!first_record_was_read)
857  {
858  read_next_record();
859  first_record_was_read = true;
860  }
861 
862  return *header_ptr;
863  }
864 
865 protected:
867 
// Opens `filename` on the primary stream and prepares the secondary stream and the format.
// Steps, in order:
//  1. hand stream_buffer to the primary stream's buffer via pubsetbuf (before opening),
//  2. open the file for input in binary mode,
//  3. throw file_open_error if the stream is not good() afterwards,
//  4. create secondary_stream from the primary stream and the filename,
//  5. let detail::set_format choose `format` from the filename.
// The static_cast assumes primary_stream points to a std::ifstream; the visible
// filename constructors initialise it with `new std::ifstream{}`.
869  void init_by_filename(std::filesystem::path filename)
870  {
871  primary_stream->rdbuf()->pubsetbuf(stream_buffer.data(), stream_buffer.size());
872  static_cast<std::basic_ifstream<char> *>(primary_stream.get())->open(filename,
873  std::ios_base::in | std::ios::binary);
874  // the open above must have succeeded; otherwise report the failure to the caller
875  if (!primary_stream->good())
876  throw file_open_error{"Could not open file " + filename.string() + " for reading."};
877 
878  secondary_stream = detail::make_secondary_istream(*primary_stream, filename);
879  detail::set_format(format, filename);
880  }
881 
// Initialises the file for an explicitly given format (used by the stream constructors).
// Fails at compile time if format_type is not part of valid_formats. Otherwise it stores
// a detail::sam_file_input_format_exposer<format_type> in `format` and creates
// secondary_stream from the primary stream; no filename is involved here.
883  template <typename format_type>
884  void init_by_format()
885  {
886  static_assert(list_traits::contains<format_type, valid_formats>,
887  "You selected a format that is not in the valid_formats of this file.");
888 
889  format = detail::sam_file_input_format_exposer<format_type>{};
890  secondary_stream = detail::make_secondary_istream(*primary_stream);
891  }
892 
894  std::unique_ptr<header_type> header_ptr{new header_type{}};
895 
900  record_type record_buffer;
902  std::vector<char> stream_buffer{std::vector<char>(1'000'000)};
904 
// Deleter for streams this object does not own: intentionally does nothing.
912  static void stream_deleter_noop(std::basic_istream<stream_char_type> *) {}
// Deleter for streams this object owns: releases the stream with `delete`.
914  static void stream_deleter_default(std::basic_istream<stream_char_type> * ptr) { delete ptr; }
915 
917  stream_ptr_t primary_stream{nullptr, stream_deleter_noop};
919  stream_ptr_t secondary_stream{nullptr, stream_deleter_noop};
920 
922  bool first_record_was_read{false};
924  bool at_end{false};
925 
927  using format_type = typename detail::variant_from_tags<valid_formats,
928  detail::sam_file_input_format_exposer>::type;
929 
931  format_type format;
933 
938  typename traits_type::ref_sequences const * reference_sequences_ptr{nullptr};
939 
// Registers reference information in the header.
//  - ref_ids:       the reference identifiers; a new header is constructed from them.
//  - ref_sequences: the reference sequences; only their address is stored in
//                   reference_sequences_ptr, so the object must outlive its use here.
// Both ranges are expected to have the same number of elements (checked with assert only).
// For every index a (sequence length, "") entry is appended to header_ptr->ref_id_info
// and the id is mapped to its index in header_ptr->ref_dict.
// NOTE(review): std::ranges::distance(ref_ids) is re-evaluated in the loop condition on
// every iteration, and ref_sequences[idx] needs operator[] although the template
// parameter is only constrained to forward_range - confirm against the intended inputs.
950  template <std::ranges::forward_range ref_sequences_t>
951  void set_references(typename traits_type::ref_ids & ref_ids, ref_sequences_t && ref_sequences)
952  {
953  assert(std::ranges::distance(ref_ids) == std::ranges::distance(ref_sequences));
954 
     // replaces the default-constructed header with one that is built from ref_ids
955  header_ptr = std::unique_ptr<header_type>{std::make_unique<header_type>(ref_ids)};
956  reference_sequences_ptr = &ref_sequences;
957 
958  // initialise reference map and ref_dict if ref_ids are non-empty
959  for (int32_t idx = 0; idx < std::ranges::distance(ref_ids); ++idx)
960  {
961  header_ptr->ref_id_info.emplace_back(std::ranges::distance(ref_sequences[idx]), "");
962 
     // ids that are contiguous, sized and borrowed ranges are used as key through a std::span
     // over their data; all other id types are used as key directly
963  if constexpr (std::ranges::contiguous_range<std::ranges::range_reference_t<
964  typename traits_type::ref_ids>> &&
965  std::ranges::sized_range<std::ranges::range_reference_t<typename traits_type::ref_ids>> &&
966  std::ranges::borrowed_range<std::ranges::range_reference_t<typename traits_type::ref_ids>>)
967  {
968  auto && id = header_ptr->ref_ids()[idx];
969  header_ptr->ref_dict[std::span{std::ranges::data(id), std::ranges::size(id)}] = idx;
970  }
971  else
972  {
973  header_ptr->ref_dict[header_ptr->ref_ids()[idx]] = idx;
974  }
975  }
976  }
978 
980  void read_next_record()
981  {
982  // clear the record
983  record_buffer.clear();
984  detail::get_or_ignore<field::header_ptr>(record_buffer) = header_ptr.get();
985 
986  // at end if we could not read further
987  if (std::istreambuf_iterator<stream_char_type>{*secondary_stream} ==
989  {
990  at_end = true;
991  return;
992  }
993 
994  auto call_read_func = [this] (auto & ref_seq_info)
995  {
996  std::visit([&] (auto & f)
997  {
998  f.read_alignment_record(*secondary_stream,
999  options,
1000  ref_seq_info,
1001  *header_ptr,
1002  detail::get_or_ignore<field::seq>(record_buffer),
1003  detail::get_or_ignore<field::qual>(record_buffer),
1004  detail::get_or_ignore<field::id>(record_buffer),
1005  detail::get_or_ignore<field::offset>(record_buffer),
1006  detail::get_or_ignore<field::ref_seq>(record_buffer),
1007  detail::get_or_ignore<field::ref_id>(record_buffer),
1008  detail::get_or_ignore<field::ref_offset>(record_buffer),
1009  detail::get_or_ignore<field::alignment>(record_buffer),
1010  detail::get_or_ignore<field::cigar>(record_buffer),
1011  detail::get_or_ignore<field::flag>(record_buffer),
1012  detail::get_or_ignore<field::mapq>(record_buffer),
1013  detail::get_or_ignore<field::mate>(record_buffer),
1014  detail::get_or_ignore<field::tags>(record_buffer),
1015  detail::get_or_ignore<field::evalue>(record_buffer),
1016  detail::get_or_ignore<field::bit_score>(record_buffer));
1017  }, format);
1018  };
1019 
1020  assert(!format.valueless_by_exception());
1021 
1022  if constexpr (!std::same_as<typename traits_type::ref_sequences, ref_info_not_given>)
1023  call_read_func(*reference_sequences_ptr);
1024  else
1025  call_read_func(std::ignore);
1026  }
1027 
1029  friend iterator;
1030 };
1031 
1037 template <input_stream stream_type, sam_file_input_format file_format, detail::fields_specialisation selected_field_ids>
1038 sam_file_input(stream_type && stream, file_format const &, selected_field_ids const &)
1039  -> sam_file_input<typename sam_file_input<>::traits_type, // actually use the default
1042 
1044 template <input_stream stream_type, sam_file_input_format file_format, detail::fields_specialisation selected_field_ids>
1045 sam_file_input(stream_type & stream, file_format const &, selected_field_ids const &)
1046  -> sam_file_input<typename sam_file_input<>::traits_type, // actually use the default
1049 
1051 template <input_stream stream_type, sam_file_input_format file_format>
1052 sam_file_input(stream_type && stream, file_format const &)
1053  -> sam_file_input<typename sam_file_input<>::traits_type, // actually use the default
1054  typename sam_file_input<>::selected_field_ids, // actually use the default
1056 
1058 template <input_stream stream_type, sam_file_input_format file_format>
1059 sam_file_input(stream_type & stream, file_format const &)
1060  -> sam_file_input<typename sam_file_input<>::traits_type, // actually use the default
1061  typename sam_file_input<>::selected_field_ids, // actually use the default
1063 
1065 template <std::ranges::forward_range ref_ids_t,
1066  std::ranges::forward_range ref_sequences_t,
1067  detail::fields_specialisation selected_field_ids>
1068 sam_file_input(std::filesystem::path path, ref_ids_t &, ref_sequences_t &, selected_field_ids const &)
1072  typename sam_file_input<>::valid_formats>; // actually use the default
1073 
1075 template <std::ranges::forward_range ref_ids_t,
1076  std::ranges::forward_range ref_sequences_t>
1077 sam_file_input(std::filesystem::path path, ref_ids_t &, ref_sequences_t &)
1080  typename sam_file_input<>::selected_field_ids, // actually use the default
1081  typename sam_file_input<>::valid_formats>; // actually use the default
1082 
1084 template <input_stream stream_type,
1085  std::ranges::forward_range ref_ids_t,
1086  std::ranges::forward_range ref_sequences_t,
1087  sam_file_input_format file_format,
1088  detail::fields_specialisation selected_field_ids>
1089 sam_file_input(stream_type && stream, ref_ids_t &, ref_sequences_t &, file_format const &, selected_field_ids const &)
1094 
1096 template <input_stream stream_type,
1097  std::ranges::forward_range ref_ids_t,
1098  std::ranges::forward_range ref_sequences_t,
1099  sam_file_input_format file_format,
1100  detail::fields_specialisation selected_field_ids>
1101 sam_file_input(stream_type & stream, ref_ids_t &, ref_sequences_t &, file_format const &, selected_field_ids const &)
1106 
1108 template <input_stream stream_type,
1109  std::ranges::forward_range ref_ids_t,
1110  std::ranges::forward_range ref_sequences_t,
1111  sam_file_input_format file_format>
1112 sam_file_input(stream_type && stream, ref_ids_t &, ref_sequences_t &, file_format const &)
1115  typename sam_file_input<>::selected_field_ids, // actually use the default
1117 
1119 template <input_stream stream_type,
1120  std::ranges::forward_range ref_ids_t,
1121  std::ranges::forward_range ref_sequences_t,
1122  sam_file_input_format file_format>
1123 sam_file_input(stream_type & stream, ref_ids_t &, ref_sequences_t &, file_format const &)
1126  typename sam_file_input<>::selected_field_ids, // actually use the default
1129 
1130 } // namespace seqan3
Provides seqan3::aa27, container aliases and string literals.
Provides seqan3::gap_decorator.
Provides the seqan3::cigar alphabet.
Provides alphabet adaptations for standard char types.
A combined alphabet that can hold values of either of its alternatives.
Definition: alphabet_variant.hpp:131
The 15 letter DNA alphabet, containing all IUPAC symbols minus the gap.
Definition: dna15.hpp:51
The five letter DNA alphabet of A,C,G,T and the unknown character N.
Definition: dna5.hpp:51
A gap decorator allows the annotation of sequences with gap symbols while leaving the underlying sequ...
Definition: gap_decorator.hpp:83
Quality type for traditional Sanger and modern Illumina Phred scores.
Definition: phred42.hpp:47
Stores the header information of alignment files.
Definition: header.hpp:32
A class for reading alignment files, e.g. SAM, BAM, BLAST ...
Definition: input.hpp:357
sentinel end() noexcept
Returns a sentinel for comparison with iterator.
Definition: input.hpp:804
size_t size_type
An unsigned integer type, usually std::size_t.
Definition: input.hpp:559
std::optional< int32_t > ref_id_type
The type of field::ref_id is fixed to std::optional<int32_t>.
Definition: input.hpp:416
void const_reference
The const_reference type is void because files are not const-iterable.
Definition: input.hpp:557
sam_file_input(stream_t &stream, file_format const &format_tag, selected_field_ids const &fields_tag=selected_field_ids{})
Construct from an existing stream and with specified format.
Definition: input.hpp:633
valid_formats_ valid_formats
A seqan3::type_list with the possible formats.
Definition: input.hpp:368
sam_file_input(std::filesystem::path path, ref_ids_t &, ref_sequences_t &) -> sam_file_input< sam_file_input_default_traits< std::remove_reference_t< ref_sequences_t >, std::remove_reference_t< ref_ids_t >>, typename sam_file_input<>::selected_field_ids, typename sam_file_input<>::valid_formats >
Deduce ref_sequences_t and ref_ids_t, default the rest.
sam_file_input(stream_type &&stream, ref_ids_t &, ref_sequences_t &, file_format const &, selected_field_ids const &) -> sam_file_input< sam_file_input_default_traits< std::remove_reference_t< ref_sequences_t >, std::remove_reference_t< ref_ids_t >>, selected_field_ids, type_list< file_format >>
Deduce selected fields, ref_sequences_t and ref_ids_t, and file format.
char stream_char_type
Character type of the stream(s).
Definition: input.hpp:370
detail::in_file_iterator< sam_file_input > iterator
The iterator type of this view (an input iterator).
Definition: input.hpp:563
typename traits_type::template sequence_container< typename traits_type::sequence_alphabet > sequence_type
The type of field::seq (default std::vector<seqan3::dna5>).
Definition: input.hpp:395
sam_file_input(std::filesystem::path filename, selected_field_ids const &fields_tag=selected_field_ids{})
Construct from filename.
Definition: input.hpp:603
sam_file_input & operator=(sam_file_input &&)=default
Move assignment is defaulted.
sam_file_input(stream_type &&stream, file_format const &) -> sam_file_input< typename sam_file_input<>::traits_type, typename sam_file_input<>::selected_field_ids, type_list< file_format >>
Deduce file_format, and default the rest.
std::default_sentinel_t sentinel
The type returned by end().
Definition: input.hpp:567
sam_file_input(stream_t &stream, typename traits_type::ref_ids &ref_ids, typename traits_type::ref_sequences &ref_sequences, file_format const &format_tag, selected_field_ids const &fields_tag=selected_field_ids{})
Construct from an existing stream and with specified format.
Definition: input.hpp:715
double bitscore_type
The type of field::bit_score is fixed to double.
Definition: input.hpp:439
sam_file_input(stream_type &stream, ref_ids_t &, ref_sequences_t &, file_format const &, selected_field_ids const &) -> sam_file_input< sam_file_input_default_traits< std::remove_reference_t< ref_sequences_t >, std::remove_reference_t< ref_ids_t >>, selected_field_ids, type_list< file_format >>
Deduce selected fields, ref_sequences_t and ref_ids_t, and file format.
sam_file_input(stream_type &stream, ref_ids_t &, ref_sequences_t &, file_format const &) -> sam_file_input< sam_file_input_default_traits< std::remove_reference_t< ref_sequences_t >, std::remove_reference_t< ref_ids_t >>, typename sam_file_input<>::selected_field_ids, type_list< file_format >>
Deduce selected fields, ref_sequences_t and ref_ids_t, and file format.
std::optional< int32_t > ref_offset_type
The type of field::ref_offset is fixed to an std::optional<int32_t>.
Definition: input.hpp:423
traits_type_ traits_type
A traits type that defines aliases and template for storage of the fields.
Definition: input.hpp:364
int32_t offset_type
The type of field::offset is fixed to int32_t.
Definition: input.hpp:399
double e_value_type
The type of field::evalue is fixed to double.
Definition: input.hpp:437
sam_file_input_options< typename traits_type::sequence_legal_alphabet > options
The options are public and its members can be set directly.
Definition: input.hpp:839
sam_file_header< typename traits_type::ref_ids > header_type
The type of field::header_ptr (default: sam_file_header<typename traits_type::ref_ids>).
Definition: input.hpp:442
header_type & header()
Access the file's header.
Definition: input.hpp:853
sam_file_input(std::filesystem::path path, ref_ids_t &, ref_sequences_t &, selected_field_ids const &) -> sam_file_input< sam_file_input_default_traits< std::remove_reference_t< ref_sequences_t >, std::remove_reference_t< ref_ids_t >>, selected_field_ids, typename sam_file_input<>::valid_formats >
Deduce selected fields, ref_sequences_t and ref_ids_t, default the rest.
static constexpr bool is_default_selected_field_ids
Does selected_field_ids contain all fields like in the default case?
Definition: input.hpp:523
typename traits_type::template id_container< char > id_type
The type of field::id (std::string by default).
Definition: input.hpp:397
typename traits_type::template quality_container< typename traits_type::quality_alphabet > quality_type
The type of field::qual (default std::vector<seqan3::phred42>).
Definition: input.hpp:428
sam_file_input(stream_t &&stream, typename traits_type::ref_ids &ref_ids, typename traits_type::ref_sequences &ref_sequences, file_format const &format_tag, selected_field_ids const &fields_tag=selected_field_ids{})
This is an overloaded member function, provided for convenience. It differs from the above function o...
Definition: input.hpp:730
std::tuple< gap_decorator< ref_sequence_type >, alignment_query_type > alignment_type
The type of field::alignment (default: std::pair<std::vector<gapped<dna5>>, std::vector<gapped<dna5>>...
Definition: input.hpp:455
sam_record< detail::select_types_with_ids_t< field_types, field_ids, selected_field_ids >, selected_field_ids > record_type
The type of the record, a specialisation of seqan3::record; acts as a tuple of the selected field typ...
Definition: input.hpp:545
sam_file_input()=delete
Default constructor is explicitly deleted, you need to give a stream or file name.
iterator begin()
Returns an iterator to current position in the file.
Definition: input.hpp:779
selected_field_ids_ selected_field_ids
A seqan3::fields list with the fields selected for the record.
Definition: input.hpp:366
std::conditional_t< std::same_as< typename traits_type::ref_sequences, ref_info_not_given >, dummy_ref_type, ref_sequence_sliced_type > ref_sequence_type
The type of field::ref_seq (default depends on construction).
Definition: input.hpp:408
sam_file_input(std::filesystem::path filename, typename traits_type::ref_ids &ref_ids, typename traits_type::ref_sequences &ref_sequences, selected_field_ids const &fields_tag=selected_field_ids{})
Construct from filename and given additional reference information.
Definition: input.hpp:677
sam_file_input(sam_file_input &&)=default
Move construction is defaulted.
void const_iterator
The const iterator type is void because files are not const-iterable.
Definition: input.hpp:565
sam_file_input(sam_file_input const &)=delete
Copy construction is explicitly deleted because you cannot have multiple access to the same file.
uint8_t mapq_type
The type of field::mapq is fixed to uint8_t.
Definition: input.hpp:425
sam_flag flag_type
The type of field::flag is fixed to seqan3::sam_flag.
Definition: input.hpp:430
sam_file_input(stream_t &&stream, file_format const &format_tag, selected_field_ids const &fields_tag=selected_field_ids{})
This is an overloaded member function, provided for convenience. It differs from the above function o...
Definition: input.hpp:646
sam_file_input(stream_type &stream, file_format const &, selected_field_ids const &) -> sam_file_input< typename sam_file_input<>::traits_type, selected_field_ids, type_list< file_format >>
Deduce selected fields, file_format, and default the rest.
sam_file_input(stream_type &stream, file_format const &) -> sam_file_input< typename sam_file_input<>::traits_type, typename sam_file_input<>::selected_field_ids, type_list< file_format >>
Deduce file_format, and default the rest.
sam_file_input & operator=(sam_file_input const &)=delete
Copy assignment is explicitly deleted because you cannot have multiple access to the same file.
~sam_file_input()=default
Destructor is defaulted.
sam_file_input(stream_type &&stream, ref_ids_t &, ref_sequences_t &, file_format const &) -> sam_file_input< sam_file_input_default_traits< std::remove_reference_t< ref_sequences_t >, std::remove_reference_t< ref_ids_t >>, typename sam_file_input<>::selected_field_ids, type_list< file_format >>
Deduce ref_sequences_t and ref_ids_t, and file format.
std::tuple< ref_id_type, ref_offset_type, int32_t > mate_type
The type of field::mate is fixed to std::tuple<ref_id_type, ref_offset_type, int32_t>.
Definition: input.hpp:434
reference front() noexcept
Return the record we are currently at in the file.
Definition: input.hpp:832
sam_file_input(stream_type &&stream, file_format const &, selected_field_ids const &) -> sam_file_input< typename sam_file_input<>::traits_type, selected_field_ids, type_list< file_format >>
Deduce selected fields, file_format, and default the rest.
The SAM tag dictionary class that stores all optional SAM fields.
Definition: sam_tag_dictionary.hpp:332
The Concepts library.
T data(T... args)
Provides seqan3::dna15, container aliases and string literals.
Provides seqan3::dna5, container aliases and string literals.
This header includes C++17 filesystem support and imports it into namespace std::filesystem (independ...
T format(T... args)
T get(T... args)
sam_flag
An enum flag that describes the properties of an aligned read (given as a SAM record).
Definition: sam_flag.hpp:73
field
An enumerator for the fields used in file formats.
Definition: record.hpp:63
@ flag
The alignment flag (bit information), uint16_t value.
@ ref_offset
Sequence (seqan3::field::ref_seq) relative start position (0-based), unsigned value.
@ ref_seq
The (reference) "sequence" information, usually a range of nucleotides or amino acids.
@ alignment
The (pairwise) alignment stored in an object that models seqan3::detail::pairwise_alignment.
@ cigar
The cigar vector (std::vector<seqan3::cigar>) representing the alignment in SAM/BAM format.
@ mapq
The mapping quality of the seqan3::field::seq alignment, usually a Phred-scaled score.
@ bit_score
The bit score (statistical significance indicator), unsigned value.
@ offset
Sequence (seqan3::field::seq) relative start position (0-based), unsigned value.
@ mate
The mate pair information given as a std::tuple of reference name, offset and template length.
@ header_ptr
A pointer to the seqan3::sam_file_header object storing header information.
@ ref_id
The identifier of the (reference) sequence that seqan3::field::seq was aligned to.
@ evalue
The e-value (length normalized bit score), double value.
@ id
The identifier, usually a string.
@ tags
The optional tags in the SAM format, stored in a dictionary.
@ seq
The "sequence", usually a range of nucleotides or amino acids.
@ qual
The qualities, usually in Phred score notation.
constexpr bool contains
Whether a type occurs in a type list or not.
Definition: traits.hpp:231
decltype(detail::transform< trait_t >(list_t{})) transform
Apply a transformation trait to every type in the list and return a seqan3::type_list of the results.
Definition: traits.hpp:471
constexpr size_t size
The size of a type pack.
Definition: traits.hpp:151
constexpr auto slice
A view adaptor that returns a half-open interval on the underlying range.
Definition: slice.hpp:189
constexpr auto repeat_n
A view factory that repeats a given value n times.
Definition: repeat_n.hpp:95
auto const move
A view that turns lvalue-references into rvalue-references.
Definition: move.hpp:74
Provides the seqan3::detail::in_file_iterator class template.
The generic alphabet concept that covers most data types used in ranges.
Resolves to std::ranges::explicitly_convertible_to<type1, type2>(). <dl class="no-api">This entity i...
The generic concept for alignment file input formats.
The requirements a traits_type for seqan3::sam_file_input must meet.
A more refined container concept than seqan3::container.
Refines seqan3::alphabet and adds assignability.
A concept that indicates whether a writable alphabet represents quality scores.
Provides exceptions used in the I/O module.
Stream concepts.
Provides various utility functions required only for input.
The main SeqAn3 namespace.
Definition: aligned_sequence_concept.hpp:29
Provides seqan3::phred42 quality scores.
Provides quality alphabet composites.
Adaptations of concepts from the Ranges TS.
Provides the seqan3::format_bam.
Provides the seqan3::format_sam.
Provides seqan3::sam_file_input_format and auxiliary classes.
Provides seqan3::sam_record.
Provides helper data structures for the seqan3::sam_file_output.
T size(T... args)
A class template that holds a choice of seqan3::field.
Definition: record.hpp:172
Thrown if there is an unspecified filesystem or stream error while opening, e.g. permission problem.
Definition: exception.hpp:40
The default traits for seqan3::sam_file_input.
Definition: input.hpp:184
ref_ids_t ref_ids
The type of the reference identifiers is deduced on construction.
Definition: input.hpp:215
ref_sequences_t ref_sequences
The type of the reference sequences is deduced on construction.
Definition: input.hpp:212
Type that contains multiple types.
Definition: type_list.hpp:29
Provides seqan3::tuple_like.
Provides traits for seqan3::type_list.
Provides seqan3::detail::transformation_trait_or.
Provides seqan3::views::repeat_n.
Provides seqan3::views::slice.
T visit(T... args)