SeqAn3 3.2.0
The Modern C++ library for sequence analysis.
input.hpp
Go to the documentation of this file.
1// -----------------------------------------------------------------------------------------------------
2// Copyright (c) 2006-2022, Knut Reinert & Freie Universität Berlin
3// Copyright (c) 2016-2022, Knut Reinert & MPI für molekulare Genetik
4// This file may be used, modified and/or redistributed under the terms of the 3-clause BSD-License
5// shipped with this file and also available at: https://github.com/seqan/seqan3/blob/master/LICENSE.md
6// -----------------------------------------------------------------------------------------------------
7
13#pragma once
14
15#include <cassert>
16#include <concepts>
17#include <filesystem>
18#include <fstream>
19#include <ranges>
20#include <string>
21#include <variant>
22#include <vector>
23
34#include <seqan3/io/detail/record.hpp>
47
48namespace seqan3
49{
50
51// ---------------------------------------------------------------------------------------------------------------------
52// sam_file_input_traits
53// ---------------------------------------------------------------------------------------------------------------------
54
113template <typename t>
114concept sam_file_input_traits =
115 requires (t v) {
116 // field::seq
121
122 // field::id
124
125 // field::qual
128
129 // field::ref_seq
130 // either ref_info_not_given or a range over ranges over alphabet (e.g. std::vector<dna4_vector>)
131 requires std::same_as<typename t::ref_sequences, ref_info_not_given>
132 || requires () {
133 requires alphabet<std::ranges::range_reference_t<
134 std::ranges::range_reference_t<typename t::ref_sequences>>>;
135 };
136
137 // field::ref_id
139 && (!std::same_as<typename t::ref_sequences, ref_info_not_given>
141 std::ranges::range_reference_t<std::ranges::range_reference_t<typename t::ref_ids>>>);
142 requires std::ranges::forward_range<std::ranges::range_reference_t<typename t::ref_ids>>;
143 requires std::ranges::forward_range<typename t::ref_ids>;
144
145 // field::offset is fixed to int32_t
146 // field::ref_offset is fixed to std::optional<int32_t>
147 // field::flag is fixed to seqan3::sam_flag
148 // field::mapq is fixed to uint8_t
149 // field::evalue is fixed to double
150 // field::bitscore is fixed to double
151 // field::mate is fixed to std::tuple<ref_id_type, ref_offset_type, int32_t>
152
153 // field::alignment
154 // the alignment type cannot be configured.
155 // Type of tuple entry 1 (reference) is set to
156 // 1) a std::ranges::subrange over std::ranges::range_value_t<typename t::ref_sequences> if reference information was given
157 // or 2) a "dummy" sequence type:
158 // views::repeat_n(sequence_alphabet{}, size_t{}) | std::views::transform(detail::access_restrictor_fn{})
159 // Type of tuple entry 2 (query) is set to
160 // 1) a std::ranges::subrange over std::ranges::range_value_t<typename t::ref_sequences> if reference information was given
161 // or 2) a "dummy" sequence type:
162 };
164
165// ---------------------------------------------------------------------------------------------------------------------
166// sam_file_input_default_traits
167// ---------------------------------------------------------------------------------------------------------------------
168
184template <typename ref_sequences_t = ref_info_not_given, typename ref_ids_t = std::deque<std::string>>
186{
194
197
199 template <typename _sequence_alphabet>
201
203 template <typename _id_alphabet>
205
208
210 template <typename _quality_alphabet>
212
214 using ref_sequences = ref_sequences_t;
215
217 using ref_ids = ref_ids_t;
219};
220
221// ---------------------------------------------------------------------------------------------------------------------
222// sam_file_input
223// ---------------------------------------------------------------------------------------------------------------------
224
240template <sam_file_input_traits traits_type_ = sam_file_input_default_traits<>,
241 detail::fields_specialisation selected_field_ids_ = fields<field::seq,
242 field::id,
243 field::offset,
244 field::ref_id,
245 field::ref_offset,
246 field::alignment,
247 field::cigar,
248 field::mapq,
249 field::qual,
250 field::flag,
251 field::mate,
252 field::tags,
253 field::header_ptr>,
254 detail::type_list_of_sam_file_input_formats valid_formats_ = type_list<format_sam, format_bam>>
256{
257public:
263 using traits_type = traits_type_;
265 using selected_field_ids = selected_field_ids_;
267 using valid_formats = valid_formats_;
269 using stream_char_type = char;
271
272private:
274 using dummy_ref_type = decltype(views::repeat_n(typename traits_type::sequence_alphabet{}, size_t{})
275 | std::views::transform(detail::access_restrictor_fn{}));
276
278 using ref_sequence_unsliced_type = detail::lazy_conditional_t<
279 std::ranges::range<typename traits_type::ref_sequences const>,
280 detail::lazy<std::ranges::range_reference_t, typename traits_type::ref_sequences const>,
281 dummy_ref_type>;
282
284 using ref_sequence_sliced_type = decltype(std::declval<ref_sequence_unsliced_type>() | views::slice(0, 0));
285
286public:
295 using id_type = typename traits_type::template id_container<char>;
297 using offset_type = int32_t;
305 dummy_ref_type,
306 ref_sequence_sliced_type>;
323 using mapq_type = uint8_t;
325 using quality_type = typename traits_type::template quality_container<typename traits_type::quality_alphabet>;
334
335private:
341
342public:
345
348 id_type,
354 mapq_type,
356 flag_type,
357 mate_type,
359 header_type *>;
360
383 field::id,
395
396 static_assert(
    // Compile-time validation of the selected fields: the immediately-invoked constexpr lambda returns
    // false as soon as one entry of selected_field_ids::as_array is not contained in field_ids, which
    // makes the assertion fail with the message below.
397 []() constexpr {
398 for (field f : selected_field_ids::as_array)
399 if (!field_ids::contains(f))
400 return false;
401 return true;
402 }(),
403 "You selected a field that is not valid for alignment files, please refer to the documentation "
404 "of sam_file_input::field_ids for the accepted values.");
405
410
420 using const_reference = void;
422 using size_type = size_t;
426 using iterator = detail::in_file_iterator<sam_file_input>;
428 using const_iterator = void;
430 using sentinel = std::default_sentinel_t;
432
437 sam_file_input() = delete;
447 ~sam_file_input() = default;
448
467 selected_field_ids const & SEQAN3_DOXYGEN_ONLY(fields_tag) = selected_field_ids{}) :
468 primary_stream{new std::ifstream{}, stream_deleter_default}
469 {
470 init_by_filename(std::move(filename));
471 }
472
492 template <input_stream stream_t, sam_file_input_format file_format>
    // Construct from an existing stream (passed as lvalue) and an explicitly specified format.
    // Only participates if the stream's char_type equals stream_char_type.
    // The stream remains owned by the caller: primary_stream stores its address together with
    // stream_deleter_noop, so nothing is deleted on destruction.
    // Neither tag object is read in the body; file_format is only used as a compile-time argument
    // for init_by_format().
493 requires std::same_as<typename std::remove_reference_t<stream_t>::char_type, stream_char_type>
494 sam_file_input(stream_t & stream,
495 file_format const & SEQAN3_DOXYGEN_ONLY(format_tag),
496 selected_field_ids const & SEQAN3_DOXYGEN_ONLY(fields_tag) = selected_field_ids{}) :
497 primary_stream{&stream, stream_deleter_noop}
498 {
    // fixes the format variant to file_format and creates the secondary stream
499 init_by_format<file_format>();
500 }
501
503 template <input_stream stream_t, sam_file_input_format file_format>
    // Overload of the stream constructor for streams passed as rvalue (temporary or std::move'd).
    // Only participates if the stream's char_type equals stream_char_type.
    // The stream is moved into a heap-allocated stream_t that this object owns; it is released
    // through stream_deleter_default.
    // Neither tag object is read in the body; file_format is only used as a compile-time argument
    // for init_by_format().
504 requires std::same_as<typename std::remove_reference_t<stream_t>::char_type, stream_char_type>
505 sam_file_input(stream_t && stream,
506 file_format const & SEQAN3_DOXYGEN_ONLY(format_tag),
507 selected_field_ids const & SEQAN3_DOXYGEN_ONLY(fields_tag) = selected_field_ids{}) :
508 primary_stream{new stream_t{std::move(stream)}, stream_deleter_default}
509 {
    // fixes the format variant to file_format and creates the secondary stream
510 init_by_format<file_format>();
511 }
512
537 typename traits_type::ref_ids & ref_ids,
538 typename traits_type::ref_sequences & ref_sequences,
539 selected_field_ids const & SEQAN3_DOXYGEN_ONLY(fields_tag) = selected_field_ids{}) :
540 primary_stream{new std::ifstream{}, stream_deleter_default}
541 {
542 // initialize reference information
543 set_references(ref_ids, ref_sequences);
544
545 init_by_filename(std::move(filename));
546 }
547
573 template <input_stream stream_t, sam_file_input_format file_format>
    // Construct from an existing stream (passed as lvalue), additional reference information and an
    // explicitly specified format.
    // The stream remains owned by the caller (address stored with stream_deleter_noop).
    // ref_ids and ref_sequences are taken by lvalue reference and handed to set_references().
    // NOTE(review): unlike the stream constructors without reference information, this overload has no
    // requires-clause checking the stream's char_type against stream_char_type — confirm this is intended.
574 sam_file_input(stream_t & stream,
575 typename traits_type::ref_ids & ref_ids,
576 typename traits_type::ref_sequences & ref_sequences,
577 file_format const & SEQAN3_DOXYGEN_ONLY(format_tag),
578 selected_field_ids const & SEQAN3_DOXYGEN_ONLY(fields_tag) = selected_field_ids{}) :
579 primary_stream{&stream, stream_deleter_noop}
580 {
581 // initialize reference information (creates the header and stores a pointer to the sequences)
582 set_references(ref_ids, ref_sequences);
583
    // fixes the format variant to file_format and creates the secondary stream
584 init_by_format<file_format>();
585 }
586
588 template <input_stream stream_t, sam_file_input_format file_format>
    // Overload of the stream + reference information constructor for streams passed as rvalue.
    // The stream is moved into a heap-allocated stream_t owned by this object (released through
    // stream_deleter_default); ref_ids and ref_sequences are still taken by lvalue reference.
    // NOTE(review): unlike the stream constructors without reference information, this overload has no
    // requires-clause checking the stream's char_type against stream_char_type — confirm this is intended.
589 sam_file_input(stream_t && stream,
590 typename traits_type::ref_ids & ref_ids,
591 typename traits_type::ref_sequences & ref_sequences,
592 file_format const & SEQAN3_DOXYGEN_ONLY(format_tag),
593 selected_field_ids const & SEQAN3_DOXYGEN_ONLY(fields_tag) = selected_field_ids{}) :
594 primary_stream{new stream_t{std::move(stream)}, stream_deleter_default}
595 {
596 // initialize reference information (creates the header and stores a pointer to the sequences)
597 set_references(ref_ids, ref_sequences);
598
    // fixes the format variant to file_format and creates the secondary stream
599 init_by_format<file_format>();
600 }
601
603 // explicitly delete rvalues for reference information
605 typename traits_type::ref_ids &&,
606 typename traits_type::ref_sequences &&,
607 selected_field_ids const &) = delete;
608
609 template <input_stream stream_t, sam_file_input_format file_format>
    // Deleted overload: rejects reference ids and reference sequences that are passed as rvalues
    // (e.g. temporaries) together with a stream and a format tag.
    // NOTE(review): presumably needed because set_references() stores the address of the reference
    // sequences, which would dangle for a temporary — confirm.
610 sam_file_input(stream_t &&,
611 typename traits_type::ref_ids &&,
612 typename traits_type::ref_sequences &&,
613 file_format const &,
614 selected_field_ids const &) = delete;
617
639 {
640 // buffer first record
641 if (!first_record_was_read)
642 {
643 read_next_record();
644 first_record_was_read = true;
645 }
646
647 return {*this};
648 }
649
663 sentinel end() noexcept
    // Returns a value-initialised sentinel (std::default_sentinel_t) for comparison with the iterator.
    // Does not touch the stream or any member state.
664 {
665 return {};
666 }
667
691 reference front() noexcept
    // Returns the record the file is currently at by dereferencing begin().
    // NOTE(review): this function is declared noexcept, but begin() calls read_next_record() on first
    // use; if reading can throw, the exception would terminate the program here — confirm.
692 {
693 return *begin();
694 }
696
699
713 {
714 // make sure header is read
715 if (!first_record_was_read)
716 {
717 read_next_record();
718 first_record_was_read = true;
719 }
720
721 return *header_ptr;
722 }
723
724protected:
726
728 void init_by_filename(std::filesystem::path filename)
    // Opens `filename` on the primary stream in binary input mode, throws file_open_error if the
    // stream is not good afterwards, then creates the secondary stream and sets `format` from the filename.
729 {
    // Install the member `stream_buffer` as the buffer of the primary stream.
    // NOTE(review): presumably done before open() because a file buffer may ignore a buffer that is
    // set after a file has been opened — confirm.
730 primary_stream->rdbuf()->pubsetbuf(stream_buffer.data(), stream_buffer.size());
    // The cast relies on primary_stream pointing to a std::ifstream, which is what the filename
    // constructors create before calling this function.
731 static_cast<std::basic_ifstream<char> *>(primary_stream.get())
732 ->open(filename, std::ios_base::in | std::ios::binary);
733 // the open() above failed if the stream is not in a good state
734 if (!primary_stream->good())
735 throw file_open_error{"Could not open file " + filename.string() + " for reading."};
736
    // NOTE(review): make_secondary_istream / set_format are defined elsewhere; presumably they detect
    // compression and pick the format from the file extension — confirm.
737 secondary_stream = detail::make_secondary_istream(*primary_stream, filename);
738 detail::set_format(format, filename);
739 }
740
742 template <typename format_type>
    // Initialises the file for an explicitly given format (used by the stream constructors):
    // sets the `format` member to the exposer of `format_type` and creates the secondary stream
    // from the primary stream. No filename is involved.
743 void init_by_format()
744 {
    // reject formats that are not part of this file's valid_formats at compile time
745 static_assert(list_traits::contains<format_type, valid_formats>,
746 "You selected a format that is not in the valid_formats of this file.");
747
748 format = detail::sam_file_input_format_exposer<format_type>{};
749 secondary_stream = detail::make_secondary_istream(*primary_stream);
750 }
751
754
759 record_type record_buffer;
761 std::vector<char> stream_buffer{std::vector<char>(1'000'000)};
763 std::streampos position_buffer{};
765
773 static void stream_deleter_noop(std::basic_istream<stream_char_type> *)
    // Deleter that intentionally does nothing; used for streams this object does not own
    // (e.g. a stream passed by lvalue reference) and as the default for the stream pointers.
774 {}
776 static void stream_deleter_default(std::basic_istream<stream_char_type> * ptr)
    // Deleter for streams owned by this object (allocated with `new` in the constructors):
    // destroys the stream through its std::basic_istream base pointer.
777 {
778 delete ptr;
779 }
780
782 stream_ptr_t primary_stream{nullptr, stream_deleter_noop};
784 stream_ptr_t secondary_stream{nullptr, stream_deleter_noop};
785
787 bool first_record_was_read{false};
789 bool at_end{false};
790
792 using format_type = typename detail::variant_from_tags<valid_formats, detail::sam_file_input_format_exposer>::type;
793
795 format_type format;
797
802 typename traits_type::ref_sequences const * reference_sequences_ptr{nullptr};
803
814 template <std::ranges::forward_range ref_sequences_t>
    // Registers the reference information: creates the header from `ref_ids`, stores a pointer to
    // `ref_sequences`, and fills the header's ref_id_info and ref_dict with one entry per reference.
    // Expects ref_ids and ref_sequences to have the same number of elements (checked by assert only).
    // NOTE(review): the constraint is only forward_range, but the body uses ref_sequences[idx] and
    // ref_ids()[idx], i.e. it requires operator[] — confirm the intended requirements.
    // NOTE(review): the address of the parameter is stored, so the referenced sequences must outlive
    // this object — confirm against the callers.
815 void set_references(typename traits_type::ref_ids & ref_ids, ref_sequences_t && ref_sequences)
816 {
817 assert(std::ranges::distance(ref_ids) == std::ranges::distance(ref_sequences));
818
819 header_ptr = std::unique_ptr<header_type>{std::make_unique<header_type>(ref_ids)};
820 reference_sequences_ptr = &ref_sequences;
821
822 // initialise ref_id_info and ref_dict for every reference (the loop is a no-op if ref_ids is empty)
    // NOTE(review): std::ranges::distance(ref_ids) is re-evaluated in every iteration, which is linear
    // per call for ranges that are not sized — consider hoisting.
823 for (int32_t idx = 0; idx < std::ranges::distance(ref_ids); ++idx)
824 {
    // store the length of the idx-th reference sequence together with an empty string
825 header_ptr->ref_id_info.emplace_back(std::ranges::distance(ref_sequences[idx]), "");
826
    // ids that are contiguous, sized and borrowed ranges are inserted into ref_dict as a std::span
    // over their data; all other id types are used as key directly
827 if constexpr (std::ranges::contiguous_range<std::ranges::range_reference_t<typename traits_type::ref_ids>>
828 && std::ranges::sized_range<std::ranges::range_reference_t<typename traits_type::ref_ids>>
829 && std::ranges::borrowed_range<std::ranges::range_reference_t<typename traits_type::ref_ids>>)
830 {
831 auto && id = header_ptr->ref_ids()[idx];
832 header_ptr->ref_dict[std::span{std::ranges::data(id), std::ranges::size(id)}] = idx;
833 }
834 else
835 {
836 header_ptr->ref_dict[header_ptr->ref_ids()[idx]] = idx;
837 }
838 }
839 }
841
843 void read_next_record()
844 {
845 // clear the record
846 record_buffer.clear();
847 detail::get_or_ignore<field::header_ptr>(record_buffer) = header_ptr.get();
848
849 // at end if we could not read further
850 if (std::istreambuf_iterator<stream_char_type>{*secondary_stream}
852 {
853 at_end = true;
854 return;
855 }
856
857 auto call_read_func = [this](auto & ref_seq_info)
858 {
860 [&](auto & f)
861 {
862 f.read_alignment_record(*secondary_stream,
863 options,
864 ref_seq_info,
865 *header_ptr,
866 position_buffer,
867 detail::get_or_ignore<field::seq>(record_buffer),
868 detail::get_or_ignore<field::qual>(record_buffer),
869 detail::get_or_ignore<field::id>(record_buffer),
870 detail::get_or_ignore<field::offset>(record_buffer),
871 detail::get_or_ignore<field::ref_seq>(record_buffer),
872 detail::get_or_ignore<field::ref_id>(record_buffer),
873 detail::get_or_ignore<field::ref_offset>(record_buffer),
874 detail::get_or_ignore<field::alignment>(record_buffer),
875 detail::get_or_ignore<field::cigar>(record_buffer),
876 detail::get_or_ignore<field::flag>(record_buffer),
877 detail::get_or_ignore<field::mapq>(record_buffer),
878 detail::get_or_ignore<field::mate>(record_buffer),
879 detail::get_or_ignore<field::tags>(record_buffer),
880 detail::get_or_ignore<field::evalue>(record_buffer),
881 detail::get_or_ignore<field::bit_score>(record_buffer));
882 },
883 format);
884 };
885
886 assert(!format.valueless_by_exception());
887
888 if constexpr (!std::same_as<typename traits_type::ref_sequences, ref_info_not_given>)
889 call_read_func(*reference_sequences_ptr);
890 else
891 call_read_func(std::ignore);
892 }
893
895 friend iterator;
896};
897
903template <input_stream stream_type, sam_file_input_format file_format, detail::fields_specialisation selected_field_ids>
904sam_file_input(stream_type && stream, file_format const &, selected_field_ids const &)
905 -> sam_file_input<typename sam_file_input<>::traits_type, // actually use the default
908
910template <input_stream stream_type, sam_file_input_format file_format, detail::fields_specialisation selected_field_ids>
911sam_file_input(stream_type & stream, file_format const &, selected_field_ids const &)
912 -> sam_file_input<typename sam_file_input<>::traits_type, // actually use the default
915
917template <input_stream stream_type, sam_file_input_format file_format>
918sam_file_input(stream_type && stream, file_format const &)
919 -> sam_file_input<typename sam_file_input<>::traits_type, // actually use the default
920 typename sam_file_input<>::selected_field_ids, // actually use the default
922
924template <input_stream stream_type, sam_file_input_format file_format>
925sam_file_input(stream_type & stream, file_format const &)
926 -> sam_file_input<typename sam_file_input<>::traits_type, // actually use the default
927 typename sam_file_input<>::selected_field_ids, // actually use the default
929
931template <std::ranges::forward_range ref_ids_t,
932 std::ranges::forward_range ref_sequences_t,
933 detail::fields_specialisation selected_field_ids>
934sam_file_input(std::filesystem::path path, ref_ids_t &, ref_sequences_t &, selected_field_ids const &)
938 typename sam_file_input<>::valid_formats>; // actually use the default
939
941template <std::ranges::forward_range ref_ids_t, std::ranges::forward_range ref_sequences_t>
942sam_file_input(std::filesystem::path path, ref_ids_t &, ref_sequences_t &) -> sam_file_input<
944 typename sam_file_input<>::selected_field_ids, // actually use the default
945 typename sam_file_input<>::valid_formats>; // actually use the default
946
948template <input_stream stream_type,
949 std::ranges::forward_range ref_ids_t,
950 std::ranges::forward_range ref_sequences_t,
951 sam_file_input_format file_format,
952 detail::fields_specialisation selected_field_ids>
953sam_file_input(stream_type && stream, ref_ids_t &, ref_sequences_t &, file_format const &, selected_field_ids const &)
958
960template <input_stream stream_type,
961 std::ranges::forward_range ref_ids_t,
962 std::ranges::forward_range ref_sequences_t,
963 sam_file_input_format file_format,
964 detail::fields_specialisation selected_field_ids>
965sam_file_input(stream_type & stream, ref_ids_t &, ref_sequences_t &, file_format const &, selected_field_ids const &)
970
972template <input_stream stream_type,
973 std::ranges::forward_range ref_ids_t,
974 std::ranges::forward_range ref_sequences_t,
975 sam_file_input_format file_format>
976sam_file_input(stream_type && stream, ref_ids_t &, ref_sequences_t &, file_format const &) -> sam_file_input<
978 typename sam_file_input<>::selected_field_ids, // actually use the default
980
982template <input_stream stream_type,
983 std::ranges::forward_range ref_ids_t,
984 std::ranges::forward_range ref_sequences_t,
985 sam_file_input_format file_format>
986sam_file_input(stream_type & stream, ref_ids_t &, ref_sequences_t &, file_format const &) -> sam_file_input<
988 typename sam_file_input<>::selected_field_ids, // actually use the default
991
992} // namespace seqan3
Provides seqan3::aa27, container aliases and string literals.
Provides the seqan3::cigar alphabet.
Provides alphabet adaptations for standard char types.
A combined alphabet that can hold values of either of its alternatives.
Definition: alphabet_variant.hpp:120
The 15 letter DNA alphabet, containing all IUPAC symbols minus the gap.
Definition: dna15.hpp:51
The five letter DNA alphabet of A,C,G,T and the unknown character N.
Definition: dna5.hpp:51
A gap decorator allows the annotation of sequences with gap symbols while leaving the underlying sequ...
Definition: gap_decorator.hpp:81
Quality type for traditional Sanger and modern Illumina Phred scores.
Definition: phred42.hpp:47
Stores the header information of alignment files.
Definition: header.hpp:34
A class for reading alignment files, e.g. SAM, BAM, BLAST ...
Definition: input.hpp:256
sam_file_input(std::filesystem::path path, ref_ids_t &, ref_sequences_t &) -> sam_file_input< sam_file_input_default_traits< std::remove_reference_t< ref_sequences_t >, std::remove_reference_t< ref_ids_t > >, typename sam_file_input<>::selected_field_ids, typename sam_file_input<>::valid_formats >
Deduce ref_sequences_t and ref_ids_t, default the rest.
sentinel end() noexcept
Returns a sentinel for comparison with iterator.
Definition: input.hpp:663
size_t size_type
An unsigned integer type, usually std::size_t.
Definition: input.hpp:422
std::optional< int32_t > ref_id_type
The type of field::ref_id is fixed to std::optional<int32_t>.
Definition: input.hpp:314
void const_reference
The const_reference type is void because files are not const-iterable.
Definition: input.hpp:420
sam_file_input(std::filesystem::path path, ref_ids_t &, ref_sequences_t &, selected_field_ids const &) -> sam_file_input< sam_file_input_default_traits< std::remove_reference_t< ref_sequences_t >, std::remove_reference_t< ref_ids_t > >, selected_field_ids, typename sam_file_input<>::valid_formats >
Deduce selected fields, ref_sequences_t and ref_ids_t, default the rest.
valid_formats_ valid_formats
A seqan3::type_list with the possible formats.
Definition: input.hpp:267
char stream_char_type
Character type of the stream(s).
Definition: input.hpp:269
detail::in_file_iterator< sam_file_input > iterator
The iterator type of this view (an input iterator).
Definition: input.hpp:426
sam_file_input(stream_type &&stream, ref_ids_t &, ref_sequences_t &, file_format const &) -> sam_file_input< sam_file_input_default_traits< std::remove_reference_t< ref_sequences_t >, std::remove_reference_t< ref_ids_t > >, typename sam_file_input<>::selected_field_ids, type_list< file_format > >
Deduce ref_sequences_t and ref_ids_t, and file format.
sam_file_input(std::filesystem::path filename, selected_field_ids const &fields_tag=selected_field_ids{})
Construct from filename.
Definition: input.hpp:466
sam_file_input(stream_type &stream, file_format const &) -> sam_file_input< typename sam_file_input<>::traits_type, typename sam_file_input<>::selected_field_ids, type_list< file_format > >
Deduce file_format, and default the rest.
std::default_sentinel_t sentinel
The type returned by end().
Definition: input.hpp:430
sam_file_input(stream_t &stream, typename traits_type::ref_ids &ref_ids, typename traits_type::ref_sequences &ref_sequences, file_format const &format_tag, selected_field_ids const &fields_tag=selected_field_ids{})
Construct from an existing stream and with specified format.
Definition: input.hpp:574
typename traits_type::template sequence_container< typename traits_type::sequence_alphabet > sequence_type
The type of field::seq (default std::vector<seqan3::dna5>).
Definition: input.hpp:293
std::optional< int32_t > ref_offset_type
The type of field::ref_offset is fixed to a std::optional<int32_t>.
Definition: input.hpp:321
traits_type_ traits_type
A traits type that defines aliases and template for storage of the fields.
Definition: input.hpp:263
int32_t offset_type
The type of field::offset is fixed to int32_t.
Definition: input.hpp:297
sam_file_input(stream_type &stream, ref_ids_t &, ref_sequences_t &, file_format const &) -> sam_file_input< sam_file_input_default_traits< std::remove_reference_t< ref_sequences_t >, std::remove_reference_t< ref_ids_t > >, typename sam_file_input<>::selected_field_ids, type_list< file_format > >
Deduce ref_sequences_t and ref_ids_t, and file format.
sam_file_input_options< typename traits_type::sequence_legal_alphabet > options
The options are public and its members can be set directly.
Definition: input.hpp:698
sam_file_input(stream_type &&stream, file_format const &) -> sam_file_input< typename sam_file_input<>::traits_type, typename sam_file_input<>::selected_field_ids, type_list< file_format > >
Deduce file_format, and default the rest.
sam_file_input(stream_t &stream, file_format const &format_tag, selected_field_ids const &fields_tag=selected_field_ids{})
Construct from an existing stream and with specified format.
Definition: input.hpp:494
sam_file_header< typename traits_type::ref_ids > header_type
The type of field::header_ptr (default: sam_file_header<typename traits_type::ref_ids>).
Definition: input.hpp:333
typename traits_type::template id_container< char > id_type
The type of field::id (std::string by default).
Definition: input.hpp:295
sam_file_input & operator=(sam_file_input &&)=default
Move assignment is defaulted.
sam_file_input(stream_t &&stream, typename traits_type::ref_ids &ref_ids, typename traits_type::ref_sequences &ref_sequences, file_format const &format_tag, selected_field_ids const &fields_tag=selected_field_ids{})
This is an overloaded member function, provided for convenience. It differs from the above function o...
Definition: input.hpp:589
std::tuple< gap_decorator< ref_sequence_type >, alignment_query_type > alignment_type
The type of field::alignment (default: std::pair<std::vector<gapped<dna5>>, std::vector<gapped<dna5>>...
Definition: input.hpp:344
sam_record< detail::select_types_with_ids_t< field_types, field_ids, selected_field_ids >, selected_field_ids > record_type
The type of the record, a specialisation of seqan3::record; acts as a tuple of the selected field typ...
Definition: input.hpp:408
sam_file_input()=delete
Default constructor is explicitly deleted, you need to give a stream or file name.
iterator begin()
Returns an iterator to the current position in the file.
Definition: input.hpp:638
selected_field_ids_ selected_field_ids
A seqan3::fields list with the fields selected for the record.
Definition: input.hpp:265
sam_file_input(stream_type &stream, ref_ids_t &, ref_sequences_t &, file_format const &, selected_field_ids const &) -> sam_file_input< sam_file_input_default_traits< std::remove_reference_t< ref_sequences_t >, std::remove_reference_t< ref_ids_t > >, selected_field_ids, type_list< file_format > >
Deduce selected fields, ref_sequences_t and ref_ids_t, and file format.
sam_file_input(std::filesystem::path filename, typename traits_type::ref_ids &ref_ids, typename traits_type::ref_sequences &ref_sequences, selected_field_ids const &fields_tag=selected_field_ids{})
Construct from filename and given additional reference information.
Definition: input.hpp:536
sam_file_input & operator=(sam_file_input const &)=delete
Copy assignment is explicitly deleted because you cannot have multiple access to the same file.
sam_file_input(sam_file_input &&)=default
Move construction is defaulted.
void const_iterator
The const iterator type is void because files are not const-iterable.
Definition: input.hpp:428
header_type & header()
Access the file's header.
Definition: input.hpp:712
sam_file_input(sam_file_input const &)=delete
Copy construction is explicitly deleted because you cannot have multiple access to the same file.
uint8_t mapq_type
The type of field::mapq is fixed to uint8_t.
Definition: input.hpp:323
sam_flag flag_type
The type of field::flag is fixed to seqan3::sam_flag.
Definition: input.hpp:327
sam_file_input(stream_type &&stream, file_format const &, selected_field_ids const &) -> sam_file_input< typename sam_file_input<>::traits_type, selected_field_ids, type_list< file_format > >
Deduce selected fields, file_format, and default the rest.
sam_file_input(stream_t &&stream, file_format const &format_tag, selected_field_ids const &fields_tag=selected_field_ids{})
This is an overloaded member function, provided for convenience. It differs from the above function o...
Definition: input.hpp:505
sam_file_input(stream_type &&stream, ref_ids_t &, ref_sequences_t &, file_format const &, selected_field_ids const &) -> sam_file_input< sam_file_input_default_traits< std::remove_reference_t< ref_sequences_t >, std::remove_reference_t< ref_ids_t > >, selected_field_ids, type_list< file_format > >
Deduce selected fields, ref_sequences_t and ref_ids_t, and file format.
~sam_file_input()=default
Destructor is defaulted.
std::tuple< ref_id_type, ref_offset_type, int32_t > mate_type
The type of field::mate is fixed to std::tuple<ref_id_type, ref_offset_type, int32_t>.
Definition: input.hpp:331
reference front() noexcept
Return the record we are currently at in the file.
Definition: input.hpp:691
typename traits_type::template quality_container< typename traits_type::quality_alphabet > quality_type
The type of field::qual (default std::vector<seqan3::phred42>).
Definition: input.hpp:325
sam_file_input(stream_type &stream, file_format const &, selected_field_ids const &) -> sam_file_input< typename sam_file_input<>::traits_type, selected_field_ids, type_list< file_format > >
Deduce selected fields, file_format, and default the rest.
The SAM tag dictionary class that stores all optional SAM fields.
Definition: sam_tag_dictionary.hpp:343
T data(T... args)
Provides seqan3::dna15, container aliases and string literals.
Provides seqan3::dna5, container aliases and string literals.
Provides the seqan3::format_bam.
Provides the seqan3::format_sam.
T format(T... args)
Provides seqan3::gap_decorator.
T get(T... args)
sam_flag
An enum flag that describes the properties of an aligned read (given as a SAM record).
Definition: sam_flag.hpp:76
field
An enumerator for the fields used in file formats.
Definition: record.hpp:63
@ flag
The alignment flag (bit information), uint16_t value.
@ ref_offset
Sequence (seqan3::field::ref_seq) relative start position (0-based), unsigned value.
@ alignment
The (pairwise) alignment stored in an object that models seqan3::detail::pairwise_alignment.
@ cigar
The cigar vector (std::vector<seqan3::cigar>) representing the alignment in SAM/BAM format.
@ mapq
The mapping quality of the seqan3::field::seq alignment, usually a Phred-scaled score.
@ offset
Sequence (seqan3::field::seq) relative start position (0-based), unsigned value.
@ mate
The mate pair information given as a std::tuple of reference name, offset and template length.
@ header_ptr
A pointer to the seqan3::sam_file_header object storing header information.
@ ref_id
The identifier of the (reference) sequence that seqan3::field::seq was aligned to.
@ id
The identifier, usually a string.
@ tags
The optional tags in the SAM format, stored in a dictionary.
@ seq
The "sequence", usually a range of nucleotides or amino acids.
@ qual
The qualities, usually in Phred score notation.
constexpr bool contains
Whether a type occurs in a type list or not.
Definition: traits.hpp:252
decltype(detail::transform< trait_t >(list_t{})) transform
Apply a transformation trait to every type in the list and return a seqan3::type_list of the results.
Definition: traits.hpp:470
constexpr size_t size
The size of a type pack.
Definition: traits.hpp:146
constexpr auto slice
A view adaptor that returns a half-open interval on the underlying range.
Definition: slice.hpp:178
constexpr auto repeat_n
A view factory that repeats a given value n times.
Definition: repeat_n.hpp:91
Provides the seqan3::detail::in_file_iterator class template.
The generic alphabet concept that covers most data types used in ranges.
Checks whether from can be explicitly converted to to.
The generic concept for alignment file input formats.
The requirements a traits_type for seqan3::sam_file_input must meet.
A more refined container concept than seqan3::container.
Refines seqan3::alphabet and adds assignability.
A concept that indicates whether a writable alphabet represents quality scores.
Provides exceptions used in the I/O module.
Stream concepts.
Provides various utility functions required only for input.
The main SeqAn3 namespace.
Definition: aligned_sequence_concept.hpp:29
Provides seqan3::phred42 quality scores.
Provides quality alphabet composites.
Provides seqan3::views::repeat_n.
Provides seqan3::sam_file_input_format and auxiliary classes.
Provides seqan3::sam_record.
Provides helper data structures for the seqan3::sam_file_output.
T size(T... args)
Provides seqan3::views::slice.
A class template that holds a choice of seqan3::field.
Definition: record.hpp:128
Thrown if there is an unspecified filesystem or stream error while opening, e.g. permission problem.
Definition: exception.hpp:39
The default traits for seqan3::sam_file_input.
Definition: input.hpp:186
ref_ids_t ref_ids
The type of the reference identifiers is deduced on construction.
Definition: input.hpp:217
ref_sequences_t ref_sequences
The type of the reference sequences is deduced on construction.
Definition: input.hpp:214
Type that contains multiple types.
Definition: type_list.hpp:29
Provides seqan3::detail::transformation_trait_or.
Provides traits for seqan3::type_list.
Provides seqan3::tuple_like.
T visit(T... args)