 |
SeqAn3
3.0.1
The Modern C++ library for sequence analysis.
|
|
Go to the documentation of this file.
36 #include <seqan3/io/detail/record.hpp>
111 template <
typename t>
132 (std::ranges::forward_range<typename t::ref_sequences> &&
133 std::ranges::forward_range<detail::transformation_trait_or_t<reference<typename t::ref_sequences>, dna4_vector>> &&
140 requires std::ranges::forward_range<reference_t<typename t::ref_ids>>;
141 requires std::ranges::forward_range<typename t::ref_ids>;
182 template <
typename ref_sequences_t = ref_info_not_given,
typename ref_
ids_t = std::deque<std::
string>>
197 template <
typename _sequence_alphabet>
201 template <
typename _
id_alphabet>
208 template <
typename _quality_alphabet>
401 using dummy_ref_type = decltype(
views::repeat_n(
typename traits_type::sequence_alphabet{},
size_t{}) |
411 typename traits_type::sequence_alphabet>;
413 using id_type =
typename traits_type::template id_container<char>;
425 detail::transformation_trait_or_t<
447 using quality_type =
typename traits_type::template quality_container<
448 typename traits_type::quality_alphabet>;
467 decltype(std::declval<sequence_type &>() |
views::slice(0, 0))>,
513 static_assert([] () constexpr
515 for (
field f : selected_field_ids::as_array)
516 if (!field_ids::contains(f))
520 "You selected a field that is not valid for aligment files, please refer to the documentation "
521 "of alignment_file_input::field_ids for the accepted values.");
543 using iterator = detail::in_file_iterator<alignment_file_input>;
585 primary_stream{
new std::ifstream{filename, std::ios_base::in | std::ios::binary}, stream_deleter_default}
609 template <input_stream stream_t, alignment_file_input_format file_format>
614 file_format
const & SEQAN3_DOXYGEN_ONLY(format_tag),
616 primary_stream{&stream, stream_deleter_noop}
622 template <input_stream stream_t, alignment_file_input_format file_format>
627 file_format
const & SEQAN3_DOXYGEN_ONLY(format_tag),
629 primary_stream{
new stream_t{
std::move(stream)}, stream_deleter_default}
658 typename traits_type::ref_ids & ref_ids,
659 typename traits_type::ref_sequences & ref_sequences,
661 primary_stream{
new std::ifstream{filename, std::ios_base::in | std::ios::binary}, stream_deleter_default}
664 set_references(ref_ids, ref_sequences);
694 template <input_stream stream_t, alignment_file_input_format file_format>
696 typename traits_type::ref_ids & ref_ids,
697 typename traits_type::ref_sequences & ref_sequences,
698 file_format
const & SEQAN3_DOXYGEN_ONLY(format_tag),
700 primary_stream{&stream, stream_deleter_noop}
703 set_references(ref_ids, ref_sequences);
709 template <input_stream stream_t, alignment_file_input_format file_format>
711 typename traits_type::ref_ids & ref_ids,
712 typename traits_type::ref_sequences & ref_sequences,
713 file_format
const & SEQAN3_DOXYGEN_ONLY(format_tag),
715 primary_stream{
new stream_t{
std::move(stream)}, stream_deleter_default}
718 set_references(ref_ids, ref_sequences);
726 typename traits_type::ref_ids &&,
727 typename traits_type::ref_sequences &&,
730 template <input_stream stream_t, alignment_file_input_format file_format>
732 typename traits_type::ref_ids &&,
733 typename traits_type::ref_sequences &&,
762 if (!first_record_was_read)
765 first_record_was_read =
true;
836 if (!first_record_was_read)
839 first_record_was_read =
true;
852 if (!primary_stream->good())
855 secondary_stream = detail::make_secondary_istream(*primary_stream, filename);
856 detail::set_format(format, filename);
860 template <
typename format_type>
861 void init(format_type
const &)
863 static_assert(list_traits::contains<format_type, valid_formats>,
864 "You selected a format that is not in the valid_formats of this file.");
866 format = detail::alignment_file_input_format_exposer<format_type>{};
867 secondary_stream = detail::make_secondary_istream(*primary_stream);
892 stream_ptr_t primary_stream{
nullptr, stream_deleter_noop};
894 stream_ptr_t secondary_stream{
nullptr, stream_deleter_noop};
897 bool first_record_was_read{
false};
902 using format_type =
typename detail::variant_from_tags<
valid_formats,
903 detail::alignment_file_input_format_exposer>::type;
912 typename traits_type::ref_sequences
const * reference_sequences_ptr{
nullptr};
925 template <std::ranges::forward_range ref_sequences_t>
926 void set_references(
typename traits_type::ref_ids & ref_ids, ref_sequences_t && ref_sequences)
928 assert(std::ranges::distance(ref_ids) == std::ranges::distance(ref_sequences));
931 reference_sequences_ptr = &ref_sequences;
934 for (int32_t idx = 0; idx < std::ranges::distance(ref_ids); ++idx)
936 header_ptr->ref_id_info.emplace_back(std::ranges::distance(ref_sequences[idx]),
"");
938 if constexpr (std::ranges::contiguous_range<reference_t<typename traits_type::ref_ids>> &&
939 std::ranges::sized_range<reference_t<typename traits_type::ref_ids>> &&
942 auto &&
id = header_ptr->ref_ids()[idx];
947 header_ptr->ref_dict[header_ptr->ref_ids()[idx]] = idx;
954 void read_next_record()
957 record_buffer.clear();
958 detail::get_or_ignore<field::header_ptr>(record_buffer) = header_ptr.get();
968 auto call_read_func = [
this] (
auto & ref_seq_info)
972 f.read_alignment_record(*secondary_stream,
976 detail::get_or_ignore<field::seq>(record_buffer),
977 detail::get_or_ignore<field::qual>(record_buffer),
978 detail::get_or_ignore<field::id>(record_buffer),
979 detail::get_or_ignore<field::offset>(record_buffer),
980 detail::get_or_ignore<field::ref_seq>(record_buffer),
981 detail::get_or_ignore<field::ref_id>(record_buffer),
982 detail::get_or_ignore<field::ref_offset>(record_buffer),
983 detail::get_or_ignore<field::alignment>(record_buffer),
984 detail::get_or_ignore<field::cigar>(record_buffer),
985 detail::get_or_ignore<field::flag>(record_buffer),
986 detail::get_or_ignore<field::mapq>(record_buffer),
987 detail::get_or_ignore<field::mate>(record_buffer),
988 detail::get_or_ignore<field::tags>(record_buffer),
989 detail::get_or_ignore<field::evalue>(record_buffer),
990 detail::get_or_ignore<field::bit_score>(record_buffer));
994 assert(!format.valueless_by_exception());
997 call_read_func(*reference_sequences_ptr);
999 call_read_func(std::ignore);
1010 template <input_stream stream_type,
1013 detail::fields_specialisation selected_field_ids>
1014 alignment_file_input(stream_type && stream,
1015 file_format
const &,
1016 selected_field_ids
const &)
1019 type_list<file_format>>;
1022 template <input_stream stream_type,
1024 detail::fields_specialisation selected_field_ids>
1025 alignment_file_input(stream_type & stream,
1026 file_format
const &,
1027 selected_field_ids
const &)
1030 type_list<file_format>>;
1033 template <input_stream stream_type,
1035 alignment_file_input(stream_type && stream,
1036 file_format
const &)
1039 type_list<file_format>>;
1042 template <input_stream stream_type,
1044 alignment_file_input(stream_type & stream,
1045 file_format
const &)
1048 type_list<file_format>>;
1051 template <std::ranges::forward_range ref_ids_t,
1052 std::ranges::forward_range ref_sequences_t,
1053 detail::fields_specialisation selected_field_ids>
1057 selected_field_ids
const &)
1058 -> alignment_file_input<alignment_file_input_default_traits<std::remove_reference_t<ref_sequences_t>,
1064 template <std::ranges::forward_range ref_ids_t,
1065 std::ranges::forward_range ref_sequences_t>
1069 -> alignment_file_input<alignment_file_input_default_traits<std::remove_reference_t<ref_sequences_t>,
1075 template <input_stream stream_type,
1076 std::ranges::forward_range ref_ids_t,
1077 std::ranges::forward_range ref_sequences_t,
1079 detail::fields_specialisation selected_field_ids>
1080 alignment_file_input(stream_type && stream,
1083 file_format
const &,
1084 selected_field_ids
const &)
1085 -> alignment_file_input<alignment_file_input_default_traits<std::remove_reference_t<ref_sequences_t>,
1088 type_list<file_format>>;
1091 template <input_stream stream_type,
1092 std::ranges::forward_range ref_ids_t,
1093 std::ranges::forward_range ref_sequences_t,
1095 detail::fields_specialisation selected_field_ids>
1096 alignment_file_input(stream_type & stream,
1099 file_format
const &,
1100 selected_field_ids
const &)
1101 -> alignment_file_input<alignment_file_input_default_traits<std::remove_reference_t<ref_sequences_t>,
1104 type_list<file_format>>;
1107 template <input_stream stream_type,
1108 std::ranges::forward_range ref_ids_t,
1109 std::ranges::forward_range ref_sequences_t,
1111 alignment_file_input(stream_type && stream,
1114 file_format
const &)
1115 -> alignment_file_input<alignment_file_input_default_traits<std::remove_reference_t<ref_sequences_t>,
1118 type_list<file_format>>;
1121 template <input_stream stream_type,
1122 std::ranges::forward_range ref_ids_t,
1123 std::ranges::forward_range ref_sequences_t,
1125 alignment_file_input(stream_type & stream,
1128 file_format
const &)
1129 -> alignment_file_input<alignment_file_input_default_traits<std::remove_reference_t<ref_sequences_t>,
1132 type_list<file_format>>;
1150 seqan3::detail::fields_specialisation selected_field_ids,
1151 seqan3::detail::type_list_of_alignment_file_input_formats valid_formats>
1152 struct tuple_size<
seqan3::alignment_file_input<traits_type, selected_field_ids, valid_formats>>
1155 static constexpr
size_t value = selected_field_ids::as_array.
size();
1163 template <
size_t elem_no,
1165 seqan3::detail::fields_specialisation selected_field_ids,
1166 seqan3::detail::type_list_of_alignment_file_input_formats valid_formats>
1167 struct tuple_element<elem_no,
seqan3::alignment_file_input<traits_type, selected_field_ids, valid_formats>>
1168 : tuple_element<elem_no, typename seqan3::alignment_file_input<traits_type,
1170 valid_formats>::file_as_tuple_type>
Provides helper data structures for the seqan3::alignment_file_output.
The "sequence", usually a range of nucleotides or amino acids.
Provides quality alphabet composites.
A more refined container concept than seqan3::container.
meta::list< types... > type_list
Type that contains multiple types, an alias for meta::list.
Definition: type_list.hpp:31
Provides seqan3::tuple_like.
Sequence (SEQ) relative start position (0-based), unsigned value.
Provides seqan3::gap_decorator.
Resolves to std::ranges::explicitly_convertible_to<type1, type2>().
The identifier, usually a string.
const auto move
A view that turns lvalue-references into rvalue-references.
Definition: move.hpp:68
The 15 letter DNA alphabet, containing all IUPAC symbols minus the gap.
Definition: dna15.hpp:48
sam_flag
An enum flag that describes the properties of an aligned read (given as a SAM record).
Definition: misc.hpp:70
Provides the seqan3::record template and the seqan3::field enum.
This header includes C++17 filesystem support and imports it into namespace seqan3::filesystem (indep...
The bit score (statistical significance indicator), unsigned value.
The (reference) "sequence" information, usually a range of nucleotides or amino acids.
Sequence (REF_SEQ) relative start position (0-based), unsigned value.
constexpr bool contains
Whether a type occurs in a pack or not.
Definition: traits.hpp:193
A class template that holds a choice of seqan3::field.
Definition: record.hpp:165
A combined alphabet that can hold values of either of its alternatives.
Definition: alphabet_variant.hpp:129
The concept std::same_as<T, U> is satisfied if and only if T and U denote the same type.
Meta-header for the nucleotide submodule; includes all headers from alphabet/nucleotide/.
Provides seqan3::views::slice.
Provides seqan3::views::repeat_n.
Provides seqan3::aa27, container aliases and string literals.
The cigar vector (std::vector<seqan3::cigar>) representing the alignment in SAM/BAM format.
The mapping quality of the SEQ alignment, usually a phred-scaled score.
Provides exceptions used in the I/O module.
The main SeqAn3 namespace.
Definition: aligned_sequence_concept.hpp:36
The optional tags in the SAM format, stored in a dictionary.
Quality type for traditional Sanger and modern Illumina Phred scores (typical range).
Definition: phred42.hpp:43
constexpr size_t size
The size of a type pack.
Definition: traits.hpp:116
Exposes the reference of another type.
Definition: pre.hpp:70
Provides alphabet adaptations for standard char types.
Provides seqan3::concatenated_sequences.
Adaptations of concepts from the Ranges TS.
The SAM tag dictionary class that stores all optional SAM fields.
Definition: sam_tag_dictionary.hpp:324
The generic alphabet concept that covers most data types used in ranges.
SeqAn specific customisations in the standard namespace.
Provides the seqan3::cigar alphabet.
The identifier of the (reference) sequence that SEQ was aligned to.
field
An enumerator for the fields used in file formats.
Definition: record.hpp:64
The class template that file records are based on; behaves like an std::tuple.
Definition: record.hpp:225
constexpr auto slice
A view adaptor that returns a half-open interval on the underlying range.
Definition: slice.hpp:141
The qualities, usually in phred-score notation.
constexpr auto repeat_n
A view factory that repeats a given value n times.
Definition: repeat_n.hpp:94
A gap decorator allows the annotation of sequences with gap symbols while leaving the underlying sequ...
Definition: gap_decorator.hpp:84
The five letter DNA alphabet of A,C,G,T and the unknown character N.
Definition: dna5.hpp:48
seqan3::type_list< trait_t< pack_t >... > transform
Apply a transformation trait to every type in the pack and return a seqan3::type_list of the results.
Definition: traits.hpp:307
Provides the seqan3::detail::in_file_iterator class template.
The alignment flag (bit information), uint16_t value.
Thrown if there is an unspecified filesystem or stream error while opening, e.g. permission problem.
Definition: exception.hpp:39
Provides traits for seqan3::type_list.
Refines seqan3::alphabet and adds assignability.
A concept that indicates whether a writable alphabet represents quality scores.
Specifies a range whose iterators may outlive the range and remain valid.
Provides seqan3::phred42 quality scores.
A pointer to the seqan3::alignment_file_header object storing header information.
The e-value (length normalized bit score), double value.
The (pairwise) alignment stored in an seqan3::alignment object.
The mate pair information given as a std::tuple of reference name, offset and template length.