21 #include <range/v3/algorithm/equal.hpp> 35 #include <seqan3/io/detail/record.hpp> 111 template <
typename t>
114 SEQAN3_CONCEPT SequenceFileInputTraits = requires (t v)
116 requires WritableAlphabet<typename t::sequence_alphabet>;
117 requires WritableAlphabet<typename t::sequence_legal_alphabet>;
118 requires ExplicitlyConvertibleTo<typename t::sequence_legal_alphabet, typename t::sequence_alphabet>;
119 requires SequenceContainer<typename t::template sequence_container<typename t::sequence_alphabet>>;
120 requires SequenceContainer<
typename t::template sequence_container_container<
121 typename t::template sequence_container<typename t::sequence_alphabet>>>;
123 requires WritableAlphabet<typename t::id_alphabet>;
124 requires SequenceContainer<typename t::template id_container<typename t::id_alphabet>>;
125 requires SequenceContainer<
typename t::template id_container_container<
typename t::template id_container<
126 typename t::id_alphabet>>>;
128 requires WritableQualityAlphabet<typename t::quality_alphabet>;
129 requires SequenceContainer<typename t::template quality_container<typename t::quality_alphabet>>;
130 requires SequenceContainer<
typename t::template quality_container_container<
131 typename t::template quality_container<typename t::quality_alphabet>>>;
166 template <
typename _sequence_alphabet>
170 template <
typename _sequence_container>
177 template <
typename _
id_alphabet>
181 template <
typename _
id_container>
188 template <
typename _quality_alphabet>
192 template <
typename _quality_container>
363 Char stream_char_type_ =
char>
386 static_assert([] () constexpr
388 for (
field f : selected_field_ids::as_array)
389 if (!field_ids::contains(f))
393 "You selected a field that is not valid for sequence files, please refer to the documentation " 394 "of sequence_file_input::field_ids for the accepted values.");
396 static_assert([] () constexpr
402 "You may not select field::SEQ_QUAL and either of field::SEQ and field::QUAL at the same time.");
409 using sequence_type =
typename traits_type::template sequence_container<
411 typename traits_type::sequence_alphabet>;
413 using id_type =
typename traits_type::template id_container<
414 typename traits_type::id_alphabet>;
416 using quality_type =
typename traits_type::template quality_container<
417 typename traits_type::quality_alphabet>;
420 template sequence_container<
qualified<
typename traits_type::sequence_alphabet,
421 typename traits_type::quality_alphabet>>;
436 using sequence_column_type =
typename traits_type::template sequence_container_container<sequence_type>;
439 using id_column_type =
typename traits_type::template id_container_container<id_type>;
471 using iterator = detail::in_file_iterator<sequence_file_input>;
512 primary_stream{
new std::ifstream{filename, std::ios_base::in | std::ios::binary}, stream_deleter_default}
514 if (!primary_stream->good())
515 throw file_open_error{
"Could not open file " + filename.
string() +
" for reading."};
518 secondary_stream = detail::make_secondary_istream(*primary_stream, filename);
521 detail::set_format(format, filename);
547 template <IStream2 stream_t,
548 SequenceFileInputFormat file_format>
550 file_format
const & SEQAN3_DOXYGEN_ONLY(format_tag),
552 primary_stream{&stream, stream_deleter_noop},
553 format{detail::sequence_file_input_format<file_format>{}}
555 static_assert(meta::in<valid_formats, file_format>::value,
556 "You selected a format that is not in the valid_formats of this file.");
559 secondary_stream = detail::make_secondary_istream(*primary_stream);
566 template <IStream2 stream_t,
567 SequenceFileInputFormat file_format>
569 file_format
const & SEQAN3_DOXYGEN_ONLY(format_tag),
571 primary_stream{
new stream_t{std::move(stream)}, stream_deleter_default},
572 format{detail::sequence_file_input_format<file_format>{}}
574 static_assert(meta::in<valid_formats, file_format>::value,
575 "You selected a format that is not in the valid_formats of this file.");
578 secondary_stream = detail::make_secondary_istream(*primary_stream);
650 return record_buffer;
662 static_assert(sequence_file_input::selected_field_ids::contains(f),
663 "You requested a field via get that was not selected for the file.");
667 return seqan3::get<f>(file.columns_buffer);
674 return std::move(get<f>(file));
681 static_assert(i < sequence_file_input::selected_field_ids::as_array.
size(),
682 "You requested a field number larger than the number of selected fields for the file.");
685 return std::get<i>(file.columns_buffer);
692 return std::move(get<i>(file));
696 template <
typename t>
701 return std::get<t>(file.columns_buffer);
705 template <
typename t>
708 return std::move(get<t>(file));
739 stream_ptr_t primary_stream{
nullptr, stream_deleter_noop};
741 stream_ptr_t secondary_stream{
nullptr, stream_deleter_noop};
747 using format_type =
typename detail::variant_from_tags<valid_formats, detail::sequence_file_input_format>::type;
753 void read_next_record()
756 record_buffer.clear();
766 assert(!format.valueless_by_exception());
772 f.read(*secondary_stream,
774 detail::get_or_ignore<field::SEQ_QUAL>(record_buffer),
775 detail::get_or_ignore<field::ID>(record_buffer),
776 detail::get_or_ignore<field::SEQ_QUAL>(record_buffer));
780 f.read(*secondary_stream,
782 detail::get_or_ignore<field::SEQ>(record_buffer),
783 detail::get_or_ignore<field::ID>(record_buffer),
784 detail::get_or_ignore<field::QUAL>(record_buffer));
794 auto & sequence_column_buffer = detail::get_or_ignore<field::SEQ>(columns_buffer);
795 auto & id_column_buffer = detail::get_or_ignore<field::ID>(columns_buffer);
796 auto & qual_column_buffer = detail::get_or_ignore<field::QUAL>(columns_buffer);
797 auto & seq_qual_column_buffer = detail::get_or_ignore<field::SEQ_QUAL>(columns_buffer);
800 for (
auto & rec : *
this)
802 if constexpr (selected_field_ids::contains(
field::SEQ))
803 sequence_column_buffer.push_back(std::move(seqan3::get<field::SEQ>(rec)));
804 if constexpr (selected_field_ids::contains(
field::ID))
805 id_column_buffer.push_back(std::move(seqan3::get<field::ID>(rec)));
806 if constexpr (selected_field_ids::contains(
field::QUAL))
807 qual_column_buffer.push_back(std::move(seqan3::get<field::QUAL>(rec)));
809 seq_qual_column_buffer.push_back(std::move(seqan3::get<field::SEQ_QUAL>(rec)));
822 template <IStream2 stream_type,
824 SequenceFileInputFormat file_format,
825 detail::Fields selected_field_ids>
826 sequence_file_input(stream_type && stream,
828 selected_field_ids
const &)
831 type_list<file_format>,
835 template <IStream2 stream_type,
836 SequenceFileInputFormat file_format,
837 detail::Fields selected_field_ids>
838 sequence_file_input(stream_type & stream,
840 selected_field_ids
const &)
843 type_list<file_format>,
861 seqan3::detail::Fields selected_field_ids,
862 seqan3::detail::TypeListOfSequenceFileInputFormats valid_formats,
864 struct tuple_size<
seqan3::sequence_file_input<traits_type, selected_field_ids, valid_formats, stream_char_t>>
867 static constexpr
size_t value = selected_field_ids::as_array.
size();
875 template <
size_t elem_no,
877 seqan3::detail::Fields selected_field_ids,
878 seqan3::detail::TypeListOfSequenceFileInputFormats valid_formats,
880 struct tuple_element<elem_no,
seqan3::sequence_file_input<traits_type, selected_field_ids, valid_formats, stream_char_t>>
881 : tuple_element<elem_no, typename seqan3::sequence_file_input<traits_type,
884 stream_char_t>::file_as_tuple_type>
Provides quality alphabet composites.
The "sequence", usually a range of nucleotides or amino acids.
Provides exceptions used in the I/O module.
This concept encompasses exactly the types char, signed char, unsigned char, wchar_t, char16_t and char32_t.
Provides seqan3::aa27, container aliases and string literals.
SeqAn specific customisations in the standard namespace.
The 15 letter DNA alphabet, containing all IUPAC symbols minus the gap.
Definition: dna15.hpp:48
::ranges::size size
Alias for ranges::size. Obtains the size of a range whose size can be calculated in constant time...
Definition: ranges:189
The main SeqAn3 namespace.
The qualities, usually in phred-score notation.
Joins an arbitrary alphabet with a quality alphabet.
Definition: qualified.hpp:59
Provides seqan3::concatenated_sequences.
The twenty-seven letter amino acid alphabet.
Definition: aa27.hpp:43
A class template that holds a choice of seqan3::field.
Definition: record.hpp:127
Container that stores sequences concatenated internally.
Definition: concatenated_sequences.hpp:89
The five letter DNA alphabet of A,C,G,T and the unknown character N.
Definition: dna5.hpp:48
Sequence and qualities combined in one range.
Provides alphabet adaptations for standard char types.
Provides the seqan3::record template and the seqan3::field enum.
The identifier, usually a string.
Provides seqan3::phred42 quality scores.
Provides various type traits on generic types.
::ranges::default_sentinel_t default_sentinel_t
Alias for ranges::default_sentinel_t. Type of ranges::default_sentinel.
Definition: iterator:351
Meta-header for the nucleotide submodule; includes all headers from alphabet/nucleotide/.
meta::list< types... > type_list
Type that contains multiple types, an alias for meta::list.
Definition: type_list.hpp:27
field
An enumerator for the fields used in file formats. Some of the fields are shared between formats...
Definition: record.hpp:63
Quality type for traditional Sanger and modern Illumina Phred scores (typical range).
Definition: phred42.hpp:43
Provides the seqan3::detail::in_file_iterator class template.
This header includes C++17 filesystem support and imports it into namespace seqan3::filesystem (indep...