SeqAn3 3.3.0
The Modern C++ library for sequence analysis.
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
io/sequence_file/output.hpp
Go to the documentation of this file.
1// -----------------------------------------------------------------------------------------------------
2// Copyright (c) 2006-2023, Knut Reinert & Freie Universität Berlin
3// Copyright (c) 2016-2023, Knut Reinert & MPI für molekulare Genetik
4// This file may be used, modified and/or redistributed under the terms of the 3-clause BSD-License
5// shipped with this file and also available at: https://github.com/seqan/seqan3/blob/master/LICENSE.md
6// -----------------------------------------------------------------------------------------------------
7
13#pragma once
14
15#include <cassert>
16#include <filesystem>
17#include <fstream>
18#include <ranges>
19#include <string>
20#include <variant>
21#include <vector>
22
26#include <seqan3/io/detail/record.hpp>
29#include <seqan3/io/record.hpp>
43
44namespace seqan3
45{
46
47// ----------------------------------------------------------------------------
48// sequence_file_output
49// ----------------------------------------------------------------------------
50
65template <detail::fields_specialisation selected_field_ids_ = fields<field::seq, field::id, field::qual>,
66 detail::type_list_of_sequence_file_output_formats valid_formats_ =
67 type_list<format_embl, format_fasta, format_fastq, format_genbank, format_sam>>
69{
70public:
// A seqan3::fields list with the fields selected for the record.
76 using selected_field_ids = selected_field_ids_;
// A seqan3::type_list with the possible formats.
78 using valid_formats = valid_formats_;
// Character type of the stream(s).
80 using stream_char_type = char;
82
85
// Compile-time validation of the selected fields: an immediately-invoked constexpr lambda walks
// selected_field_ids::as_array and yields false as soon as one field is not contained in field_ids,
// which makes the static_assert fail with the message below.
86 static_assert(
87 []() constexpr
88 {
89 for (field f : selected_field_ids::as_array)
90 if (!field_ids::contains(f))
91 return false;
92 return true;
93 }(),
94 "You selected a field that is not valid for sequence files, please refer to the documentation "
95 "of sequence_file_output::field_ids for the accepted values.");
96
// The value type (void).
103 using value_type = void;
// The reference type (void).
105 using reference = void;
// The const reference type (void).
107 using const_reference = void;
// The size type (void).
109 using size_type = void;
// The iterator type of this view (an output iterator).
113 using iterator = detail::out_file_iterator<sequence_file_output>;
// The const iterator type is void, because files are not const-iterable.
115 using const_iterator = void;
// The type returned by end().
117 using sentinel = std::default_sentinel_t;
119
135
152 selected_field_ids const & SEQAN3_DOXYGEN_ONLY(fields_tag) = selected_field_ids{}) :
153 primary_stream{new std::ofstream{}, stream_deleter_default}
154 {
155 primary_stream->rdbuf()->pubsetbuf(stream_buffer.data(), stream_buffer.size());
156 static_cast<std::basic_ofstream<char> *>(primary_stream.get())
157 ->open(filename, std::ios_base::out | std::ios::binary);
158
159 if (!primary_stream->good())
160 throw file_open_error{"Could not open file " + filename.string() + " for writing."};
161
162 // possibly add intermediate compression stream
163 secondary_stream = detail::make_secondary_ostream(*primary_stream, filename);
164
165 // initialise format handler or throw if format is not found
166 detail::set_format(format, filename);
167 }
168
// Construct from an existing stream and with specified format.
// The constraint requires the stream's char_type to be stream_char_type. The two tag parameters are
// not read in the body; they only carry the format and field-selection types.
184 template <output_stream stream_t, sequence_file_output_format file_format>
185 requires std::same_as<typename std::remove_reference_t<stream_t>::char_type, stream_char_type>
186 sequence_file_output(stream_t & stream,
187 file_format const & SEQAN3_DOXYGEN_ONLY(format_tag),
188 selected_field_ids const & SEQAN3_DOXYGEN_ONLY(fields_tag) = selected_field_ids{}) :
// Both pointers store the address of the caller's stream with the no-op deleter, so this object
// never deletes the stream; the stream therefore has to stay alive while this object uses it.
189 primary_stream{&stream, stream_deleter_noop},
190 secondary_stream{&stream, stream_deleter_noop},
191 format{detail::sequence_file_output_format_exposer<file_format>{}}
192 {
// Reject, at compile time, a format tag that is not part of this file's valid_formats.
193 static_assert(list_traits::contains<file_format, valid_formats>,
194 "You selected a format that is not in the valid_formats of this file.");
195 }
196
// Overload of the stream constructor that takes the stream as `stream_t &&`.
// The constraint requires the stream's char_type to be stream_char_type. The two tag parameters are
// not read in the body; they only carry the format and field-selection types.
198 template <output_stream stream_t, sequence_file_output_format file_format>
199 requires std::same_as<typename std::remove_reference_t<stream_t>::char_type, stream_char_type>
200 sequence_file_output(stream_t && stream,
201 file_format const & SEQAN3_DOXYGEN_ONLY(format_tag),
202 selected_field_ids const & SEQAN3_DOXYGEN_ONLY(fields_tag) = selected_field_ids{}) :
// The passed stream is moved into a heap-allocated stream_t that primary_stream owns
// (stream_deleter_default deletes it).
203 primary_stream{new stream_t{std::move(stream)}, stream_deleter_default},
// secondary_stream points at that same object but uses the no-op deleter, so it is not deleted twice.
204 secondary_stream{&*primary_stream, stream_deleter_noop},
205 format{detail::sequence_file_output_format_exposer<file_format>{}}
206 {
// Reject, at compile time, a format tag that is not part of this file's valid_formats.
207 static_assert(list_traits::contains<file_format, valid_formats>,
208 "You selected a format that is not in the valid_formats of this file.");
209 }
211
// Returns an iterator to current position in the file.
// The iterator (detail::out_file_iterator) is constructed from *this; the function is noexcept.
233 iterator begin() noexcept
234 {
235 return {*this};
236 }
237
// Returns a sentinel for comparison with iterator.
// The sentinel is a value-initialised std::default_sentinel_t; the function is noexcept.
252 sentinel end() noexcept
253 {
254 return {};
255 }
256
// Write a seqan3::record to the file.
// Only participates for types satisfying detail::record_like. The seq, id and qual fields are looked
// up by field identifier through detail::get_or_ignore and handed to write_record in that order.
275 template <typename record_t>
276 void push_back(record_t && r)
277 requires detail::record_like<record_t>
278 {
279 write_record(detail::get_or_ignore<field::seq>(r),
280 detail::get_or_ignore<field::id>(r),
281 detail::get_or_ignore<field::qual>(r));
282 }
283
// Write a record in form of a std::tuple to the file.
// Only participates for tuple_like types that are not detail::record_like. The tuple position of each
// field is taken from selected_field_ids::index_of, and the values are passed to write_record in the
// order seq, id, qual.
305 template <typename tuple_t>
306 void push_back(tuple_t && t)
307 requires tuple_like<tuple_t> && (!detail::record_like<tuple_t>)
308 {
309 // index_of might return npos, but this will be handled well by get_or_ignore (and just return ignore)
310 write_record(detail::get_or_ignore<selected_field_ids::index_of(field::seq)>(t),
311 detail::get_or_ignore<selected_field_ids::index_of(field::id)>(t),
312 detail::get_or_ignore<selected_field_ids::index_of(field::qual)>(t));
313 }
314
// Write a record to the file by passing individual fields.
// At least one argument is required (arg_t); any further fields go into the parameter pack.
338 template <typename arg_t, typename... arg_types>
339 void emplace_back(arg_t && arg, arg_types &&... args)
340 {
// std::tie bundles the arguments into a tuple of lvalue references (no copies), which is then
// written via push_back — presumably the tuple overload; confirm that the tuple is not record_like.
341 push_back(std::tie(arg, args...));
342 }
343
365 template <std::ranges::input_range rng_t>
368 {
369 for (auto && record : range)
370 push_back(std::forward<decltype(record)>(record));
371 return *this;
372 }
373
401 template <std::ranges::input_range rng_t>
404 {
405 f = range;
406 return f;
407 }
408
410 template <std::ranges::input_range rng_t>
413 {
414 f = range;
415 return std::move(f);
416 }
418
421
426 {
427 return *secondary_stream;
428 }
430
431protected:
// Buffer of 1'000'000 chars; the filename constructor passes it to the primary stream's
// rdbuf() via pubsetbuf() before the file is opened.
434 std::vector<char> stream_buffer{std::vector<char>(1'000'000)};
435
// Deleter that intentionally does nothing. It is used for stream pointers that must not delete
// their target (a caller-provided stream, or a second pointer to an already owned stream).
443 static void stream_deleter_noop(std::basic_ostream<stream_char_type> *)
444 {}
// Deleter that calls delete on the stream pointer. It is used for streams this class allocated
// itself with new (see the constructors that initialise primary_stream with it).
446 static void stream_deleter_default(std::basic_ostream<stream_char_type> * ptr)
447 {
448 delete ptr;
449 }
450
452 stream_ptr_t primary_stream{nullptr, stream_deleter_noop};
454 stream_ptr_t secondary_stream{nullptr, stream_deleter_noop};
455
457 using format_type =
458 typename detail::variant_from_tags<valid_formats, detail::sequence_file_output_format_exposer>::type;
460 format_type format;
462
// Writes one record (sequence, id, qualities) through the selected format.
// Called by both push_back overloads.
464 template <typename seq_t, typename id_t, typename qual_t>
465 void write_record(seq_t && seq, id_t && id, qual_t && qual)
466 {
// Debug-only sanity check that the format variant currently holds a value.
467 assert(!format.valueless_by_exception());
// NOTE(review): original header line 468 is missing from this listing; presumably it is the
// opening `std::visit(` call that applies the lambda below to `format` — confirm against upstream.
469 [&](auto & f)
470 {
471 {
// The active format writes to the secondary stream, taking the public options and the three fields.
472 f.write_sequence_record(*secondary_stream, options, seq, id, qual);
473 }
474 },
475 format);
476 }
477
479 friend iterator;
480};
481
488template <output_stream stream_t, sequence_file_output_format file_format>
490 file_format const &)
493
495template <output_stream stream_t, sequence_file_output_format file_format>
497 file_format const &)
500
502template <output_stream stream_t,
503 sequence_file_output_format file_format,
504 detail::fields_specialisation selected_field_ids>
505sequence_file_output(stream_t &&, file_format const &, selected_field_ids const &)
507
509template <output_stream stream_t,
510 sequence_file_output_format file_format,
511 detail::fields_specialisation selected_field_ids>
512sequence_file_output(stream_t &, file_format const &, selected_field_ids const &)
515} // namespace seqan3
The generic concept for sequence file out formats.
Definition: sequence_file/output_format_concept.hpp:97
A class for writing sequence files, e.g. FASTA, FASTQ ...
Definition: io/sequence_file/output.hpp:69
sequence_file_output & operator=(sequence_file_output const &)=delete
Copy assignment is explicitly deleted, because you can't have multiple access to the same file.
sequence_file_output(stream_t &, file_format const &, selected_field_ids const &) -> sequence_file_output< selected_field_ids, type_list< file_format > >
This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
sequence_file_output(std::filesystem::path filename, selected_field_ids const &fields_tag=selected_field_ids{})
Construct from filename.
Definition: io/sequence_file/output.hpp:151
sequence_file_output(stream_t &&, file_format const &) -> sequence_file_output< typename sequence_file_output<>::selected_field_ids, type_list< file_format > >
This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
void const_iterator
The const iterator type is void, because files are not const-iterable.
Definition: io/sequence_file/output.hpp:115
char stream_char_type
Character type of the stream(s).
Definition: io/sequence_file/output.hpp:80
std::default_sentinel_t sentinel
The type returned by end().
Definition: io/sequence_file/output.hpp:117
sequence_file_output(stream_t &&, file_format const &, selected_field_ids const &) -> sequence_file_output< selected_field_ids, type_list< file_format > >
Deduction guide for given stream, file format and field ids.
void push_back(tuple_t &&t)
Write a record in form of a std::tuple to the file.
Definition: io/sequence_file/output.hpp:306
sequence_file_output(stream_t &stream, file_format const &format_tag, selected_field_ids const &fields_tag=selected_field_ids{})
Construct from an existing stream and with specified format.
Definition: io/sequence_file/output.hpp:186
void push_back(record_t &&r)
Write a seqan3::record to the file.
Definition: io/sequence_file/output.hpp:276
sequence_file_output(sequence_file_output &&)=default
Move construction is defaulted.
sequence_file_output & operator=(sequence_file_output &&)=default
Move assignment is defaulted.
sequence_file_output(sequence_file_output const &)=delete
Copy construction is explicitly deleted, because you can't have multiple access to the same file.
sequence_file_output(stream_t &, file_format const &) -> sequence_file_output< typename sequence_file_output<>::selected_field_ids, type_list< file_format > >
Deduction guide for given stream and file format.
valid_formats_ valid_formats
A seqan3::type_list with the possible formats.
Definition: io/sequence_file/output.hpp:78
selected_field_ids_ selected_field_ids
A seqan3::fields list with the fields selected for the record.
Definition: io/sequence_file/output.hpp:76
sequence_file_output(stream_t &&stream, file_format const &format_tag, selected_field_ids const &fields_tag=selected_field_ids{})
This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
Definition: io/sequence_file/output.hpp:200
friend sequence_file_output operator|(rng_t &&range, sequence_file_output &&f)
This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
Definition: io/sequence_file/output.hpp:411
~sequence_file_output()=default
Destructor is defaulted.
sequence_file_output_options options
The options are public and its members can be set directly.
Definition: io/sequence_file/output.hpp:420
void emplace_back(arg_t &&arg, arg_types &&... args)
Write a record to the file by passing individual fields.
Definition: io/sequence_file/output.hpp:339
sequence_file_output & operator=(rng_t &&range)
Write a range of records (or tuples) to the file.
Definition: io/sequence_file/output.hpp:366
void value_type
The value type (void).
Definition: io/sequence_file/output.hpp:103
sentinel end() noexcept
Returns a sentinel for comparison with iterator.
Definition: io/sequence_file/output.hpp:252
void reference
The reference type (void).
Definition: io/sequence_file/output.hpp:105
friend sequence_file_output & operator|(rng_t &&range, sequence_file_output &f)
Write a range of records (or tuples) to the file.
Definition: io/sequence_file/output.hpp:402
void const_reference
The const reference type (void).
Definition: io/sequence_file/output.hpp:107
sequence_file_output()=delete
Default constructor is explicitly deleted, you need to give a stream or file name.
detail::out_file_iterator< sequence_file_output > iterator
The iterator type of this view (an output iterator).
Definition: io/sequence_file/output.hpp:113
iterator begin() noexcept
Returns an iterator to current position in the file.
Definition: io/sequence_file/output.hpp:233
void size_type
The size type (void).
Definition: io/sequence_file/output.hpp:109
T data(T... args)
Provides seqan3::views::elements.
Provides the seqan3::sequence_file_format_genbank class.
Provides the seqan3::format_sam.
T format(T... args)
T forward(T... args)
T get(T... args)
field
An enumerator for the fields used in file formats.
Definition: record.hpp:63
@ id
The identifier, usually a string.
@ seq
The "sequence", usually a range of nucleotides or amino acids.
@ qual
The qualities, usually in Phred score notation.
Whether a type behaves like a tuple.
Provides various utility functions.
Provides exceptions used in the I/O module.
Stream concepts.
Provides various utility functions required only for output.
The main SeqAn3 namespace.
Definition: aligned_sequence_concept.hpp:29
Provides the seqan3::detail::out_file_iterator class template.
Provides the seqan3::record template and the seqan3::field enum.
Provides seqan3::detail::record_like.
Provides seqan3::sequence_file_output_format and auxiliary classes.
Provides seqan3::sequence_file_output_options.
T size(T... args)
A class template that holds a choice of seqan3::field.
Definition: record.hpp:128
The class template that file records are based on; behaves like a std::tuple.
Definition: record.hpp:193
The options type defines various option members that influence the behaviour of all or some formats.
Definition: sequence_file/output_options.hpp:26
Type that contains multiple types.
Definition: type_list.hpp:29
T tie(T... args)
Provides traits for seqan3::type_list.
Provides seqan3::tuple_like.
Provides seqan3::views::convert.
T visit(T... args)
Provides seqan3::views::zip.