SeqAn3 3.1.0
The Modern C++ library for sequence analysis.
output.hpp
Go to the documentation of this file.
1// -----------------------------------------------------------------------------------------------------
2// Copyright (c) 2006-2021, Knut Reinert & Freie Universität Berlin
3// Copyright (c) 2016-2021, Knut Reinert & MPI für molekulare Genetik
4// This file may be used, modified and/or redistributed under the terms of the 3-clause BSD-License
5// shipped with this file and also available at: https://github.com/seqan/seqan3/blob/master/LICENSE.md
6// -----------------------------------------------------------------------------------------------------
7
13#pragma once
14
15#include <cassert>
16#include <seqan3/std/filesystem>
17#include <fstream>
18#include <optional>
19#include <seqan3/std/ranges>
20#include <string>
21#include <type_traits>
22#include <variant>
23#include <vector>
24
27#include <seqan3/io/record.hpp>
30#include <seqan3/io/detail/record.hpp>
40
41namespace seqan3
42{
43
44// ----------------------------------------------------------------------------
45// structure_file_output
46// ----------------------------------------------------------------------------
47
// A class for writing structured sequence files, e.g. Stockholm, Connect, Vienna, ViennaRNA bpp matrix.
// Template parameters:
//   selected_field_ids_  seqan3::fields list with the fields selected for the record
//                        (defaults to field::seq, field::id and field::structure).
//   valid_formats_       seqan3::type_list with the possible formats (defaults to format_vienna only).
// NOTE(review): listing line 62 (presumably the `class structure_file_output` declaration) is missing
// from this rendering — confirm against the original header.
60template <detail::fields_specialisation selected_field_ids_ = fields<field::seq, field::id, field::structure>,
61 detail::type_list_of_structure_file_output_formats valid_formats_ = type_list<format_vienna>>
63{
64public:
// A seqan3::fields list with the fields selected for the record.
70 using selected_field_ids = selected_field_ids_;
// A seqan3::type_list with the possible formats.
72 using valid_formats = valid_formats_;
// Character type of the stream(s).
74 using stream_char_type = char;
76
88
89 static_assert([] () constexpr
90 {
91 for (field f : selected_field_ids::as_array)
92 if (!field_ids::contains(f))
93 return false;
94 return true;
95 }(),
96 "You selected a field that is not valid for structure files, please refer to the documentation "
97 "of structure_file_output::field_ids for the accepted values.");
98
// Compile-time check that field::structured_seq is not selected together with field::seq or
// field::structure (see the assertion message).
// NOTE(review): the lambda body (listing lines 101-103) is missing from this rendering, so the actual
// condition is not visible here — restore it from the original header.
99 static_assert([] () constexpr
100 {
104 }(), "You may not select field::structured_seq and either of field::seq and field::structure "
105 "at the same time.");
106
// Range associated types. Most of them are void because this is an output-only file.
// The value type (void).
113 using value_type = void;
// The reference type (void).
115 using reference = void;
// The const reference type (void).
117 using const_reference = void;
// The size type (void).
119 using size_type = void;
// The iterator type of this view (an output iterator).
123 using iterator = detail::out_file_iterator<structure_file_output>;
// The const iterator type is void, because files are not const-iterable.
125 using const_iterator = void;
// The type returned by end().
127 using sentinel = std::default_sentinel_t;
129
145
// Construct from filename.
// Creates a std::ofstream owned by this object (stream_deleter_default), opens `filename` for binary
// output, sets up the secondary stream and initialises the format handler from the file name.
// Throws file_open_error if the file stream is not good() after opening.
// NOTE(review): the first signature line (listing line 161, declaring the `filename` parameter) is
// missing from this rendering.
162 selected_field_ids const & SEQAN3_DOXYGEN_ONLY(fields_tag) = selected_field_ids{}) :
163 primary_stream{new std::ofstream{}, stream_deleter_default}
164 {
// Install stream_buffer as the stream's buffer; this is done before the file is opened.
165 primary_stream->rdbuf()->pubsetbuf(stream_buffer.data(), stream_buffer.size());
// primary_stream is held as a basic_ostream pointer, hence the cast back to the ofstream created above.
166 static_cast<std::basic_ofstream<char> *>(primary_stream.get())->open(filename,
167 std::ios_base::out | std::ios::binary);
168
169 if (!primary_stream->good())
170 throw file_open_error{"Could not open file " + filename.string() + " for writing."};
171
172 // possibly add intermediate compression stream
173 secondary_stream = detail::make_secondary_ostream(*primary_stream, filename);
174
175 // initialise format handler or throw if format is not found
176 detail::set_format(format, filename);
177 }
178
// Construct from an existing stream and with specified format.
// The stream is not owned: both stream pointers refer to the caller's stream and use
// stream_deleter_noop, so it is never deleted by this object.
// The requires-clause restricts this constructor to streams whose char_type is char.
// format_tag and fields_tag are not read in the body; only their types are used (file_format selects
// the format alternative and is checked against valid_formats at compile time).
195 template <output_stream stream_t, structure_file_output_format file_format>
197 requires std::same_as<typename std::remove_reference_t<stream_t>::char_type, char>
199 structure_file_output(stream_t & stream,
200 file_format const & SEQAN3_DOXYGEN_ONLY(format_tag),
201 selected_field_ids const & SEQAN3_DOXYGEN_ONLY(fields_tag) = selected_field_ids{}) :
202 primary_stream{&stream, stream_deleter_noop},
203 secondary_stream{&stream, stream_deleter_noop},
204 format{detail::structure_file_output_format_exposer<file_format>{}}
205 {
206 static_assert(list_traits::contains<file_format, valid_formats>,
207 "You selected a format that is not in the valid_formats of this file.");
208 }
209
// Overload of the stream constructor taking the stream as `stream_t &&`.
// The stream is moved into a newly allocated stream_t that this object owns (stream_deleter_default);
// secondary_stream is a non-owning pointer (stream_deleter_noop) to that same stream.
// The requires-clause restricts this constructor to streams whose char_type is char.
// format_tag and fields_tag are not read in the body; only their types are used.
211 template <output_stream stream_t, structure_file_output_format file_format>
213 requires std::same_as<typename std::remove_reference_t<stream_t>::char_type, char>
215 structure_file_output(stream_t && stream,
216 file_format const & SEQAN3_DOXYGEN_ONLY(format_tag),
217 selected_field_ids const & SEQAN3_DOXYGEN_ONLY(fields_tag) = selected_field_ids{}) :
218 primary_stream{new stream_t{std::move(stream)}, stream_deleter_default},
219 secondary_stream{&*primary_stream, stream_deleter_noop},
220 format{detail::structure_file_output_format_exposer<file_format>{}}
221 {
222 static_assert(list_traits::contains<file_format, valid_formats>,
223 "You selected a format that is not in the valid_formats of this file.");
224 }
226
248 iterator begin() noexcept
249 {
250 return {*this};
251 }
252
267 sentinel end() noexcept
268 {
269 return {};
270 }
271
// Write a seqan3::record to the file.
// Only participates in overload resolution if record_t satisfies detail::record_like.
// Each field is looked up in the record by its field id via detail::get_or_ignore and the results are
// passed to write_record() in the order that function expects.
// NOTE(review): presumably get_or_ignore yields an ignore placeholder for fields the record does not
// contain — confirm in its definition.
290 template <typename record_t>
291 void push_back(record_t && r)
293 requires detail::record_like<record_t>
295 {
296 write_record(detail::get_or_ignore<field::seq>(r),
297 detail::get_or_ignore<field::id>(r),
298 detail::get_or_ignore<field::bpp>(r),
299 detail::get_or_ignore<field::structure>(r),
300 detail::get_or_ignore<field::structured_seq>(r),
301 detail::get_or_ignore<field::energy>(r),
302 detail::get_or_ignore<field::react>(r),
303 detail::get_or_ignore<field::react_err>(r),
304 detail::get_or_ignore<field::comment>(r),
305 detail::get_or_ignore<field::offset>(r));
306 }
307
// Write a record in form of a std::tuple to the file.
// Only participates in overload resolution if tuple_t is tuple_like but not detail::record_like.
// The position of each field inside the tuple is taken from selected_field_ids::index_of(), i.e. the
// tuple elements are interpreted in the order of the selected fields.
329 template <typename tuple_t>
330 void push_back(tuple_t && t)
332 requires tuple_like<tuple_t> && (!detail::record_like<tuple_t>)
334 {
335 // index_of might return npos, but this will be handled well by get_or_ignore (and just return ignore)
336 write_record(detail::get_or_ignore<selected_field_ids::index_of(field::seq)>(t),
337 detail::get_or_ignore<selected_field_ids::index_of(field::id)>(t),
338 detail::get_or_ignore<selected_field_ids::index_of(field::bpp)>(t),
339 detail::get_or_ignore<selected_field_ids::index_of(field::structure)>(t),
340 detail::get_or_ignore<selected_field_ids::index_of(field::structured_seq)>(t),
341 detail::get_or_ignore<selected_field_ids::index_of(field::energy)>(t),
342 detail::get_or_ignore<selected_field_ids::index_of(field::react)>(t),
343 detail::get_or_ignore<selected_field_ids::index_of(field::react_err)>(t),
344 detail::get_or_ignore<selected_field_ids::index_of(field::comment)>(t),
345 detail::get_or_ignore<selected_field_ids::index_of(field::offset)>(t));
346 }
347
// Write a record to the file by passing individual fields.
// The arguments are bundled, by reference and in the given order, into a tuple that is handed to
// push_back(); nothing is copied.
template <typename arg_t, typename ...arg_types>
void emplace_back(arg_t && arg, arg_types && ... args)
{
    // Tuple of lvalue references to every argument (reference collapsing turns `T && &` into `T &`).
    using field_refs_t = std::tuple<arg_t &, arg_types &...>;
    push_back(field_refs_t{arg, args...});
}
376
// Write a range of records (or tuples) to the file.
// Every element of `range` is passed to push_back(); returns *this.
// NOTE(review): the signature (listing lines 399-402) is missing from this rendering; the Doxygen member
// index of this listing gives it as `structure_file_output & operator=(rng_t && range)`.
398 template <std::ranges::input_range rng_t>
403 {
404 for (auto && record : range)
405 push_back(std::forward<decltype(record)>(record));
406 return *this;
407 }
408
// Write a range of records (or tuples) to the file using pipe syntax.
// Assigns `range` to the file `f` (which writes all elements) and returns `f`.
// NOTE(review): the signature (listing lines 437-440) is missing from this rendering; the Doxygen member
// index of this listing gives it as
// `friend structure_file_output & operator|(rng_t && range, structure_file_output & f)`.
436 template <std::ranges::input_range rng_t>
441 {
442 f = range;
443 return f;
444 }
445
// Overload of the pipe operator for a file passed as rvalue.
// Assigns `range` to the file `f` (which writes all elements) and returns the file by move.
// NOTE(review): the signature (listing lines 448-451) is missing from this rendering; the Doxygen member
// index of this listing gives it as
// `friend structure_file_output operator|(rng_t && range, structure_file_output && f)`.
447 template <std::ranges::input_range rng_t>
452 {
453 f = range;
// f is a named rvalue reference, so an explicit move is needed to return it by move.
454 return std::move(f);
455 }
457
460
// Returns the object that secondary_stream points to, i.e. the stream that write_record() writes to.
// NOTE(review): the declaration (listing lines 461-464) is missing from this rendering, so the function
// name and return type cannot be confirmed here.
465 {
466 return *secondary_stream;
467 }
469protected:
// Buffer of 1'000'000 chars; the filename constructor installs it on the file stream via pubsetbuf().
472 std::vector<char> stream_buffer{std::vector<char>(1'000'000)};
473
481 static void stream_deleter_noop(std::basic_ostream<stream_char_type> *) {}
483 static void stream_deleter_default(std::basic_ostream<stream_char_type> * ptr) { delete ptr; }
484
// The stream passed to (or opened by) the constructor; its deleter decides whether it is owned.
486 stream_ptr_t primary_stream{nullptr, stream_deleter_noop};
// The stream that records are written to: either the primary stream itself or the stream returned by
// detail::make_secondary_ostream().
488 stream_ptr_t secondary_stream{nullptr, stream_deleter_noop};
489
// Variant type with one format-exposer alternative per entry of valid_formats.
491 using format_type = typename detail::variant_from_tags<valid_formats,
492 detail::structure_file_output_format_exposer>::type;
// The format that records are written in; visited by write_record().
494 format_type format;
496
498 template <typename seq_type,
499 typename id_type,
500 typename bpp_type,
501 typename structure_type,
502 typename structured_seq_type,
503 typename energy_type,
504 typename react_type,
505 typename comment_type,
506 typename offset_type>
507 void write_record(seq_type && seq,
508 id_type && id,
509 bpp_type && bpp,
510 structure_type && structure,
511 structured_seq_type && structured_seq,
512 energy_type && energy,
513 react_type && react,
514 react_type && react_error,
515 comment_type && comment,
516 offset_type && offset)
517 {
518 static_assert(detail::decays_to_ignore_v<structured_seq_type> ||
519 (detail::decays_to_ignore_v<seq_type> && detail::decays_to_ignore_v<structure_type>),
520 "You may not select field::structured_seq and either of field::seq and field::structure "
521 "at the same time.");
522
523 assert(!format.valueless_by_exception());
524 std::visit([&] (auto & f)
525 {
526 if constexpr (!detail::decays_to_ignore_v<structured_seq_type>)
527 {
528 f.write_structure_record(*secondary_stream,
529 options,
530 structured_seq | views::elements<0>,
531 id,
532 bpp,
533 structured_seq | views::elements<1>,
534 energy,
535 react,
536 react_error,
537 comment,
538 offset);
539 }
540 else
541 {
542 f.write_structure_record(*secondary_stream,
543 options,
544 seq,
545 id,
546 bpp,
547 structure,
548 energy,
549 react,
550 react_error,
551 comment,
552 offset);
553 }
554 }, format);
555 }
556
// Befriend the iterator type so that it can access the non-public members of this class.
558 friend iterator;
559};
560
// Deduction guide for construction from a stream passed as `stream_t &&`: deduction of the selected
// fields, the file format and the stream type.
// NOTE(review): listing lines 568 and 571-572 (the file_format template parameter and the `-> ...`
// target) are missing from this rendering; the Doxygen member index of this listing gives the target as
// `structure_file_output<selected_field_ids, type_list<file_format>>`.
567template <output_stream stream_t,
569 detail::fields_specialisation selected_field_ids>
570structure_file_output(stream_t &&, file_format const &, selected_field_ids const &)
573
// Same deduction guide for a stream passed as `stream_t &`.
// NOTE(review): listing lines 576 and 579-581 are missing from this rendering as well.
575template <output_stream stream_t,
577 detail::fields_specialisation selected_field_ids>
578structure_file_output(stream_t &, file_format const &, selected_field_ids const &)
582
583} // namespace seqan3
A class for writing structured sequence files, e.g. Stockholm, Connect, Vienna, ViennaRNA bpp matrix ...
Definition: output.hpp:63
std::default_sentinel_t sentinel
The type returned by end().
Definition: output.hpp:127
void push_back(tuple_t &&t)
Write a record in form of a std::tuple to the file.
Definition: output.hpp:330
structure_file_output(stream_t &stream, file_format const &format_tag, selected_field_ids const &fields_tag=selected_field_ids{})
Construct from an existing stream and with specified format.
Definition: output.hpp:199
void const_reference
The const reference type (void).
Definition: output.hpp:117
detail::out_file_iterator< structure_file_output > iterator
The iterator type of this view (an output iterator).
Definition: output.hpp:123
char stream_char_type
Character type of the stream(s).
Definition: output.hpp:74
void push_back(record_t &&r)
Write a seqan3::record to the file.
Definition: output.hpp:291
structure_file_output(stream_t &&stream, file_format const &format_tag, selected_field_ids const &fields_tag=selected_field_ids{})
This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
Definition: output.hpp:215
structure_file_output & operator=(structure_file_output &&)=default
Move assignment is defaulted.
structure_file_output_options options
The options are public and its members can be set directly.
Definition: output.hpp:459
selected_field_ids_ selected_field_ids
A seqan3::fields list with the fields selected for the record.
Definition: output.hpp:70
structure_file_output()=delete
Default constructor is explicitly deleted, you need to give a stream or file name.
structure_file_output(std::filesystem::path filename, selected_field_ids const &fields_tag=selected_field_ids{})
Construct from filename.
Definition: output.hpp:161
friend structure_file_output operator|(rng_t &&range, structure_file_output &&f)
This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
Definition: output.hpp:448
void emplace_back(arg_t &&arg, arg_types &&... args)
Write a record to the file by passing individual fields.
Definition: output.hpp:372
friend structure_file_output & operator|(rng_t &&range, structure_file_output &f)
Write a range of records (or tuples) to the file.
Definition: output.hpp:437
void size_type
The size type (void).
Definition: output.hpp:119
structure_file_output(stream_t &&, file_format const &, selected_field_ids const &) -> structure_file_output< selected_field_ids, type_list< file_format > >
Deduction of the selected fields, the file format and the stream type.
void reference
The reference type (void).
Definition: output.hpp:115
iterator begin() noexcept
Returns an iterator to current position in the file.
Definition: output.hpp:248
sentinel end() noexcept
Returns a sentinel for comparison with iterator.
Definition: output.hpp:267
structure_file_output(stream_t &, file_format const &, selected_field_ids const &) -> structure_file_output< selected_field_ids, type_list< file_format > >
This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
structure_file_output & operator=(structure_file_output const &)=delete
Copy assignment is explicitly deleted, because you can't have multiple access to the same file.
structure_file_output(structure_file_output &&)=default
Move construction is defaulted.
void value_type
The value type (void).
Definition: output.hpp:113
valid_formats_ valid_formats
A seqan3::type_list with the possible formats.
Definition: output.hpp:72
structure_file_output & operator=(rng_t &&range)
Write a range of records (or tuples) to the file.
Definition: output.hpp:399
void const_iterator
The const iterator type is void, because files are not const-iterable.
Definition: output.hpp:125
~structure_file_output()=default
Destructor is defaulted.
structure_file_output(structure_file_output const &)=delete
Copy construction is explicitly deleted, because you can't have multiple access to the same file.
Provides seqan3::views::elements.
The <filesystem> header from C++17's standard library.
Provides the seqan3::format_vienna.
T format(T... args)
T forward(T... args)
T get(T... args)
field
An enumerator for the fields used in file formats.
Definition: record.hpp:63
@ energy
Energy of a folded sequence, represented by one float number.
@ comment
Comment field of arbitrary content, usually a string.
@ structure
Fixed interactions, usually a string of structure alphabet characters.
@ bpp
Base pair probability matrix of interactions, usually a matrix of float numbers.
@ react
Reactivity values of the sequence characters given in a vector of float numbers.
@ react_err
Reactivity error values given in a vector corresponding to seqan3::field::react.
@ offset
Sequence (seqan3::field::seq) relative start position (0-based), unsigned value.
@ structured_seq
Sequence and fixed interactions combined in one range.
@ id
The identifier, usually a string.
@ seq
The "sequence", usually a range of nucleotides or amino acids.
constexpr bool contains
Whether a type occurs in a type list or not.
Definition: traits.hpp:231
The generic concept for structure file out formats.
Whether a type behaves like a tuple.
Provides exceptions used in the I/O module.
Stream concepts.
Provides various utility functions required only for output.
The main SeqAn3 namespace.
Definition: cigar_operation_table.hpp:2
Provides the seqan3::detail::out_file_iterator class template.
The <ranges> header from C++20's standard library.
Provides the seqan3::record template and the seqan3::field enum.
Provides seqan3::detail::record_like.
A class template that holds a choice of seqan3::field.
Definition: record.hpp:128
The class template that file records are based on; behaves like a std::tuple.
Definition: record.hpp:191
The options type defines various option members that influence the behaviour of all or some formats.
Definition: output_options.hpp:26
Type that contains multiple types.
Definition: type_list.hpp:29
Provides seqan3::structure_file_output_format and auxiliary classes.
Provides seqan3::structure_file_output_options.
T tie(T... args)
Provides traits for seqan3::type_list.
Provides seqan3::tuple_like.
Provides seqan3::views::convert.
T visit(T... args)
Provides seqan3::views::zip.