SeqAn3 3.4.0-rc.1
The Modern C++ library for sequence analysis.
Loading...
Searching...
No Matches
seqan3::sam_file_output< selected_field_ids_, valid_formats_, ref_ids_type > Class Template Reference

A class for writing SAM files, both SAM and its binary representation BAM are supported. More...

#include <seqan3/io/sam_file/output.hpp>

Public Types

using field_ids = fields< field::seq, field::id, field::ref_id, field::ref_offset, field::cigar, field::mapq, field::flag, field::qual, field::mate, field::tags, field::header_ptr >
 The subset of seqan3::field IDs that are valid for this file.
 
Template arguments

Exposed as member types for public access.

using selected_field_ids = selected_field_ids_
 A seqan3::fields list with the fields selected for the record.
 
using valid_formats = valid_formats_
 A seqan3::type_list with the possible formats.
 
using stream_char_type = char
 Character type of the stream(s).
 
Range associated types

Most of the range associated types are void for output ranges.

using value_type = void
 The value type (void).
 
using reference = void
 The reference type (void).
 
using const_reference = void
 The const reference type (void).
 
using size_type = void
 The size type (void).
 
using difference_type = std::ptrdiff_t
 A signed integer type, usually std::ptrdiff_t.
 
using iterator = detail::out_file_iterator< sam_file_output >
 The iterator type of this view (an output iterator).
 
using const_iterator = void
 The const iterator type is void, because files are not const-iterable.
 
using sentinel = std::default_sentinel_t
 The type returned by end().
 

Public Member Functions

auto & header ()
 Access the file's header.
 
Constructors, destructor and assignment
 sam_file_output ()=delete
 Default constructor is explicitly deleted, you need to give a stream or file name.
 
 sam_file_output (sam_file_output const &)=delete
 Copy construction is explicitly deleted, because you can't have multiple access to the same file.
 
sam_file_output & operator= (sam_file_output const &)=delete
 Copy assignment is explicitly deleted, because you can't have multiple access to the same file.
 
 sam_file_output (sam_file_output &&)=default
 Move construction is defaulted.
 
sam_file_output & operator= (sam_file_output &&)=default
 Move assignment is defaulted.
 
 ~sam_file_output ()
 The destructor will write the header if it has not been written before.
 
 sam_file_output (std::filesystem::path filename, selected_field_ids const &fields_tag=selected_field_ids{})
 Construct from filename.
 
template<output_stream stream_type, sam_file_output_format file_format>
requires std::same_as<typename std::remove_reference_t<stream_type>::char_type, stream_char_type>
 sam_file_output (stream_type &stream, file_format const &format_tag, selected_field_ids const &fields_tag=selected_field_ids{})
 Construct from an existing stream and with specified format.
 
template<output_stream stream_type, sam_file_output_format file_format>
requires std::same_as<typename std::remove_reference_t<stream_type>::char_type, stream_char_type>
 sam_file_output (stream_type &&stream, file_format const &format_tag, selected_field_ids const &fields_tag=selected_field_ids{})
 This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
 
template<typename ref_ids_type_ , std::ranges::forward_range ref_lengths_type>
requires std::same_as<std::remove_reference_t<ref_ids_type_>, ref_ids_type>
 sam_file_output (std::filesystem::path const &filename, ref_ids_type_ &&ref_ids, ref_lengths_type &&ref_lengths, selected_field_ids const &fields_tag=selected_field_ids{})
 Construct from filename.
 
template<output_stream stream_type, sam_file_output_format file_format, typename ref_ids_type_ , std::ranges::forward_range ref_lengths_type>
requires std::same_as<std::remove_reference_t<ref_ids_type_>, ref_ids_type>
 sam_file_output (stream_type &&stream, ref_ids_type_ &&ref_ids, ref_lengths_type &&ref_lengths, file_format const &format_tag, selected_field_ids const &fields_tag=selected_field_ids{})
 Construct from an existing stream and with specified format.
 

Public Attributes

sam_file_output_options options
 The options are public and their members can be set directly.
 

Related Symbols

(Note that these are not member symbols.)

Type deduction guides
template<detail::fields_specialisation selected_field_ids>
 sam_file_output (std::filesystem::path, selected_field_ids const &) -> sam_file_output< selected_field_ids, typename sam_file_output<>::valid_formats, ref_info_not_given >
 Deduces selected_field_ids from input and sets sam_file_output::ref_ids_type to seqan3::detail::ref_info_not_given. valid_formats is set to the default.
 
template<output_stream stream_type, sam_file_output_format file_format, detail::fields_specialisation selected_field_ids>
 sam_file_output (stream_type &&, file_format const &, selected_field_ids const &) -> sam_file_output< selected_field_ids, type_list< file_format >, ref_info_not_given >
 Deduces selected_field_ids, and the valid format from input and sets sam_file_output::ref_ids_type to seqan3::detail::ref_info_not_given.
 
template<output_stream stream_type, sam_file_output_format file_format, detail::fields_specialisation selected_field_ids>
 sam_file_output (stream_type &, file_format const &, selected_field_ids const &) -> sam_file_output< selected_field_ids, type_list< file_format >, ref_info_not_given >
 Deduces selected_field_ids, and the valid format from input and sets sam_file_output::ref_ids_type to seqan3::detail::ref_info_not_given.
 
template<output_stream stream_type, sam_file_output_format file_format>
 sam_file_output (stream_type &&, file_format const &) -> sam_file_output< typename sam_file_output<>::selected_field_ids, type_list< file_format >, ref_info_not_given >
 Deduces the valid format from input and sets sam_file_output::ref_ids_type to seqan3::detail::ref_info_not_given. selected_field_ids is set to the default.
 
template<output_stream stream_type, sam_file_output_format file_format>
 sam_file_output (stream_type &, file_format const &) -> sam_file_output< typename sam_file_output<>::selected_field_ids, type_list< file_format >, ref_info_not_given >
 Deduces the valid format from input and sets sam_file_output::ref_ids_type to seqan3::detail::ref_info_not_given. selected_field_ids is set to the default.
 
template<detail::fields_specialisation selected_field_ids, std::ranges::forward_range ref_ids_type, std::ranges::forward_range ref_lengths_type>
 sam_file_output (std::filesystem::path const &, ref_ids_type &&, ref_lengths_type &&, selected_field_ids const &) -> sam_file_output< selected_field_ids, typename sam_file_output<>::valid_formats, std::remove_reference_t< ref_ids_type > >
 Deduces selected_field_ids and ref_ids_type from input. valid_formats is set to the default.
 
template<std::ranges::forward_range ref_ids_type, std::ranges::forward_range ref_lengths_type>
 sam_file_output (std::filesystem::path const &, ref_ids_type &&, ref_lengths_type &&) -> sam_file_output< typename sam_file_output<>::selected_field_ids, typename sam_file_output<>::valid_formats, std::remove_reference_t< ref_ids_type > >
 Deduces ref_ids_type from input. Valid formats, and selected_field_ids are set to the default.
 
template<output_stream stream_type, std::ranges::forward_range ref_ids_type, std::ranges::forward_range ref_lengths_type, sam_file_output_format file_format, detail::fields_specialisation selected_field_ids>
 sam_file_output (stream_type &&, ref_ids_type &&, ref_lengths_type &&, file_format const &, selected_field_ids const &) -> sam_file_output< selected_field_ids, type_list< file_format >, std::remove_reference_t< ref_ids_type > >
 Deduces selected_field_ids, the valid format, and the ref_ids_type from input.
 
template<output_stream stream_type, std::ranges::forward_range ref_ids_type, std::ranges::forward_range ref_lengths_type, sam_file_output_format file_format, detail::fields_specialisation selected_field_ids>
 sam_file_output (stream_type &, ref_ids_type &&, ref_lengths_type &&, file_format const &, selected_field_ids const &) -> sam_file_output< selected_field_ids, type_list< file_format >, std::remove_reference_t< ref_ids_type > >
 Deduces selected_field_ids, the valid format, and the ref_ids_type from input.
 
template<output_stream stream_type, std::ranges::forward_range ref_ids_type, std::ranges::forward_range ref_lengths_type, sam_file_output_format file_format>
 sam_file_output (stream_type &&, ref_ids_type &&, ref_lengths_type &&, file_format const &) -> sam_file_output< typename sam_file_output<>::selected_field_ids, type_list< file_format >, std::remove_reference_t< ref_ids_type > >
 Deduces the valid format, and the ref_ids_type from input. selected_field_ids is set to the default.
 
template<output_stream stream_type, std::ranges::forward_range ref_ids_type, std::ranges::forward_range ref_lengths_type, sam_file_output_format file_format>
 sam_file_output (stream_type &, ref_ids_type &&, ref_lengths_type &&, file_format const &) -> sam_file_output< typename sam_file_output<>::selected_field_ids, type_list< file_format >, std::remove_reference_t< ref_ids_type > >
 Deduces the valid format, and the ref_ids_type from input. selected_field_ids is set to the default.
 

Range interface

Provides functions for record based writing of the file.

iterator begin () noexcept
 Returns an iterator to current position in the file.
 
sentinel end () noexcept
 Returns a sentinel for comparison with iterator.
 
template<typename record_t >
requires detail::record_like<record_t>
void push_back (record_t &&r)
 Write a seqan3::record to the file.
 
template<typename tuple_t >
requires tuple_like<tuple_t> && (!detail::record_like<tuple_t>)
void push_back (tuple_t &&t)
 Write a record in form of a std::tuple to the file.
 
template<typename arg_t , typename... arg_types>
requires (sizeof...(arg_types) + 1 <= selected_field_ids::size)
void emplace_back (arg_t &&arg, arg_types &&... args)
 Write a record to the file by passing individual fields.
 
template<typename rng_t >
requires std::ranges::input_range<rng_t> && tuple_like<std::ranges::range_reference_t<rng_t>>
sam_file_output & operator= (rng_t &&range)
 Write a range of records (or tuples) to the file.
 
template<typename rng_t >
requires std::ranges::input_range<rng_t> && tuple_like<std::ranges::range_reference_t<rng_t>>
sam_file_output & operator| (rng_t &&range, sam_file_output &f)
 Write a range of records (or tuples) to the file.
 
template<typename rng_t >
requires std::ranges::input_range<rng_t> && tuple_like<std::ranges::range_reference_t<rng_t>>
sam_file_output operator| (rng_t &&range, sam_file_output &&f)
 This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
 

Detailed Description

template<detail::fields_specialisation selected_field_ids_ = fields<field::seq, field::id, field::ref_id, field::ref_offset, field::cigar, field::mapq, field::qual, field::flag, field::mate, field::tags, field::header_ptr>, detail::type_list_of_sam_file_output_formats valid_formats_ = type_list<format_sam, format_bam>, typename ref_ids_type = ref_info_not_given>
class seqan3::sam_file_output< selected_field_ids_, valid_formats_, ref_ids_type >

A class for writing SAM files, both SAM and its binary representation BAM are supported.

Template Parameters
selected_field_ids - A seqan3::fields type with the list and order of field IDs; only relevant if these can't be deduced.
valid_formats - A seqan3::type_list of the selectable formats (each must model seqan3::sam_file_output_format).

Writing SAM files

Construction and specialisation

The seqan3::sam_file_output class comes with two constructors, one for construction from a file name and one for construction from an existing stream and a known format. The first one automatically picks the format based on the extension of the file name. The second can be used if you have a non-file stream, like std::cout or std::ostringstream, that you want to write to and/or if you cannot use file-extension based detection, but know that your output file has a certain format.

In most cases the template parameters are deduced completely automatically:
// SPDX-FileCopyrightText: 2006-2024 Knut Reinert & Freie Universität Berlin
// SPDX-FileCopyrightText: 2016-2024 Knut Reinert & MPI für molekulare Genetik
// SPDX-License-Identifier: CC0-1.0
#include <filesystem>
int main()
{
auto tmp_file = std::filesystem::temp_directory_path() / "my.sam";
seqan3::sam_file_output fout{tmp_file}; // SAM format detected, std::ofstream opened for file
}
A class for writing SAM files, both SAM and its binary representation BAM are supported.
Definition io/sam_file/output.hpp:71
Provides seqan3::sam_file_output and corresponding traits classes.
T remove(T... args)
T temp_directory_path(T... args)
Writing to std::cout:
// SPDX-FileCopyrightText: 2006-2024 Knut Reinert & Freie Universität Berlin
// SPDX-FileCopyrightText: 2016-2024 Knut Reinert & MPI für molekulare Genetik
// SPDX-License-Identifier: CC0-1.0
#include <iostream>
int main()
{
}
The SAM format (tag).
Definition format_sam.hpp:105
Note that this is not the same as writing sam_file_output<> (with angle brackets). In the latter case they are explicitly set to their default values, in the former case automatic deduction happens which chooses different parameters depending on the constructor arguments. For opening from file, sam_file_output<> would have also worked, but for opening from stream it would not have.

Writing record-wise

// SPDX-FileCopyrightText: 2006-2024 Knut Reinert & Freie Universität Berlin
// SPDX-FileCopyrightText: 2016-2024 Knut Reinert & MPI für molekulare Genetik
// SPDX-License-Identifier: CC0-1.0
#include <sstream>
#include <string>
#include <vector>
int main()
{
std::string read_id;
// ... e.g. compute an alignment
using alignment_type =
alignment_type dummy_alignment{}; // an empty dummy alignment
// the record type specifies the fields we want to write
// initialize record
record_type rec{read, ref_id, dummy_alignment};
// Write the record
fout.push_back(rec);
// same as
fout.push_back(record_type{read, ref_id, dummy_alignment});
// as all our fields are empty so this would print an
}
Provides seqan3::dna5, container aliases and string literals.
@ ref_id
The identifier of the (reference) sequence that seqan3::field::seq was aligned to.
T push_back(T... args)
A class template that holds a choice of seqan3::field.
Definition record.hpp:125
The class template that file records are based on; behaves like a std::tuple.
Definition record.hpp:190
Type that contains multiple types.
Definition type_list.hpp:26
Provides seqan3::type_list.
The easiest way to write to a SAM/BAM file is to use the push_back() member functions. These work similarly to how they work on a std::vector. You may also use a tuple like interface or the emplace_back() function but this is not recommended since one would have to keep track of the correct order of many fields (14 in total). For the record based interface using push_back() please also see the seqan3::record documentation on how to specify a record with the correct field and type lists.

You may also use the output file's iterator for writing, however, this rarely provides an advantage.

Writing record-wise (custom fields)

If you want to omit non-required parameters or change the order of the parameters, you can pass a non-empty fields trait object to the seqan3::sam_file_output constructor to select the fields that are used for interpreting the arguments.

The following snippet demonstrates the usage of such a field_traits object.
// SPDX-FileCopyrightText: 2006-2024 Knut Reinert & Freie Universität Berlin
// SPDX-FileCopyrightText: 2016-2024 Knut Reinert & MPI für molekulare Genetik
// SPDX-License-Identifier: CC0-1.0
#include <filesystem>
#include <sstream>
#include <tuple>
int main()
{
// I only want to print the mapping position (field::ref_offset) and flag:
unsigned mapping_pos{1300};
// ...
fout.emplace_back(mapping_pos, flag); // note that the order of the arguments is now different, because
// you specified that REF_OFFSET should be first
// or:
fout.push_back(std::tie(mapping_pos, flag));
}
sam_flag
An enum flag that describes the properties of an aligned read (given as a SAM record).
Definition sam_flag.hpp:73
@ none
None of the flags below are set.
@ flag
The alignment flag (bit information), uint16_t value.
T tie(T... args)
A different way of passing custom fields to the file is to pass a seqan3::record – instead of a tuple – to push_back(). The seqan3::record clearly indicates which of its elements has which seqan3::field so the file will use that information instead of the template argument. This is especially handy when reading from one file and writing to another, because you don't have to configure the output file to match the input file, it will just work:
// SPDX-FileCopyrightText: 2006-2024 Knut Reinert & Freie Universität Berlin
// SPDX-FileCopyrightText: 2016-2024 Knut Reinert & MPI für molekulare Genetik
// SPDX-License-Identifier: CC0-1.0
#include <filesystem>
#include <sstream>
auto sam_file_raw = R"(@HD VN:1.6 SO:coordinate GO:none
@SQ SN:ref LN:45
r001 99 ref 7 30 8M2I4M1D3M = 37 39 TTAGATAAAGGATACTG *
r003 0 ref 29 30 5S6M * 0 0 GCCTAAGCTAA * SA:Z:ref,29,-,6H5M,17,0;
r003 2064 ref 29 17 6H5M * 0 0 TAGGC * SA:Z:ref,9,+,5S6M,30,1;
r001 147 ref 237 30 9M = 7 -39 CAGCGGCAT * NM:i:1
)";
int main()
{
// fin uses custom fields, fout uses the default fields.
// output doesn't have to match the configuration of the input
for (auto & r : fin)
fout.push_back(r); // copy all the records.
}
A class for reading SAM files, both SAM and its binary representation BAM are supported.
Definition sam_file/input.hpp:239
void push_back(record_t &&r)
Write a seqan3::record to the file.
Definition io/sam_file/output.hpp:406
Meta-header for the IO / SAM File submodule.
This will copy the seqan3::field::flag and seqan3::field::ref_offset value into the new output file.
Note
Note that the other SAM columns in the output file will have a default value, so unless you specify to read all SAM columns (see seqan3::format_sam) the output file will not be equal to the input file.

Writing record-wise in batches

You can write multiple records at once, by assigning to the file:
// SPDX-FileCopyrightText: 2006-2024 Knut Reinert & Freie Universität Berlin
// SPDX-FileCopyrightText: 2016-2024 Knut Reinert & MPI für molekulare Genetik
// SPDX-License-Identifier: CC0-1.0
#include <sstream>
#include <string>
#include <tuple>
#include <vector>
int main()
{
using namespace seqan3::literals;
{"NATA"_dna5, "2nd"},
{"GATA"_dna5, "Third"}}; // a range of "records"
fout = range; // will iterate over the records and write them
// equivalent to:
range | fout;
}
The SeqAn namespace for literals.

File I/O pipelines

Record-wise writing in batches also works for writing from input files directly to output files, because input files are also input ranges in SeqAn:
// SPDX-FileCopyrightText: 2006-2024 Knut Reinert & Freie Universität Berlin
// SPDX-FileCopyrightText: 2016-2024 Knut Reinert & MPI für molekulare Genetik
// SPDX-License-Identifier: CC0-1.0
#include <sstream>
auto sam_file_raw = R"(First 0 * 0 0 * * 0 0 ACGT *
2nd 0 * 0 0 * * 0 0 NATA *
Third 0 * 0 0 * * 0 0 GATA *
)";
int main()
{
// copying a file in one line:
// with seqan3::sam_file_output as a variable:
fout = fin;
// or in pipe notation:
}
This can be combined with file-based views to create I/O pipelines:
#include <ranges>
#include <sstream>
auto sam_file_raw = R"(@HD VN:1.6 SO:coordinate GO:none
@SQ SN:ref LN:45
r001 99 ref 7 30 * = 37 39 TTAGATAAAGGATACTG *
r003 0 ref 29 30 * * 0 0 GCCTAAGCTAA * SA:Z:ref,29,-,6H5M,17,0;
r003 2064 ref 29 17 * * 0 0 TAGGC * SA:Z:ref,9,+,5S6M,30,1;
r001 147 ref 237 30 * = 7 -39 CAGCGGCAT * NM:i:1
)";
int main()
{
auto input_file = seqan3::sam_file_input{std::istringstream{sam_file_raw}, seqan3::format_sam{}};
input_file | std::views::take(3) // take only the first 3 records
}

Formats

We currently support writing the following formats:

Remarks
For a complete overview, take a look at SAM File.

Constructor & Destructor Documentation

◆ sam_file_output() [1/4]

template<detail::fields_specialisation selected_field_ids_ = fields<field::seq, field::id, field::ref_id, field::ref_offset, field::cigar, field::mapq, field::qual, field::flag, field::mate, field::tags, field::header_ptr>, detail::type_list_of_sam_file_output_formats valid_formats_ = type_list<format_sam, format_bam>, typename ref_ids_type = ref_info_not_given>
seqan3::sam_file_output< selected_field_ids_, valid_formats_, ref_ids_type >::sam_file_output ( std::filesystem::path  filename,
selected_field_ids const &  fields_tag = selected_field_ids{} 
)
inline

Construct from filename.

Parameters
[in] filename - Path to the file you wish to open.
[in] fields_tag - A seqan3::fields tag. [optional]

In addition to the file name, you may specify a custom seqan3::fields type which may be easier than defining all the template parameters.

Compression

This constructor transparently applies a compression stream on top of the file stream in case the given file extension suggests the user wants this. See the section on compression and decompression for more information.

Example:

In most cases the template parameters are deduced completely automatically:

// SPDX-FileCopyrightText: 2006-2024 Knut Reinert & Freie Universität Berlin
// SPDX-FileCopyrightText: 2016-2024 Knut Reinert & MPI für molekulare Genetik
// SPDX-License-Identifier: CC0-1.0
#include <filesystem>
int main()
{
auto tmp_file = std::filesystem::temp_directory_path() / "my.sam";
seqan3::sam_file_output fout{tmp_file}; // SAM format detected, std::ofstream opened for file
}

Writing with custom selected fields:

// SPDX-FileCopyrightText: 2006-2024 Knut Reinert & Freie Universität Berlin
// SPDX-FileCopyrightText: 2016-2024 Knut Reinert & MPI für molekulare Genetik
// SPDX-License-Identifier: CC0-1.0
#include <sstream>
int main()
{
// no need to specify the template arguments <...> for format specialization:
}

◆ sam_file_output() [2/4]

template<detail::fields_specialisation selected_field_ids_ = fields<field::seq, field::id, field::ref_id, field::ref_offset, field::cigar, field::mapq, field::qual, field::flag, field::mate, field::tags, field::header_ptr>, detail::type_list_of_sam_file_output_formats valid_formats_ = type_list<format_sam, format_bam>, typename ref_ids_type = ref_info_not_given>
template<output_stream stream_type, sam_file_output_format file_format>
requires std::same_as<typename std::remove_reference_t<stream_type>::char_type, stream_char_type>
seqan3::sam_file_output< selected_field_ids_, valid_formats_, ref_ids_type >::sam_file_output ( stream_type &  stream,
file_format const &  format_tag,
selected_field_ids const &  fields_tag = selected_field_ids{} 
)
inline

Construct from an existing stream and with specified format.

Template Parameters
stream_type - The type of stream to write to; must model seqan3::output_stream.
file_format - The format of the file in the stream, must satisfy seqan3::sam_file_output_format.
Parameters
[out] stream - The stream to write to, must be derived from std::basic_ostream<stream_char_type>.
[in] format_tag - The file format tag.
[in] fields_tag - A seqan3::fields tag. [optional]

Compression

This constructor does not apply compression transparently (because there is no way to know if the user wants this). However, you can just pass e.g. seqan3::contrib::gz_ostream to this constructor if you explicitly want compression. See the section on compression and decompression for more information.

◆ sam_file_output() [3/4]

template<detail::fields_specialisation selected_field_ids_ = fields<field::seq, field::id, field::ref_id, field::ref_offset, field::cigar, field::mapq, field::qual, field::flag, field::mate, field::tags, field::header_ptr>, detail::type_list_of_sam_file_output_formats valid_formats_ = type_list<format_sam, format_bam>, typename ref_ids_type = ref_info_not_given>
template<typename ref_ids_type_ , std::ranges::forward_range ref_lengths_type>
requires std::same_as<std::remove_reference_t<ref_ids_type_>, ref_ids_type>
seqan3::sam_file_output< selected_field_ids_, valid_formats_, ref_ids_type >::sam_file_output ( std::filesystem::path const &  filename,
ref_ids_type_ &&  ref_ids,
ref_lengths_type &&  ref_lengths,
selected_field_ids const &  fields_tag = selected_field_ids{} 
)
inline

Construct from filename.

Template Parameters
ref_ids_type_ - The type of range over reference ids; must model std::forward_range.
ref_lengths_type - The type of range over reference lengths; must model std::forward_range.
Parameters
[in] filename - Path to the file you wish to open.
[in] ref_ids - A range over reference ids.
[in] ref_lengths - A range over lengths of reference sequences (same order as ref_ids).
[in] fields_tag - A seqan3::fields tag. [optional]

In addition to the file name, you may specify a custom seqan3::fields type which may be easier than defining all the template parameters.

Compression

This constructor transparently applies a compression stream on top of the file stream in case the given file extension suggests the user wants this. See the section on compression and decompression for more information.

Example:

In most cases the template parameters are deduced completely automatically:

// SPDX-FileCopyrightText: 2006-2024 Knut Reinert & Freie Universität Berlin
// SPDX-FileCopyrightText: 2016-2024 Knut Reinert & MPI für molekulare Genetik
// SPDX-License-Identifier: CC0-1.0
#include <filesystem>
#include <string>
#include <vector>
int main()
{
auto tmp_file = std::filesystem::temp_directory_path() / "my.sam";
std::vector<std::string> ref_ids{"ref1", "ref2"};
std::vector<size_t> ref_lengths{1234, 5678};
seqan3::sam_file_output fout{tmp_file, ref_ids, ref_lengths};
}

Writing with custom selected fields:

// SPDX-FileCopyrightText: 2006-2024 Knut Reinert & Freie Universität Berlin
// SPDX-FileCopyrightText: 2016-2024 Knut Reinert & MPI für molekulare Genetik
// SPDX-License-Identifier: CC0-1.0
#include <sstream>
int main()
{
// no need to specify the template arguments <...> for format specialization:
}

◆ sam_file_output() [4/4]

template<detail::fields_specialisation selected_field_ids_ = fields<field::seq, field::id, field::ref_id, field::ref_offset, field::cigar, field::mapq, field::qual, field::flag, field::mate, field::tags, field::header_ptr>, detail::type_list_of_sam_file_output_formats valid_formats_ = type_list<format_sam, format_bam>, typename ref_ids_type = ref_info_not_given>
template<output_stream stream_type, sam_file_output_format file_format, typename ref_ids_type_ , std::ranges::forward_range ref_lengths_type>
requires std::same_as<std::remove_reference_t<ref_ids_type_>, ref_ids_type>
seqan3::sam_file_output< selected_field_ids_, valid_formats_, ref_ids_type >::sam_file_output ( stream_type &&  stream,
ref_ids_type_ &&  ref_ids,
ref_lengths_type &&  ref_lengths,
file_format const &  format_tag,
selected_field_ids const &  fields_tag = selected_field_ids{} 
)
inline

Construct from an existing stream and with specified format.

Template Parameters
stream_type - The type of stream to write to; must model seqan3::output_stream.
file_format - The format of the file in the stream, must model seqan3::sam_file_output_format.
ref_ids_type_ - The type of range over reference ids; must model std::forward_range.
ref_lengths_type - The type of range over reference lengths; must model std::forward_range.
Parameters
[in] stream - The stream to operate on (this must be std::move'd in!).
[in] ref_ids - A range over reference ids.
[in] ref_lengths - A range over lengths of reference sequences (same order as ref_ids).
[in] format_tag - The file format tag.
[in] fields_tag - A seqan3::fields tag. [optional]

Compression

This constructor does not apply compression transparently (because there is no way to know if the user wants this). However, you can just pass e.g. seqan3::contrib::gz_ostream to this constructor if you explicitly want compression. See the section on compression and decompression for more information.

Member Function Documentation

◆ begin()

template<detail::fields_specialisation selected_field_ids_ = fields<field::seq, field::id, field::ref_id, field::ref_offset, field::cigar, field::mapq, field::qual, field::flag, field::mate, field::tags, field::header_ptr>, detail::type_list_of_sam_file_output_formats valid_formats_ = type_list<format_sam, format_bam>, typename ref_ids_type = ref_info_not_given>
iterator seqan3::sam_file_output< selected_field_ids_, valid_formats_, ref_ids_type >::begin ( )
inline noexcept

Returns an iterator to current position in the file.

Returns
An iterator pointing to the current position in the file.

You can write to the file by assigning to the iterator, but using push_back() is usually more intuitive.

Complexity

Constant.

Exceptions

No-throw guarantee.

Example

// SPDX-FileCopyrightText: 2006-2024 Knut Reinert & Freie Universität Berlin
// SPDX-FileCopyrightText: 2016-2024 Knut Reinert & MPI für molekulare Genetik
// SPDX-License-Identifier: CC0-1.0
#include <sstream>
#include <string>
#include <tuple>
int main()
{
auto stream_it = fout.begin();
seqan3::dna5_vector seq;
// ...
// assign to file iterator
*stream_it = std::tie(seq, id);
// is the same as:
fout.push_back(std::tie(seq, id));
}
@ id
The identifier, usually a string.
@ seq
The "sequence", usually a range of nucleotides or amino acids.

◆ emplace_back()

template<detail::fields_specialisation selected_field_ids_ = fields<field::seq, field::id, field::ref_id, field::ref_offset, field::cigar, field::mapq, field::qual, field::flag, field::mate, field::tags, field::header_ptr>, detail::type_list_of_sam_file_output_formats valid_formats_ = type_list<format_sam, format_bam>, typename ref_ids_type = ref_info_not_given>
template<typename arg_t , typename... arg_types>
requires (sizeof...(arg_types) + 1 <= selected_field_ids::size)
void seqan3::sam_file_output< selected_field_ids_, valid_formats_, ref_ids_type >::emplace_back ( arg_t &&  arg,
arg_types &&...  args 
)
inline

Write a record to the file by passing individual fields.

Template Parameters
arg_t - Type of the first field.
arg_types - Types of further fields.
Parameters
[in] arg - The first field to write.
[in] args - Further fields.

The fields are assumed to correspond to the field IDs given in selected_field_ids; however, passing fewer is accepted if the format does not require all of them.

Complexity

Constant.

Exceptions

Basic exception safety.

Example

// SPDX-FileCopyrightText: 2006-2024 Knut Reinert & Freie Universität Berlin
// SPDX-FileCopyrightText: 2016-2024 Knut Reinert & MPI für molekulare Genetik
// SPDX-License-Identifier: CC0-1.0
#include <sstream>
int main()
{
// I only want to print the mapping position (field::ref_offset) and flag:
unsigned mapping_pos{1300};
// ...
fout.emplace_back(mapping_pos, flag); // note that the order of the arguments is now different, because
// you specified that REF_OFFSET should be first
// or:
fout.push_back(std::tie(mapping_pos, flag));
}

◆ end()

template<detail::fields_specialisation selected_field_ids_ = fields<field::seq, field::id, field::ref_id, field::ref_offset, field::cigar, field::mapq, field::qual, field::flag, field::mate, field::tags, field::header_ptr>, detail::type_list_of_sam_file_output_formats valid_formats_ = type_list<format_sam, format_bam>, typename ref_ids_type = ref_info_not_given>
sentinel seqan3::sam_file_output< selected_field_ids_, valid_formats_, ref_ids_type >::end ( )
inline noexcept

Returns a sentinel for comparison with iterator.

Returns
An end that is never reached.

This element acts as a placeholder; attempting to dereference it results in undefined behaviour. It always compares false against an iterator.

Complexity

Constant.

Exceptions

No-throw guarantee.

◆ header()

template<detail::fields_specialisation selected_field_ids_ = fields<field::seq, field::id, field::ref_id, field::ref_offset, field::cigar, field::mapq, field::qual, field::flag, field::mate, field::tags, field::header_ptr>, detail::type_list_of_sam_file_output_formats valid_formats_ = type_list<format_sam, format_bam>, typename ref_ids_type = ref_info_not_given>
auto & seqan3::sam_file_output< selected_field_ids_, valid_formats_, ref_ids_type >::header ( )
inline

Access the file's header.

Example

// SPDX-FileCopyrightText: 2006-2024 Knut Reinert & Freie Universität Berlin
// SPDX-FileCopyrightText: 2016-2024 Knut Reinert & MPI für molekulare Genetik
// SPDX-License-Identifier: CC0-1.0
#include <sstream>
#include <string>
#include <vector>
int main()
{
std::vector<std::string> ref_ids{"ref1", "ref2"};
std::vector<size_t> ref_lengths{1234, 5678};
// always give reference information if you want to have your header properly initialised
// add information to the header of the file.
fout.header().comments.push_back("This is a comment");
}
See also
seqan3::sam_file_header

◆ operator=()

template<detail::fields_specialisation selected_field_ids_ = fields<field::seq, field::id, field::ref_id, field::ref_offset, field::cigar, field::mapq, field::qual, field::flag, field::mate, field::tags, field::header_ptr>, detail::type_list_of_sam_file_output_formats valid_formats_ = type_list<format_sam, format_bam>, typename ref_ids_type = ref_info_not_given>
template<typename rng_t >
requires std::ranges::input_range<rng_t> && tuple_like<std::ranges::range_reference_t<rng_t>>
sam_file_output & seqan3::sam_file_output< selected_field_ids_, valid_formats_, ref_ids_type >::operator= ( rng_t &&  range)
inline

Write a range of records (or tuples) to the file.

Template Parameters
rng_t: Type of the range, must satisfy std::ranges::input_range and have a reference type that satisfies seqan3::tuple_like.
Parameters
[in] range: The range to write.

This function simply iterates over the argument and calls push_back() on each element.

Complexity

Linear in the number of records.

Exceptions

Basic exception safety.

Example

// SPDX-FileCopyrightText: 2006-2024 Knut Reinert & Freie Universität Berlin
// SPDX-FileCopyrightText: 2016-2024 Knut Reinert & MPI für molekulare Genetik
// SPDX-License-Identifier: CC0-1.0
#include <sstream>
#include <string>
#include <tuple>
#include <vector>
int main()
{
using namespace seqan3::literals;
{"NATA"_dna5, "2nd"},
{"GATA"_dna5, "Third"}}; // a range of "records"
fout = range; // will iterate over the records and write them
// equivalent to:
range | fout;
}

◆ push_back() [1/2]

template<detail::fields_specialisation selected_field_ids_ = fields<field::seq, field::id, field::ref_id, field::ref_offset, field::cigar, field::mapq, field::qual, field::flag, field::mate, field::tags, field::header_ptr>, detail::type_list_of_sam_file_output_formats valid_formats_ = type_list<format_sam, format_bam>, typename ref_ids_type = ref_info_not_given>
template<typename record_t >
requires detail::record_like<record_t>
void seqan3::sam_file_output< selected_field_ids_, valid_formats_, ref_ids_type >::push_back ( record_t &&  r)
inline

Write a seqan3::record to the file.

Template Parameters
record_t: Type of the record, a specialisation of seqan3::record.
Parameters
[in] r: The record to write.

Complexity

Constant.

Exceptions

Basic exception safety.

Example

// SPDX-FileCopyrightText: 2006-2024 Knut Reinert & Freie Universität Berlin
// SPDX-FileCopyrightText: 2016-2024 Knut Reinert & MPI für molekulare Genetik
// SPDX-License-Identifier: CC0-1.0
#include <sstream>
#include <string>
int main()
{
// ...
fout.push_back(r);
}

◆ push_back() [2/2]

template<detail::fields_specialisation selected_field_ids_ = fields<field::seq, field::id, field::ref_id, field::ref_offset, field::cigar, field::mapq, field::qual, field::flag, field::mate, field::tags, field::header_ptr>, detail::type_list_of_sam_file_output_formats valid_formats_ = type_list<format_sam, format_bam>, typename ref_ids_type = ref_info_not_given>
template<typename tuple_t >
requires tuple_like<tuple_t> && (!detail::record_like<tuple_t>)
void seqan3::sam_file_output< selected_field_ids_, valid_formats_, ref_ids_type >::push_back ( tuple_t &&  t)
inline

Write a record in the form of a std::tuple to the file.

Template Parameters
tuple_t: Type of the record, a specialisation of std::tuple.
Parameters
[in] t: The record to write.

The fields in the tuple are assumed to correspond to the field IDs given in selected_field_ids; however, passing fewer fields is accepted if the format does not require all of them.

Complexity

Constant.

Exceptions

Basic exception safety.

Example

// SPDX-FileCopyrightText: 2006-2024 Knut Reinert & Freie Universität Berlin
// SPDX-FileCopyrightText: 2016-2024 Knut Reinert & MPI für molekulare Genetik
// SPDX-License-Identifier: CC0-1.0
#include <sstream>
#include <string>
#include <tuple>
int main()
{
seqan3::dna5_vector seq;
// ...
fout.push_back(std::tie(seq, id));
}

Friends And Related Symbol Documentation

◆ operator|

template<detail::fields_specialisation selected_field_ids_ = fields<field::seq, field::id, field::ref_id, field::ref_offset, field::cigar, field::mapq, field::qual, field::flag, field::mate, field::tags, field::header_ptr>, detail::type_list_of_sam_file_output_formats valid_formats_ = type_list<format_sam, format_bam>, typename ref_ids_type = ref_info_not_given>
template<typename rng_t >
requires std::ranges::input_range<rng_t> && tuple_like<std::ranges::range_reference_t<rng_t>>
sam_file_output & operator| ( rng_t &&  range,
sam_file_output< selected_field_ids_, valid_formats_, ref_ids_type > &  f 
)
friend

Write a range of records (or tuples) to the file.

Template Parameters
rng_t: Type of the range, must satisfy std::ranges::input_range and have a reference type that satisfies seqan3::tuple_like.
Parameters
[in] range: The range to write.
[in] f: The file being written to.

This operator enables sam_file_output to be at the end of a piping operation. It just calls operator=() internally.

Complexity

Linear in the number of records.

Exceptions

Basic exception safety.

Example

// SPDX-FileCopyrightText: 2006-2024 Knut Reinert & Freie Universität Berlin
// SPDX-FileCopyrightText: 2016-2024 Knut Reinert & MPI für molekulare Genetik
// SPDX-License-Identifier: CC0-1.0
#include <sstream>
#include <string>
#include <tuple>
#include <vector>
int main()
{
using namespace seqan3::literals;
{"NATA"_dna5, "2nd"},
{"GATA"_dna5, "Third"}}; // a range of "records"
fout = range; // will iterate over the records and write them
// equivalent to:
range | fout;
}

This is especially useful in combination with file-based filters:

#include <ranges>
#include <sstream>
auto sam_file_raw = R"(@HD VN:1.6 SO:coordinate GO:none
@SQ SN:ref LN:45
r001 99 ref 7 30 * = 37 39 TTAGATAAAGGATACTG *
r003 0 ref 29 30 * * 0 0 GCCTAAGCTAA * SA:Z:ref,29,-,6H5M,17,0;
r003 2064 ref 29 17 * * 0 0 TAGGC * SA:Z:ref,9,+,5S6M,30,1;
r001 147 ref 237 30 * = 7 -39 CAGCGGCAT * NM:i:1
)";
int main()
{
auto input_file = seqan3::sam_file_input{std::istringstream{sam_file_raw}, seqan3::format_sam{}};
input_file | std::views::take(3) // take only the first 3 records
}

The documentation for this class was generated from the following file:
Hide me