This document provides example recipes on how to carry out particular tasks using the SeqAn functionalities in C++. Please note that these recipes are not ordered. You can use the links in the table of contents or the search function of your browser to navigate them.

It will take some time, but we hope to expand this document to contain numerous great examples. If you have suggestions for how to improve the Cookbook and/or examples you would like to see included, please feel free to contact us.

Read sequence files

#include <string>
 
#include <seqan3/core/debug_stream.hpp>         // for debug_stream
#include <seqan3/io/sequence_file/input.hpp>    // for sequence_file_input
 
// Reads all records of a FASTA file from the system's temporary directory and prints their fields.
int main ()
{
    // Initialise a file input object with a FastA file located in the tmp directory.
    seqan3::sequence_file_input file_in{std::filesystem::temp_directory_path() / "seq.fasta"};

    // Walk over the records and print id, sequence and quality of each one.
    for (auto & record : file_in)
    {
        seqan3::debug_stream << "ID:     " << seqan3::get<seqan3::field::id>(record) << '\n'
                             << "SEQ:    " << seqan3::get<seqan3::field::seq>(record) << '\n'
                             // qual is empty for FastA files
                             << "Empty Qual." << seqan3::get<seqan3::field::qual>(record) << '\n';
    }

    return 0;
}

Write a custom validator

This recipe implements a validator that checks if a numeric argument is an integral square (i.e. 0, 1, 4, 9...). Invalid values throw a seqan3::validation_error.

#include <cmath>
 
// Validator that accepts only numbers which are the square of an integer (0, 1, 4, 9, ...).
struct custom_validator
{
    // The value type the validator operates on; double is used for all arithmetic types.
    using option_value_type = double;

    // Throws seqan3::validation_error if `val` is not an integral square.
    void operator() (option_value_type const & val) const
    {
        // First make sure the value is an integer, only then test whether its rounded root squares back to it.
        bool const is_integral_square = (std::round(val) == val) &&
                                        (std::pow(std::round(std::sqrt(val)), 2) == val);

        if (!is_integral_square)
            throw seqan3::validation_error{"The provided number is not an arithmetic square."};
    }

    // Returns the text describing the accepted values.
    std::string get_help_page_message () const
    {
        return "Value must be the square of an integral number.";
    }
};

Construction and assignment of alphabet symbols

#include <seqan3/alphabet/all.hpp> // for working with alphabets directly
 
using seqan3::operator""_dna4;
 
// Demonstrates the different ways of giving a seqan3::dna4 object its value.
int main ()
{
    // Construct two dna4 objects directly from char literals.
    seqan3::dna4 ade{'A'_dna4};
    seqan3::dna4 gua{'G'_dna4};

    // Declare two more objects and assign them explicitly from a char and from a rank.
    seqan3::dna4 cyt;
    seqan3::dna4 thy;
    cyt.assign_char('C');
    thy.assign_rank(3);

    // Further code here...

    return 0;
}

    // NOTE: this fragment continues the snippet above; `ade` and `gua` are the dna4 objects constructed there.

    // Get the rank type of the alphabet (here uint8_t).
    using rank_type = seqan3::alphabet_rank_t<seqan3::dna4>;

    // Retrieve the numerical representation (rank) of the objects.
    rank_type rank_a = ade.to_rank();   // => 0
    rank_type rank_g = gua.to_rank();   // => 2

Reverse complement and the six-frame translation of a string using views

This recipe creates a small program that

reads a string from the command line (first argument to the program)
"converts" the string to a range of seqan3::dna5 (Bonus: throws an exception if loss of information occurs)
prints the string and its reverse complement
prints the six-frame translation of the string

#include <seqan3/core/debug_stream.hpp>
#include <seqan3/argument_parser/all.hpp>       // optional: include the argument_parser
#include <seqan3/range/views/all.hpp>           // include all of SeqAn's views
#include <seqan3/std/ranges>                    // include all of the standard library's views
 
 
// Reads a DNA string from the first command line argument and prints the string, its reverse
// complement and its six-frame translation.
// Returns -1 if the command line could not be parsed; otherwise the program ends normally.
int main(int argc, char** argv)
{
    // We use the seqan3::argument_parser which was introduced in the second chapter
    // of the tutorial: "Parsing command line arguments with SeqAn".
    seqan3::argument_parser myparser{"Assignment-3", argc, argv}; // initialize
    std::string s{};

    myparser.add_positional_option(s, "Please specify the DNA string.");

    try
    {
        myparser.parse();
    }
    catch (seqan3::argument_parser_error const & ext) // the user did something wrong
    {
        std::cerr << "[PARSER ERROR] " << ext.what() << '\n'; // you can customize your error message
        return -1; // a non-zero exit status lets calling scripts detect the failed parse
    }

    // Interpret every char of the input as a seqan3::dna5 letter (lazily, as a view).
    auto s_as_dna = s | seqan3::views::char_to<seqan3::dna5>;
    // Bonus:
    //auto s_as_dna = s | std::views::transform([] (char const c)
    //{
    //    return seqan3::assign_char_strictly_to(c, seqan3::dna5{});
    //});

    seqan3::debug_stream << "Original: " << s_as_dna << '\n';
    // Reversing and complementing the view yields the reverse complement.
    seqan3::debug_stream << "RevComp:  " << (s_as_dna | std::views::reverse | seqan3::views::complement) << '\n';
    seqan3::debug_stream << "Frames:   " << (s_as_dna | seqan3::views::translate) << '\n';
}

Reading records

After construction, you can now read the sequence records. Our file object behaves like a range so you can use a range-based for loop to conveniently iterate over the file:

#include <sstream>
 
#include <seqan3/core/debug_stream.hpp>
#include <seqan3/io/sequence_file/input.hpp>
 
// In-memory FASTA data with three records; each record is an id line starting with '>' followed by a sequence line.
auto input = R"(> TEST1
ACGT
> Test2
AGGCTGA
> Test3
GGAGTATAATATATATATATATAT)";
 
// Parses the in-memory FASTA data and prints the id and the sequence of every record.
int main()
{
    // The format must be given explicitly because a stream carries no file extension.
    seqan3::sequence_file_input fin{std::istringstream{input}, seqan3::format_fasta{}};

    // Decompose each record into its fields; the order of the bindings must be seq, id, qual.
    for (auto & [seq, id, qual] : fin)
    {
        seqan3::debug_stream << "ID:  " << id << '\n';
        seqan3::debug_stream << "SEQ: " << seq << '\n';
        // `qual` also exists, but is not printed, because we know it's empty for FastA files.
    }
}

Attention: An input file is a single-pass input range, which means you can only iterate over it once!

Note: It is important to write auto & and not just auto, otherwise you will copy the record on every iteration.

You can also use structured bindings, i.e. for (auto & [seq, id, qual] : fin). But beware: with structured bindings you do need to get the order of elements correct!

You can also read a file in chunks:

Reading records in chunks

// Open the FASTQ file located in the system's temporary directory.
seqan3::sequence_file_input fin{std::filesystem::temp_directory_path()/"my.fastq"};

// `&&` is important because ranges::views::chunk returns temporaries!
for (auto && records : fin | ranges::views::chunk(10))
{
    // `records` contains 10 elements (or less at the end)
    seqan3::debug_stream << "Taking the next 10 sequences:\n";
    // Only the ID of the first record in the current batch is printed.
    seqan3::debug_stream << "ID:  " << seqan3::get<seqan3::field::id>(*records.begin()) << '\n';
}

The example above will iterate over the file by reading 10 records at a time and prints the ID of the first record of each batch. If fewer than 10 records are left, the last batch simply contains the remaining records.

Applying a filter to a file

On some occasions you are only interested in sequence records that fulfill a certain criterion, e.g. having a minimum sequence length or a minimum average quality.

This recipe can be used to filter the sequences in your file by a custom criterion.

// Open the FASTQ file whose records should be filtered.
seqan3::sequence_file_input fin{std::filesystem::temp_directory_path()/"my.fastq"};

// std::views::filter is given a predicate (here a lambda) that decides for each record whether it is kept.
auto minimum_quality_filter = std::views::filter([] (auto const & rec)
{
    // View the quality string as a range of phred scores.
    auto phred_scores = seqan3::get<seqan3::field::qual>(rec)
                      | std::views::transform([] (auto q) { return q.to_phred(); });

    double const total = std::accumulate(phred_scores.begin(), phred_scores.end(), 0);
    double const average = total / std::ranges::size(phred_scores);

    return average >= 40; // keep records with a minimum average quality of 40
});

// Print the ids of all records that pass the filter.
for (auto & record : fin | minimum_quality_filter)
{
    seqan3::debug_stream << "ID: " << seqan3::get<seqan3::field::id>(record) << '\n';
}

Reading paired-end reads

In modern Next Generation Sequencing experiments you often have paired-end read data which is split into two files. The read pairs are identified by their identical name/id and position in the two files.

This recipe can be used to handle one pair of reads at a time.

// for simplicity we take the same file
seqan3::sequence_file_input fin1{std::filesystem::temp_directory_path()/"my.fastq"};
seqan3::sequence_file_input fin2{std::filesystem::temp_directory_path()/"my.fastq"};
 
for (auto && [rec1, rec2] : seqan3::views::zip(fin1, fin2)) // && is important!
{                                                           // because seqan3::views::zip returns temporaries
    if (seqan3::get<seqan3::field::id>(rec1) != seqan3::get<seqan3::field::id>(rec2))
        throw std::runtime_error("Oh oh your pairs don't match.");
}

Storing records in a std::vector

This recipe creates a small program that reads in a FASTA file and stores all the records in a std::vector.

#include <seqan3/core/debug_stream.hpp>
#include <seqan3/io/sequence_file/all.hpp>
#include <seqan3/std/filesystem>
#include <seqan3/std/ranges> // std::ranges::copy
 
// Reads a FASTA file from the system's temporary directory and stores all of its records in a std::vector.
int main()
{
    // Open the input file inside the temp directory.
    seqan3::sequence_file_input fin{std::filesystem::temp_directory_path() / "my.fasta"};

    // Take the record type from the file object so that the vector's element type matches.
    using record_type = decltype(fin)::record_type;
    std::vector<record_type> records;

    // Append every record of the file to the vector.
    // (A range-based for loop calling `records.push_back(std::move(rec));` is an alternative.)
    std::ranges::copy(fin, std::cpp20::back_inserter(records));

    seqan3::debug_stream << records << '\n';
}

Note that you can move the record out of the file if you want to store it somewhere without copying.

// Move the record that the file's begin iterator refers to into `rec` instead of copying it.
record_type rec = std::move(*fin.begin()); // avoid copying

Writing records

The easiest way to write to a sequence file is to use the seqan3::sequence_file_output::push_back() or seqan3::sequence_file_output::emplace_back() member functions. These work similarly to how they work on a std::vector.

#include <sstream>
#include <string>
#include <tuple>
 
#include <seqan3/alphabet/nucleotide/dna5.hpp>
#include <seqan3/io/sequence_file/output.hpp>
 
// Writes five identical records to an in-memory FASTA output, using both available member functions.
int main()
{
    using seqan3::operator""_dna5;

    // A stream has no file extension, so the format is passed explicitly.
    seqan3::sequence_file_output fout{std::ostringstream{}, seqan3::format_fasta{}};

    for (int i = 0; i < 5; ++i) // ...
    {
        seqan3::dna5_vector seq = "ACGT"_dna5;
        std::string id = "test_id";

        // ...

        // Either pass the fields as individual variables ...
        fout.emplace_back(seq, id);
        // ... or bundle them in a tuple:
        fout.push_back(std::tie(seq, id));
    }
}

File conversion

// Directory that holds the input file and receives the output file.
auto tmp_dir = std::filesystem::temp_directory_path();

// Assigning the input file to the output file converts my.fastq into output.fasta in a single statement.
seqan3::sequence_file_output{tmp_dir/"output.fasta"} = seqan3::sequence_file_input{tmp_dir/"my.fastq"};

Define a custom scoring scheme

#include <seqan3/alignment/scoring/aminoacid_scoring_scheme.hpp>

#include <seqan3/alignment/scoring/nucleotide_scoring_scheme.hpp>

// Bring the char literals for dna4 and aa27 into scope.
using seqan3::operator""_dna4;
using seqan3::operator""_aa27;

// Define a simple scoring scheme with match and mismatch cost and get the score.
seqan3::nucleotide_scoring_scheme nc_scheme{seqan3::match_score{4}, seqan3::mismatch_score{-5}};
auto sc_nc = nc_scheme.score('A'_dna4, 'C'_dna4); // sc_nc == -5.

// Define an amino acid similarity matrix and get the score.
seqan3::aminoacid_scoring_scheme aa_scheme{};
aa_scheme.set_similarity_matrix(seqan3::aminoacid_similarity_matrix::BLOSUM30);
auto sc_aa = aa_scheme.score('M'_aa27, 'K'_aa27); // sc_aa == 2.

Attention: SeqAn's alignment algorithm computes the maximal similarity score, thus the match score must be set to a positive value and the scores for mismatch and gap must be negative in order to maximize over the matching letters.

Calculate edit distance for a set of sequences

This recipe can be used to calculate the edit distance for all six pairwise combinations. Here we only allow at most 7 errors and filter all alignments with 6 or fewer errors.

#include <utility>
#include <vector>
 
#include <seqan3/alignment/pairwise/align_pairwise.hpp>
#include <seqan3/alignment/scoring/nucleotide_scoring_scheme.hpp>
#include <seqan3/alphabet/nucleotide/dna4.hpp>
#include <seqan3/core/debug_stream.hpp>
#include <seqan3/range/views/pairwise_combine.hpp>
#include <seqan3/std/ranges>
 
using seqan3::operator""_dna4;
 
// Computes the edit distance of all pairwise combinations of four sequences and prints the scores
// of those alignments that pass the filter.
int main()
{
    std::vector vec{"ACGTGACTGACT"_dna4,
                    "ACGAAGACCGAT"_dna4,
                    "ACGTGACTGACT"_dna4,
                    "AGGTACGAGCGACACT"_dna4};

    // Configure the alignment kernel.
    // min_score{-7} expresses the "at most 7 errors" limit; only the score is requested as output.
    auto config = seqan3::align_cfg::method_global{} |
                  seqan3::align_cfg::edit_scheme |
                  seqan3::align_cfg::min_score{-7} |
                  seqan3::align_cfg::output_score{};

    // Keep only results with a score of at least -6, i.e. 6 or fewer errors.
    auto filter_v = std::views::filter([](auto && res) { return res.score() >= -6;});

    // The temporary result range of align_pairwise is passed through seqan3::views::persist before it is filtered.
    for (auto const & res : seqan3::align_pairwise(seqan3::views::pairwise_combine(vec), config) | seqan3::views::persist | filter_v)
    {
        seqan3::debug_stream << "Score: " << res.score() << '\n';
    }
}

Searching for matches

This recipe can be used to search for all occurrences of a substring and print the number of hits and the positions in an ascending ordering.

#include <seqan3/alphabet/nucleotide/dna4.hpp>
#include <seqan3/core/debug_stream.hpp>
#include <seqan3/search/search.hpp>
#include <seqan3/search/fm_index/fm_index.hpp>
 
using seqan3::operator""_dna4;
 
// Builds an FM index over one text and prints every occurrence of the query "GCT".
void run_text_single()
{
    seqan3::dna4_vector text =
        "CGCTGTCTGAAGGATGAGTGTCAGCCAGTGTAACCCGATGAGCTACCCAGTAGTCGAACTGGGCCAGACAACCCGGCGCTAATGCACTCA"_dna4;
    seqan3::fm_index index{text};

    seqan3::debug_stream << "=====   Running on a single text   =====\n";
    seqan3::debug_stream << "The following hits were found:\n";

    // Each search result is printed on its own line.
    for (auto && hit : seqan3::search("GCT"_dna4, index))
    {
        seqan3::debug_stream << hit << '\n';
    }
}
 
// Builds an FM index over a collection of three texts and prints every occurrence of the query "GCT".
void run_text_collection()
{
    std::vector<seqan3::dna4_vector> text{"CGCTGTCTGAAGGATGAGTGTCAGCCAGTGTA"_dna4,
                                          "ACCCGATGAGCTACCCAGTAGTCGAACTG"_dna4,
                                          "GGCCAGACAACCCGGCGCTAATGCACTCA"_dna4};
    seqan3::fm_index index{text};

    seqan3::debug_stream << "===== Running on a text collection =====\n";
    seqan3::debug_stream << "The following hits were found:\n";

    // Each search result is printed on its own line.
    for (auto && hit : seqan3::search("GCT"_dna4, index))
    {
        seqan3::debug_stream << hit << '\n';
    }
}
 
// Runs the single-text example and the text-collection example, separated by an empty line.
int main()
{
    run_text_single();

    seqan3::debug_stream << '\n';

    run_text_collection();
}

If you want to allow errors in your query, you need to configure the approximate search with the following search configuration objects:

seqan3::search_cfg::max_error_total: Maximum number of total errors
seqan3::search_cfg::max_error_substitution: Maximum number of substitutions
seqan3::search_cfg::max_error_insertion: Maximum number of insertions
seqan3::search_cfg::max_error_deletion: Maximum number of deletions

These are constructed with absolute numbers or rates:
seqan3::search_cfg::error_count: Absolute number of errors
seqan3::search_cfg::error_rate: Rate of errors $\in[0,1]$

To search for either 1 insertion or 1 deletion you can use the seqan3::search_cfg::error_count:

// Index the text that is searched.
std::string text{"Garfield the fat cat without a hat."};
seqan3::fm_index index{text};
// Allow one error in total, which may be one insertion or one deletion but no substitution.
seqan3::configuration const cfg = seqan3::search_cfg::max_error_total{seqan3::search_cfg::error_count{1}} |
                                  seqan3::search_cfg::max_error_substitution{seqan3::search_cfg::error_count{0}} |
                                  seqan3::search_cfg::max_error_insertion{seqan3::search_cfg::error_count{1}} |
                                  seqan3::search_cfg::max_error_deletion{seqan3::search_cfg::error_count{1}};
// NOTE(review): the `s` literal suffix needs the std::string literals in scope
// (e.g. `using namespace std::string_literals;`), which this snippet does not show.
seqan3::debug_stream << search("cat"s, index, cfg) << '\n';
// prints: [<query_id:0, reference_id:0, reference_pos:14>,
//          <query_id:0, reference_id:0, reference_pos:17>,
//          <query_id:0, reference_id:0, reference_pos:18>,
//          <query_id:0, reference_id:0, reference_pos:32>]

Reading the CIGAR information into an actual alignment

In SeqAn, the conversion from a CIGAR string to an alignment (two aligned_sequences) is done automatically for you. You can access it by querying seqan3::field::alignment from the record:

    // Path of the SAM file inside the system's temporary directory.
    auto filename = std::filesystem::temp_directory_path()/"example.sam";

    // Only the id and the alignment field are selected for the records of this file.
    seqan3::alignment_file_input fin{filename, seqan3::fields<seqan3::field::id, seqan3::field::alignment>{}};

    // The bindings follow the order of the selected fields.
    for (auto & [ id, alignment ] : fin)
    {
        // Prints the second element of the alignment (presumably the aligned read sequence - confirm).
        seqan3::debug_stream << id << ": " << std::get<1>(alignment) << '\n';
    }

Combining sequence and alignment files

This recipe can be used to:

Read in a FASTA file with the reference and a SAM file with the alignment
Filter the alignment records and only take those with a mapping quality >= 30.
For the resulting alignments, print which read was mapped against which reference id and the number of seqan3::gap's involved in the alignment (either in the aligned reference or in the read sequence).

#include <string>
#include <vector>
 
#include <seqan3/alphabet/nucleotide/dna5.hpp>
#include <seqan3/alphabet/gap/gap.hpp>
#include <seqan3/core/debug_stream.hpp>
#include <seqan3/io/alignment_file/input.hpp>
#include <seqan3/io/record.hpp>
#include <seqan3/io/sequence_file/input.hpp>
#include <seqan3/std/filesystem>
#include <seqan3/std/ranges>
 
// Reads a reference FASTA file and a SAM file, and prints for every alignment record the read id,
// the reference id and the number of gaps in the aligned read and in the aligned reference.
// Records are restricted to a mapping quality >= 30 unless the GCC workaround below is active.
int main()
{
    std::filesystem::path tmp_dir = std::filesystem::temp_directory_path(); // get the temp directory

    // read in reference information
    seqan3::sequence_file_input reference_file{tmp_dir/"reference.fasta"};
    std::vector<std::string> ref_ids{};
    std::vector<seqan3::dna5_vector> ref_seqs{};

    // Move ids and sequences out of the records to avoid copying them.
    for (auto && record : reference_file)
    {
        ref_ids.push_back(std::move(seqan3::get<seqan3::field::id>(record)));
        ref_seqs.push_back(std::move(seqan3::get<seqan3::field::seq>(record)));
    }

    // The fields to read from the SAM file; their order defines the order of the structured bindings below.
    using field_type = seqan3::fields<seqan3::field::id,
                                      seqan3::field::ref_id,
                                      seqan3::field::mapq,
                                      seqan3::field::alignment>;

    // The reference ids and sequences are handed to the alignment file together with the selected fields.
    seqan3::alignment_file_input mapping_file{tmp_dir/"mapping.sam", ref_ids, ref_seqs, field_type{}};

#if !SEQAN3_WORKAROUND_GCC_93983
    // Keeps only records with a mapping quality of at least 30.
    auto mapq_filter = std::views::filter([] (auto & rec) { return seqan3::get<seqan3::field::mapq>(rec) >= 30; });
#endif // !SEQAN3_WORKAROUND_GCC_93983

    // With the workaround active the filter is not applied and all records are processed.
#if SEQAN3_WORKAROUND_GCC_93983
    for (auto & [id, ref_id, mapq, alignment] : mapping_file /*| mapq_filter*/)
#else // ^^^ workaround / no workaround vvv
    for (auto & [id, ref_id, mapq, alignment] : mapping_file | mapq_filter)
#endif // SEQAN3_WORKAROUND_GCC_93983
    {
        using seqan3::get;
        // Count the gaps in the first aligned sequence (the reference).
        size_t sum_ref{};
        for (auto const & char_ref : get<0>(alignment))
            if (char_ref == seqan3::gap{})
                ++sum_ref;

        // Count the gaps in the second aligned sequence (the read).
        size_t sum_read{};
        for (auto const & char_read : get<1>(alignment))
            if (char_read == seqan3::gap{})
                ++sum_read;

        seqan3::debug_stream << id << " mapped against " << ref_id << " with "
                             << sum_read << " gaps in the read sequence and "
                             << sum_ref  << " gaps in the reference sequence.\n";
    }
}

Map reads and write output to SAM file

For a full recipe on creating your own readmapper, see the very end of the tutorial Implementing your own read mapper with SeqAn.

// Searches every query read in a stored index, aligns it against the matching reference region
// and writes one SAM record per computed alignment.
//
// query_path: sequence file with the reads.
// index_path: file containing the serialised bidirectional FM index.
// sam_path:   path of the SAM file to write.
// storage:    holds the reference data; its `seqs` and `ids` members are accessed by reference id.
// errors:     maximum total number of errors allowed during the search.
void map_reads(std::filesystem::path const & query_path,
               std::filesystem::path const & index_path,
               std::filesystem::path const & sam_path,
               reference_storage_t & storage,
               uint8_t const errors)
{
    // we need the alphabet and text layout before loading
    seqan3::bi_fm_index<seqan3::dna5, seqan3::text_layout::collection> index;
    {
        // Load the index from a cereal binary archive; the stream is closed at the end of this scope.
        std::ifstream is{index_path, std::ios::binary};
        cereal::BinaryInputArchive iarchive{is};
        iarchive(index);
    }

    seqan3::sequence_file_input query_file_in{query_path};

    // The order of these fields is the order of the arguments passed to emplace_back below.
    seqan3::alignment_file_output sam_out{sam_path, seqan3::fields<seqan3::field::seq,
                                                                   seqan3::field::id,
                                                                   seqan3::field::ref_id,
                                                                   seqan3::field::ref_offset,
                                                                   seqan3::field::alignment,
                                                                   seqan3::field::qual,
                                                                   seqan3::field::mapq>{}};

    // Search with at most `errors` errors in total, using the hit_all_best strategy.
    seqan3::configuration const search_config = seqan3::search_cfg::max_error_total{
                                                    seqan3::search_cfg::error_count{errors}} |
                                                seqan3::search_cfg::hit_all_best{};

    // Global edit-distance alignment with free end gaps enabled for sequence 1 only (leading and trailing);
    // alignment, begin position and score are requested as output.
    seqan3::configuration const align_config = seqan3::align_cfg::method_global{
                                                   seqan3::align_cfg::free_end_gaps_sequence1_leading{true},
                                                   seqan3::align_cfg::free_end_gaps_sequence2_leading{false},
                                                   seqan3::align_cfg::free_end_gaps_sequence1_trailing{true},
                                                   seqan3::align_cfg::free_end_gaps_sequence2_trailing{false}} |
                                               seqan3::align_cfg::edit_scheme |
                                               seqan3::align_cfg::output_alignment{} |
                                               seqan3::align_cfg::output_begin_position{} |
                                               seqan3::align_cfg::output_score{};

    for (auto && record : query_file_in)
    {
        auto & query = seqan3::get<seqan3::field::seq>(record);
        for (auto && result : search(query, index, search_config))
        {
            // Start the reference window one position before the hit, unless the hit is at position 0.
            size_t start = result.reference_begin_position() ? result.reference_begin_position() - 1 : 0;
            // NOTE(review): the window length query.size() + 1 is not clamped to the end of the reference
            // sequence - confirm that hits near the end cannot make the span run past it.
            std::span text_view{std::data(storage.seqs[result.reference_id()]) + start, query.size() + 1};

            for (auto && alignment : seqan3::align_pairwise(std::tie(text_view, query), align_config))
            {
                auto aligned_seq = alignment.alignment();
                // Translate the begin position inside the window back to a position in the reference.
                // NOTE(review): the constant 2 is not explained here - confirm the intended offset convention.
                size_t ref_offset = alignment.sequence1_begin_position() + 2 + start;
                // The mapping quality is 60 plus the alignment score.
                size_t map_qual = 60u + alignment.score();

                sam_out.emplace_back(query,
                                     seqan3::get<seqan3::field::id>(record),
                                     storage.ids[result.reference_id()],
                                     ref_offset,
                                     aligned_seq,
                                     seqan3::get<seqan3::field::qual>(record),
                                     map_qual);
            }
        }
    }
}

Constructing a basic argument parser

#include <seqan3/argument_parser/all.hpp>
#include <seqan3/core/debug_stream.hpp>
 
// Prints the two paths the program was called with.
void run_program(std::filesystem::path const & reference_path,
                 std::filesystem::path const & index_path)
{
    seqan3::debug_stream << "reference_file_path: " << reference_path << '\n'
                         << "index_path           " << index_path << '\n';
}
 
// Bundles the values that are filled from the command line.
struct cmd_arguments
{
    // Path to the reference; empty by default.
    std::filesystem::path reference_path;
    // Path of the index file to write; defaults to "out.index".
    std::filesystem::path index_path = "out.index";
};
 
// Sets the parser's meta information and registers the two options that fill `args`.
void initialise_argument_parser(seqan3::argument_parser & parser, cmd_arguments & args)
{
    // Meta information of the application.
    auto & info = parser.info;
    info.version = "1.0.0";
    info.author = "E. coli";
    info.short_description = "Creates an index over a reference.";

    // -r/--reference: required, validated as an input file with extension "fa" or "fasta".
    parser.add_option(args.reference_path,
                      'r',
                      "reference",
                      "The path to the reference.",
                      seqan3::option_spec::REQUIRED,
                      seqan3::input_file_validator{{"fa","fasta"}});

    // -o/--output: optional, validated as an output file with extension "index".
    parser.add_option(args.index_path,
                      'o',
                      "output",
                      "The output index file path.",
                      seqan3::option_spec::DEFAULT,
                      seqan3::output_file_validator{{"index"}});
}
 
// Parses the command line and runs the program with the parsed paths.
// Returns -1 if parsing fails and 0 otherwise.
int main(int argc, char const ** argv)
{
    cmd_arguments args{};
    seqan3::argument_parser parser{"Indexer", argc, argv};

    // Register the options before the command line is parsed.
    initialise_argument_parser(parser, args);

    try
    {
        parser.parse();
    }
    catch (seqan3::argument_parser_error const & parse_error)
    {
        // The user did something wrong: report it and stop.
        std::cerr << "[PARSER ERROR] " << parse_error.what() << '\n';
        return -1;
    }

    run_program(args.reference_path, args.index_path);

    return 0;
}

Constructing a subcommand argument parser

#include <seqan3/argument_parser/all.hpp>
 
// =====================================================================================================================
// pull
// =====================================================================================================================
 
// Holds the values of the `pull` subcommand.
struct pull_arguments
{
    std::string repository;
    std::string branch;
    bool progress = false;
};
 
// Parses the arguments of the `pull` subcommand and prints what would be pulled.
// Returns -1 if parsing fails and 0 otherwise.
int run_git_pull(seqan3::argument_parser & parser)
{
    pull_arguments pull_args{};

    // Both values are positional: first the repository, then the branch.
    parser.add_positional_option(pull_args.repository, "The repository name to pull from.");
    parser.add_positional_option(pull_args.branch, "The branch name to pull from.");

    try
    {
        parser.parse();
    }
    catch (seqan3::argument_parser_error const & parse_error)
    {
        seqan3::debug_stream << "[Error git pull] " << parse_error.what() << "\n";
        return -1;
    }

    seqan3::debug_stream << "Git pull with repository " << pull_args.repository
                         << " and branch " << pull_args.branch << '\n';

    return 0;
}
 
// =====================================================================================================================
// push
// =====================================================================================================================
 
// Holds the values of the `push` subcommand.
struct push_arguments
{
    std::string repository;
    std::vector<std::string> branches;
    bool push_all = false;
};
 
// Parses the arguments of the `push` subcommand and prints what would be pushed.
// Returns -1 if parsing fails and 0 otherwise.
int run_git_push(seqan3::argument_parser & parser)
{
    push_arguments push_args{};

    // The repository comes first, followed by a list of branch names.
    parser.add_positional_option(push_args.repository, "The repository name to push to.");
    parser.add_positional_option(push_args.branches, "The branch names to push (if none are given, push current).");

    try
    {
        parser.parse();
    }
    catch (seqan3::argument_parser_error const & parse_error)
    {
        seqan3::debug_stream << "[Error git push] " << parse_error.what() << "\n";
        return -1;
    }

    seqan3::debug_stream << "Git push with repository " << push_args.repository
                         << " and branches " << push_args.branches << '\n';

    return 0;
}
 
// =====================================================================================================================
// main
// =====================================================================================================================
 
// Parses the top-level command line of `mygit` and dispatches to the `pull` or `push` subcommand.
// Returns -1 if the top-level parsing fails; otherwise returns the exit code of the subcommand that ran.
// Throws std::logic_error if the sub parser belongs to neither subcommand.
int main(int argc, char const ** argv)
{
    seqan3::argument_parser top_level_parser{"mygit", argc, argv, true, {"push", "pull"}};

    // Add information and flags to your top-level parser just as you would do with a normal one.
    // Note that all flags directed at the top-level parser must be specified BEFORE the subcommand key word.
    // Because of ambiguity, we do not allow any (positional) options for the top-level parser.
    top_level_parser.info.description.push_back("You can push or pull from a remote repository.");
    bool flag{false};
    top_level_parser.add_flag(flag, 'f', "flag", "some flag");

    try
    {
        top_level_parser.parse(); // trigger command line parsing
    }
    catch (seqan3::argument_parser_error const & ext) // catch user errors
    {
        seqan3::debug_stream << "[Error] " << ext.what() << "\n"; // customise your error message
        return -1;
    }

    seqan3::argument_parser & sub_parser = top_level_parser.get_sub_parser(); // hold a reference to the sub_parser

    std::cout << "Proceed to sub parser.\n";

    // The sub parser's app name is compared against "mygit-pull" / "mygit-push" to pick the subcommand.
    // The subcommand's result is returned so that a failure there becomes the exit status of the program.
    if (sub_parser.info.app_name == std::string_view{"mygit-pull"})
        return run_git_pull(sub_parser);
    else if (sub_parser.info.app_name == std::string_view{"mygit-push"})
        return run_git_push(sub_parser);
    else
        throw std::logic_error{"I do not know sub parser " + sub_parser.info.app_name};
}

Serialise data structures with cereal

#include <fstream>
#include <vector>
 
#include <cereal/archives/binary.hpp> // includes the cereal::BinaryInputArchive and cereal::BinaryOutputArchive
#include <cereal/types/vector.hpp>    // includes cerealisation support for std::vector
 
#include <seqan3/core/debug_stream.hpp>
#include <seqan3/test/tmp_filename.hpp>
 
// Written for std::vector, other types also work.
void load(std::vector<int16_t> & data, seqan3::test::tmp_filename & tmp_file)
{
    std::ifstream is(tmp_file.get_path(), std::ios::binary); // Where input can be found.
    cereal::BinaryInputArchive archive(is);                  // Create an input archive from the input stream.
    archive(data);                                           // Load data.
}
 
// Written for std::vector, other types also work.
void store(std::vector<int16_t> const & data, seqan3::test::tmp_filename & tmp_file)
{
    std::ofstream os(tmp_file.get_path(), std::ios::binary); // Where output should be stored.
    cereal::BinaryOutputArchive archive(os);                 // Create an ouput archive from the output stream.
    archive(data);                                           // Store data.
}
 
// Stores a vector in a temporary file, loads it into a second vector and prints the result.
int main()
{
    // The example uses a std::vector, but any seqan3 data structure that is documented as serialisable
    // could be used instead, e.g. seqan3::fm_index.
    seqan3::test::tmp_filename tmp_file{"data.out"}; // A temporary file; any other filename works as well.

    // Serialise the original data.
    std::vector<int16_t> const original{1, 2, 3, 4};
    store(original, tmp_file);

    // Deserialise into a fresh, empty vector.
    std::vector<int16_t> restored;
    load(restored, tmp_file);

    seqan3::debug_stream << restored << '\n'; // Prints [1,2,3,4].

    return 0;
}

A custom dna4 alphabet that converts all unknown characters to A

When assigning from char or converting from a larger nucleotide alphabet to a smaller one, loss of information can occur since obviously some bases are not available. When converting to seqan3::dna5 or seqan3::rna5, non-canonical bases (letters other than A, C, G, T, U) are converted to 'N' to preserve ambiguity at that position. For seqan3::dna4 and seqan3::rna4 there is no letter 'N' to represent ambiguity, so the conversion from char for IUPAC characters tries to choose the best fitting alternative (see seqan3::dna4 for more details).

If you would like to always convert unknown characters to A instead, you can create your own alphabet with a respective char conversion table very easily like this:

#include <seqan3/alphabet/nucleotide/nucleotide_base.hpp>
#include <seqan3/core/debug_stream.hpp>
 
// We inherit from seqan3::nucleotide_base s.t. we do not need to implement the full nucleotide interface
// but it is sufficient to define `rank_to_char`, `char_to_rank`, and `complement_table`.
// A four-letter DNA alphabet whose char conversion maps every character other than
// C/c, G/g, T/t and U/u to rank 0, i.e. to 'A'.
class my_dna4 : public seqan3::nucleotide_base<my_dna4, 4/*alphabet size is 4*/>
{
public:
    using nucleotide_base<my_dna4, 4>::nucleotide_base; // Use constructors of the base class.

private:
    // Value to char conversion table.
    static constexpr char_type rank_to_char[alphabet_size] {'A', 'C', 'G', 'T'}; // rank 0,1,2,3

    // Char-to-value conversion table. This is where char conversion for assign_char() is handled!
    // The table is filled by an immediately invoked constexpr lambda.
    static constexpr std::array<rank_type, 256> char_to_rank
    {
        [] () constexpr
        {
            std::array<rank_type, 256> conversion_table{}; // By default now, everything has rank 0 which equals `A`.

            // Only the letters that differ from rank 0 need an explicit entry (upper and lower case).
            conversion_table['C'] = conversion_table['c'] = 1;
            conversion_table['G'] = conversion_table['g'] = 2;
            conversion_table['T'] = conversion_table['t'] = 3;
            conversion_table['U'] = conversion_table['T']; // set U equal to T
            conversion_table['u'] = conversion_table['t']; // set u equal to t

            return conversion_table;
        }()
    };

    // The forward declaration of the complement table. With this, my_dna4 models seqan3::nucleotide_alphabet.
    // Its definition follows the class because it needs the complete type and the char literal.
    static const std::array<my_dna4, alphabet_size> complement_table;

    friend nucleotide_base<my_dna4, 4>; // Grant seqan3::nucleotide_base access to private/protected members.
    friend nucleotide_base<my_dna4, 4>::base_t; // Grant seqan3::alphabet_base access to private/protected members.
};
 
// Defines the `_my_dna4` *char literal* so you can write `'C'_my_dna4` instead of `my_dna4{}.assign_char('C')`.
// The `_my_dna4` *char literal*: `'C'_my_dna4` creates a my_dna4 letter that was assigned the char 'C'.
constexpr my_dna4 operator""_my_dna4(char const c) noexcept
{
    my_dna4 letter{};
    letter.assign_char(c);
    return letter;
}
 
// The definition of the complement table. With this, my_dna4 models seqan3::nucleotide_alphabet.
// The definition of the complement table. With this, my_dna4 models seqan3::nucleotide_alphabet.
// The entries are listed in rank order (A, C, G, T); each entry is the complement of the letter with that rank.
constexpr std::array<my_dna4, my_dna4::alphabet_size> my_dna4::complement_table
{
    'T'_my_dna4,    // the complement of 'A'_my_dna4
    'G'_my_dna4,    // the complement of 'C'_my_dna4
    'C'_my_dna4,    // the complement of 'G'_my_dna4
    'A'_my_dna4     // the complement of 'T'_my_dna4
};
 
int main()
{
    my_dna4 my_letter{'C'_my_dna4};
 
    my_letter.assign_char('S'); // Characters other than A,C,G,T are implicitly converted to `A`.
 
    seqan3::debug_stream << my_letter; // "A";
}

If you are interested in custom alphabets, also take a look at our tutorial How to write your own alphabet.

Table of Contents