seqan/3-master-dev/algorithm_8hpp_source.html

// SPDX-FileCopyrightText: 2006-2024 Knut Reinert & Freie Universität Berlin

// SPDX-FileCopyrightText: 2016-2024 Knut Reinert & MPI für molekulare Genetik

// SPDX-License-Identifier: BSD-3-Clause


#pragma once


#include <array>

#include <cassert>

#include <concepts>

#include <utility>


#include <seqan3/utility/simd/concept.hpp>

#include <seqan3/utility/simd/detail/builtin_simd.hpp>

#include <seqan3/utility/simd/detail/simd_algorithm_avx2.hpp>

#include <seqan3/utility/simd/detail/simd_algorithm_avx512.hpp>

#include <seqan3/utility/simd/detail/simd_algorithm_sse4.hpp>

#include <seqan3/utility/simd/simd_traits.hpp>


namespace seqan3::detail

{


/*!\brief Helper function for seqan3::simd::fill.
 * \tparam simd_t The simd type to construct.
 * \tparam I      Index pack; only the number of indices is used, the index values are discarded.
 * \param[in] scalar The value that is placed into every slot of the initialiser list.
 * \returns A `simd_t` brace-initialised with `sizeof...(I)` copies of `scalar`.
 */
template <simd::simd_concept simd_t, size_t... I>
constexpr simd_t fill_impl(typename simd_traits<simd_t>::scalar_type const scalar, std::index_sequence<I...>) noexcept
{
    // Each pack element expands to a comma expression that drops the index and yields `scalar`.
    return simd_t{(static_cast<void>(I), scalar)...};
}


/*!\brief Helper function for seqan3::simd::iota.
 * \tparam simd_t   The simd type to construct.
 * \tparam scalar_t The scalar type of the offset and of the integer sequence.
 * \tparam I        The integer sequence values that are added to the offset.
 * \param[in] offset The value added to every element of the integer sequence.
 * \returns A `simd_t` brace-initialised with `offset + I` for every `I` of the sequence.
 */
template <simd::simd_concept simd_t, typename scalar_t, scalar_t... I>
constexpr simd_t iota_impl(scalar_t const offset, std::integer_sequence<scalar_t, I...>)
{
    // The sum may be promoted to a wider type; cast back so the brace initialisation does not narrow.
    simd_t const sequence{static_cast<scalar_t>(offset + I)...};
    return sequence;
}


/*!\brief Helper function to extract a part of the given simd vector.
 * \tparam divisor The number of equally sized parts the vector is split into.
 * \tparam simd_t  The simd type.
 * \param[in] src  The vector to read from.
 * \param[in] mask The zero-based number of the part to extract.
 * \returns A value-initialised `simd_t` whose first `length / divisor` elements are copied from the selected part.
 *
 * \details The caller is responsible for `mask < divisor`; no bounds check is done here.
 */
template <size_t divisor, simd_concept simd_t>
constexpr simd_t extract_impl(simd_t const & src, uint8_t const mask)
{
    constexpr size_t part_length = simd_traits<simd_t>::length / divisor;
    size_t const first = part_length * mask; // position of the selected part inside src

    simd_t result{};
    for (size_t pos = 0; pos != part_length; ++pos)
        result[pos] = src[first + pos];

    return result;
}


/*!\brief Upcasts the given vector into the target vector using signed extension of packed values.
 * \tparam target_simd_t The simd type to cast to; must have the same `max_length` as the source type.
 * \tparam source_simd_t The simd type to cast from.
 * \param[in] src The vector to upcast.
 * \returns The result of the architecture specific implementation selected by the source `max_length`.
 *
 * \details Dispatches on `simd_traits<source_simd_t>::max_length`: 16 selects the SSE4, 32 the AVX2 and 64 the
 *          AVX512 implementation. Every other width is rejected at compile time.
 */
template <simd::simd_concept target_simd_t, simd::simd_concept source_simd_t>
constexpr target_simd_t upcast_signed(source_simd_t const & src)
{
    static_assert(simd_traits<target_simd_t>::max_length == simd_traits<source_simd_t>::max_length,
                  "Target vector has different byte size.");

    if constexpr (simd_traits<source_simd_t>::max_length == 16) // SSE4
        return upcast_signed_sse4<target_simd_t>(src);
    else if constexpr (simd_traits<source_simd_t>::max_length == 32) // AVX2
        return upcast_signed_avx2<target_simd_t>(src);
    else if constexpr (simd_traits<source_simd_t>::max_length == 64) // AVX512
        return upcast_signed_avx512<target_simd_t>(src);
    else
        // Always false in this branch: an unsupported width must not compile and fall off the end of the function.
        static_assert(simd_traits<source_simd_t>::max_length == 16 || simd_traits<source_simd_t>::max_length == 32
                          || simd_traits<source_simd_t>::max_length == 64,
                      "simd type is not supported.");
}


/*!\brief Upcasts the given vector into the target vector using unsigned extension of packed values.
 * \tparam target_simd_t The simd type to cast to; must have the same `max_length` as the source type.
 * \tparam source_simd_t The simd type to cast from.
 * \param[in] src The vector to upcast.
 * \returns The result of the architecture specific implementation selected by the source `max_length`.
 *
 * \details Dispatches on `simd_traits<source_simd_t>::max_length`: 16 selects the SSE4, 32 the AVX2 and 64 the
 *          AVX512 implementation. Every other width is rejected at compile time.
 */
template <simd::simd_concept target_simd_t, simd::simd_concept source_simd_t>
constexpr target_simd_t upcast_unsigned(source_simd_t const & src)
{
    static_assert(simd_traits<target_simd_t>::max_length == simd_traits<source_simd_t>::max_length,
                  "Target vector has different byte size.");

    if constexpr (simd_traits<source_simd_t>::max_length == 16) // SSE4
        return upcast_unsigned_sse4<target_simd_t>(src);
    else if constexpr (simd_traits<source_simd_t>::max_length == 32) // AVX2
        return upcast_unsigned_avx2<target_simd_t>(src);
    else if constexpr (simd_traits<source_simd_t>::max_length == 64) // AVX512
        return upcast_unsigned_avx512<target_simd_t>(src);
    else
        // Always false in this branch: an unsupported width must not compile and fall off the end of the function.
        static_assert(simd_traits<source_simd_t>::max_length == 16 || simd_traits<source_simd_t>::max_length == 32
                          || simd_traits<source_simd_t>::max_length == 64,
                      "simd type is not supported.");
}


/*!\brief Extracts one half of the given simd vector and stores the result in the lower half of the target vector.
 * \tparam index  The half to extract; must be 0 or 1.
 * \tparam simd_t The simd type.
 * \param[in] src The vector to extract from.
 * \returns The vector produced by the generic element-wise seqan3::detail::extract_impl with two parts.
 */
template <uint8_t index, simd::simd_concept simd_t>
constexpr simd_t extract_half(simd_t const & src)
{
    static_assert(index < 2, "The index must be in the range of [0, 1]");

    constexpr size_t parts = 2;
    return detail::extract_impl<parts>(src, index);
}


/*!\brief Overload of extract_half for native builtin simd types.
 * \tparam index  The half to extract; must be 0 or 1.
 * \tparam simd_t The simd type; must be a native builtin simd type.
 * \param[in] src The vector to extract from.
 * \returns `src` unchanged if the vector has fewer than two elements, otherwise the result of the implementation
 *          selected by `max_length` (16: SSE4, 32: AVX2, 64: AVX512, anything else: generic extract_impl).
 */
template <uint8_t index, simd::simd_concept simd_t>
    requires detail::is_builtin_simd_v<simd_t> && detail::is_native_builtin_simd_v<simd_t>
constexpr simd_t extract_half(simd_t const & src)
{
    static_assert(index < 2, "The index must be in the range of [0, 1]");

    if constexpr (simd_traits<simd_t>::length < 2)
    {
        // Fewer elements than parts: there is nothing to split, so hand back the input.
        return src;
    }
    else if constexpr (simd_traits<simd_t>::max_length == 16)
    {
        return detail::extract_half_sse4<index>(src); // SSE4
    }
    else if constexpr (simd_traits<simd_t>::max_length == 32)
    {
        return detail::extract_half_avx2<index>(src); // AVX2
    }
    else if constexpr (simd_traits<simd_t>::max_length == 64)
    {
        return detail::extract_half_avx512<index>(src); // AVX512
    }
    else
    {
        return detail::extract_impl<2>(src, index); // Anything else
    }
}


/*!\brief Extracts one quarter of the given simd vector and stores it in the lower quarter of the target vector.
 * \tparam index  The quarter to extract; must be in the range [0, 3].
 * \tparam simd_t The simd type.
 * \param[in] src The vector to extract from.
 * \returns The vector produced by the generic element-wise seqan3::detail::extract_impl with four parts.
 */
template <uint8_t index, simd::simd_concept simd_t>
constexpr simd_t extract_quarter(simd_t const & src)
{
    static_assert(index < 4, "The index must be in the range of [0, 1, 2, 3]");

    constexpr size_t parts = 4;
    return detail::extract_impl<parts>(src, index);
}


/*!\brief Overload of extract_quarter for native builtin simd types.
 * \tparam index  The quarter to extract; must be in the range [0, 3].
 * \tparam simd_t The simd type; must be a native builtin simd type.
 * \param[in] src The vector to extract from.
 * \returns `src` unchanged if the vector has fewer than four elements, otherwise the result of the implementation
 *          selected by `max_length` (16: SSE4, 32: AVX2, 64: AVX512 if `__AVX512DQ__` is defined, anything else:
 *          generic extract_impl).
 */
template <uint8_t index, simd::simd_concept simd_t>
    requires detail::is_builtin_simd_v<simd_t> && detail::is_native_builtin_simd_v<simd_t>
constexpr simd_t extract_quarter(simd_t const & src)
{
    static_assert(index < 4, "The index must be in the range of [0, 1, 2, 3]");

    if constexpr (simd_traits<simd_t>::length < 4)
    {
        // Fewer elements than parts: there is nothing to split, so hand back the input.
        return src;
    }
    else if constexpr (simd_traits<simd_t>::max_length == 16)
    {
        return detail::extract_quarter_sse4<index>(src); // SSE4
    }
    else if constexpr (simd_traits<simd_t>::max_length == 32)
    {
        return detail::extract_quarter_avx2<index>(src); // AVX2
    }
#if defined(__AVX512DQ__)
    else if constexpr (simd_traits<simd_t>::max_length == 64)
    {
        return detail::extract_quarter_avx512<index>(src); // AVX512
    }
#endif // defined(__AVX512DQ__)
    else
    {
        return detail::extract_impl<4>(src, index); // Anything else
    }
}


/*!\brief Extracts one eighth of the given simd vector and stores it in the lower eighth of the target vector.
 * \tparam index  The eighth to extract; must be in the range [0, 7].
 * \tparam simd_t The simd type.
 * \param[in] src The vector to extract from.
 * \returns The vector produced by the generic element-wise seqan3::detail::extract_impl with eight parts.
 */
template <uint8_t index, simd::simd_concept simd_t>
constexpr simd_t extract_eighth(simd_t const & src)
{
    // Same compile-time range check as the builtin overload: a larger index would read past the end of src.
    static_assert(index < 8, "The index must be in the range of [0, 1, 2, 3, 4, 5, 6, 7]");

    return detail::extract_impl<8>(src, index);
}


/*!\brief Overload of extract_eighth for native builtin simd types.
 * \tparam index  The eighth to extract; must be in the range [0, 7].
 * \tparam simd_t The simd type; must be a native builtin simd type.
 * \param[in] src The vector to extract from.
 * \returns `src` unchanged if the vector has fewer than eight elements, otherwise the result of the implementation
 *          selected by `max_length` (16: SSE4, 32: AVX2, 64: AVX512 if `__AVX512DQ__` is defined, anything else:
 *          generic extract_impl).
 */
template <uint8_t index, simd::simd_concept simd_t>
    requires detail::is_builtin_simd_v<simd_t> && detail::is_native_builtin_simd_v<simd_t>
constexpr simd_t extract_eighth(simd_t const & src)
{
    static_assert(index < 8, "The index must be in the range of [0, 1, 2, 3, 4, 5, 6, 7]");

    if constexpr (simd_traits<simd_t>::length < 8)
    {
        // Fewer elements than parts: there is nothing to split, so hand back the input.
        return src;
    }
    else if constexpr (simd_traits<simd_t>::max_length == 16)
    {
        return detail::extract_eighth_sse4<index>(src); // SSE4
    }
    else if constexpr (simd_traits<simd_t>::max_length == 32)
    {
        return detail::extract_eighth_avx2<index>(src); // AVX2
    }
#if defined(__AVX512DQ__)
    else if constexpr (simd_traits<simd_t>::max_length == 64)
    {
        return detail::extract_eighth_avx512<index>(src); // AVX512
    }
#endif // defined(__AVX512DQ__)
    else
    {
        return detail::extract_impl<8>(src, index); // Anything else
    }
}


template <simd::simd_concept simd_t>

constexpr void transpose(std::array<simd_t, simd_traits<simd_t>::length> & matrix)

{

    std::array<simd_t, simd_traits<simd_t>::length> tmp{};


    for (size_t i = 0; i < matrix.size(); ++i)

        for (size_t j = 0; j < matrix.size(); ++j)

            tmp[j][i] = matrix[i][j];


    std::swap(tmp, matrix);

}

} // namespace seqan3::detail


namespace seqan3

{


inline namespace simd

{


/*!\brief Fills a seqan3::simd::simd_type vector with a scalar value.
 * \tparam simd_t The simd type to construct.
 * \param[in] scalar The value every element of the vector is set to.
 * \returns A vector with `simd_traits<simd_t>::length` copies of `scalar`.
 */
template <simd::simd_concept simd_t>
constexpr simd_t fill(typename simd_traits<simd_t>::scalar_type const scalar) noexcept
{
    // One index per vector element; fill_impl only uses the number of indices.
    using indices_t = std::make_index_sequence<simd_traits<simd_t>::length>;

    return detail::fill_impl<simd_t>(scalar, indices_t{});
}


/*!\brief Fills a seqan3::simd::simd_type vector with the scalar values offset, offset+1, offset+2, ...
 * \tparam simd_t The simd type to construct.
 * \param[in] offset The value of the first element.
 * \returns A vector whose element at position `i` is `offset + i`, converted to the scalar type.
 */
template <simd::simd_concept simd_t>
constexpr simd_t iota(typename simd_traits<simd_t>::scalar_type const offset)
{
    using scalar_type = typename simd_traits<simd_t>::scalar_type;
    constexpr size_t length = simd_traits<simd_t>::length;
    using sequence_t = std::make_integer_sequence<scalar_type, length>;

    return detail::iota_impl<simd_t>(offset, sequence_t{});
}


/*!\brief Load simd_t size bits of integral data from memory.
 * \tparam simd_t The simd type to load into.
 * \param[in] mem_addr The address to read from; must not be null (checked by an assertion) and is interpreted as
 *                     an array of at least `simd_traits<simd_t>::length` scalars.
 * \returns A vector whose elements were copied one by one from the given memory.
 */
template <simd::simd_concept simd_t>
constexpr simd_t load(void const * mem_addr)
{
    assert(mem_addr != nullptr);

    using scalar_t = typename simd_traits<simd_t>::scalar_type;
    scalar_t const * const source = static_cast<scalar_t const *>(mem_addr);

    simd_t result{};
    for (size_t pos = 0; pos < simd_traits<simd_t>::length; ++pos)
        result[pos] = source[pos];

    return result;
}


/*!\brief Overload of load for native builtin simd types.
 * \tparam simd_t The simd type to load into; must be a native builtin simd type.
 * \param[in] mem_addr The address to read from; must not be null (checked by an assertion).
 * \returns The result of the implementation selected by `max_length` (16: SSE4, 32: AVX2, 64: AVX512).
 */
template <simd::simd_concept simd_t>
    requires detail::is_builtin_simd_v<simd_t> && detail::is_native_builtin_simd_v<simd_t>
constexpr simd_t load(void const * mem_addr)
{
    assert(mem_addr != nullptr);

    if constexpr (simd_traits<simd_t>::max_length == 16)
    {
        return detail::load_sse4<simd_t>(mem_addr);
    }
    else if constexpr (simd_traits<simd_t>::max_length == 32)
    {
        return detail::load_avx2<simd_t>(mem_addr);
    }
    else if constexpr (simd_traits<simd_t>::max_length == 64)
    {
        return detail::load_avx512<simd_t>(mem_addr);
    }
    else
    {
        static_assert(simd_traits<simd_t>::max_length >= 16 && simd_traits<simd_t>::max_length <= 64,
                      "Unsupported simd type.");
    }
}


template <simd::simd_concept simd_t>


constexpr void store(void * mem_addr, simd_t const & simd_vec)

{

    assert(mem_addr != nullptr);

    using scalar_t = typename simd_traits<simd_t>::scalar_type;


    for (size_t i = 0; i < simd_traits<simd_t>::length; ++i)

        *(static_cast<scalar_t *>(mem_addr) + i) = simd_vec[i];

}


/*!\brief Overload of store for native builtin simd types.
 * \tparam simd_t The simd type to store; must be a native builtin simd type.
 * \param[out] mem_addr The address to write to; must not be null (checked by an assertion).
 * \param[in]  simd_vec The vector to store.
 *
 * \details Forwards to the implementation selected by `max_length` (16: SSE4, 32: AVX2, 64: AVX512).
 */
template <simd::simd_concept simd_t>
    requires detail::is_builtin_simd_v<simd_t> && detail::is_native_builtin_simd_v<simd_t>
constexpr void store(void * mem_addr, simd_t const & simd_vec)
{
    assert(mem_addr != nullptr);

    if constexpr (simd_traits<simd_t>::max_length == 16)
    {
        detail::store_sse4<simd_t>(mem_addr, simd_vec);
    }
    else if constexpr (simd_traits<simd_t>::max_length == 32)
    {
        detail::store_avx2<simd_t>(mem_addr, simd_vec);
    }
    else if constexpr (simd_traits<simd_t>::max_length == 64)
    {
        detail::store_avx512<simd_t>(mem_addr, simd_vec);
    }
    else
    {
        static_assert(simd_traits<simd_t>::max_length >= 16 && simd_traits<simd_t>::max_length <= 64,
                      "Unsupported simd type.");
    }
}


/*!\brief Transposes the given simd vector matrix.
 * \tparam simd_t The simd type of one matrix row.
 * \param[in,out] matrix The matrix to transpose in place.
 *
 * \details Unconstrained overload: always forwards to the generic seqan3::detail::transpose.
 */
template <simd::simd_concept simd_t>
constexpr void transpose(std::array<simd_t, simd_traits<simd_t>::length> & matrix)
{
    detail::transpose<simd_t>(matrix);
}


/*!\brief Transposes the given simd vector matrix; overload for seqan builtin simd types.
 * \tparam simd_t The simd type of one matrix row; must be a native builtin simd type whose `length` equals its
 *                `max_length`.
 * \param[in,out] matrix The matrix to transpose in place.
 *
 * \details Selects the implementation by `length`: 16 uses SSE4, 32 uses AVX2 and 64 uses AVX512 if `__AVX512BW__`
 *          is defined. Everything else falls back to the generic seqan3::detail::transpose.
 */
template <simd::simd_concept simd_t>
    requires detail::is_builtin_simd_v<simd_t> && detail::is_native_builtin_simd_v<simd_t>
          && (simd_traits<simd_t>::max_length == simd_traits<simd_t>::length)
constexpr void transpose(std::array<simd_t, simd_traits<simd_t>::length> & matrix)
{
    if constexpr (simd_traits<simd_t>::length == 16)
    {
        detail::transpose_matrix_sse4(matrix); // SSE4 implementation
    }
    else if constexpr (simd_traits<simd_t>::length == 32)
    {
        detail::transpose_matrix_avx2(matrix); // AVX2 implementation
    }
#if defined(__AVX512BW__) // Requires byte-word extension of AVX512 instruction set.
    else if constexpr (simd_traits<simd_t>::length == 64)
    {
        detail::transpose_matrix_avx512(matrix); // AVX512 implementation
    }
#endif // defined(__AVX512BW__)
    else
    {
        detail::transpose(matrix);
    }
}


/*!\brief Upcasts the given vector into the target vector, converting the packed values element by element.
 * \tparam target_simd_t The simd type to cast to; must not have more elements than the source type.
 * \tparam source_simd_t The simd type to cast from.
 * \param[in] src The vector to upcast.
 * \returns A vector whose elements are the first `simd_traits<target_simd_t>::length` elements of `src`, each
 *          converted to the target scalar type with `static_cast`.
 */
template <simd::simd_concept target_simd_t, simd::simd_concept source_simd_t>
constexpr target_simd_t upcast(source_simd_t const & src)
{
    // The target type has at most as many elements as the source type.
    static_assert(
        simd_traits<target_simd_t>::length <= simd_traits<source_simd_t>::length,
        "The length of the target simd type must be less than or equal to the length of the source simd type.");

    target_simd_t tmp{};
    for (size_t i = 0; i < simd_traits<target_simd_t>::length; ++i)
        tmp[i] = static_cast<typename simd_traits<target_simd_t>::scalar_type>(src[i]);

    return tmp;
}


/*!\brief Overload of upcast for builtin simd types with a native builtin source type.
 * \tparam target_simd_t The simd type to cast to; must be a builtin simd type with at most as many elements as
 *                       the source type.
 * \tparam source_simd_t The simd type to cast from; must be a native builtin simd type.
 * \param[in] src The vector to upcast.
 * \returns `src` reinterpreted as the target type if both types have the same number of elements, otherwise the
 *          result of seqan3::detail::upcast_signed (signed source scalar) or seqan3::detail::upcast_unsigned
 *          (unsigned source scalar).
 */
template <simd::simd_concept target_simd_t, simd::simd_concept source_simd_t>
    requires detail::is_builtin_simd_v<target_simd_t> && detail::is_builtin_simd_v<source_simd_t>
          && detail::is_native_builtin_simd_v<source_simd_t>
constexpr target_simd_t upcast(source_simd_t const & src)
{
    // The target type has at most as many elements as the source type.
    static_assert(
        simd_traits<target_simd_t>::length <= simd_traits<source_simd_t>::length,
        "The length of the target simd type must be less than or equal to the length of the source simd type.");

    if constexpr (simd_traits<source_simd_t>::length == simd_traits<target_simd_t>::length)
    {
        static_assert(simd_traits<target_simd_t>::max_length == simd_traits<source_simd_t>::max_length,
                      "Target vector has a different byte size.");
        return reinterpret_cast<target_simd_t>(src); // Same packing so we do not cast.
    }
    else if constexpr (std::signed_integral<typename simd_traits<source_simd_t>::scalar_type>)
    {
        return detail::upcast_signed<target_simd_t>(src);
    }
    else
    {
        static_assert(std::unsigned_integral<typename simd_traits<source_simd_t>::scalar_type>,
                      "Expected unsigned scalar type.");
        return detail::upcast_unsigned<target_simd_t>(src);
    }
}


} // namespace simd


} // namespace seqan3

array

builtin_simd.hpp
Provides seqan3::detail::builtin_simd, seqan3::detail::is_builtin_simd and seqan3::simd::simd_traits<...

cassert

seqan3::mask
Implementation of a masked alphabet to be used for tuple composites.
Definition mask.hpp:35

concepts

seqan3::field::offset
@ offset
Sequence (seqan3::field::seq) relative start position (0-based), unsigned value.

seqan3::simd::transpose
constexpr void transpose(std::array< simd_t, simd_traits< simd_t >::length > &matrix)
Transposes the given simd vector matrix.
Definition algorithm.hpp:420

seqan3::detail::extract_impl
constexpr simd_t extract_impl(simd_t const &src, uint8_t const mask)
Helper function to extract a part of the given simd vector.
Definition algorithm.hpp:58

seqan3::simd::upcast
constexpr target_simd_t upcast(source_simd_t const &src)
Upcasts the given vector into the target vector using sign extension of packed values.
Definition algorithm.hpp:454

seqan3::simd::iota
constexpr simd_t iota(typename simd_traits< simd_t >::scalar_type const offset)
Fills a seqan3::simd::simd_type vector with the scalar values offset, offset+1, offset+2,...
Definition algorithm.hpp:316

seqan3::detail::extract_eighth
constexpr simd_t extract_eighth(simd_t const &src)
Extracts one eighth of the given simd vector and stores it in the lower eighth of the target vector.
Definition algorithm.hpp:242

seqan3::detail::iota_impl
constexpr simd_t iota_impl(scalar_t const offset, std::integer_sequence< scalar_t, I... >)
Helper function for seqan3::simd::iota.
Definition algorithm.hpp:38

seqan3::detail::fill_impl
constexpr simd_t fill_impl(typename simd_traits< simd_t >::scalar_type const scalar, std::index_sequence< I... >) noexcept
Helper function for seqan3::simd::fill.
Definition algorithm.hpp:30

seqan3::detail::extract_quarter
constexpr simd_t extract_quarter(simd_t const &src)
Extracts one quarter of the given simd vector and stores it in the lower quarter of the target vector.
Definition algorithm.hpp:190

seqan3::simd::store
constexpr void store(void *mem_addr, simd_t const &simd_vec)
Store simd_t size bits of integral data into memory.
Definition algorithm.hpp:374

seqan3::simd::load
constexpr simd_t load(void const *mem_addr)
Load simd_t size bits of integral data from memory.
Definition algorithm.hpp:333

seqan3::detail::upcast_unsigned
constexpr target_simd_t upcast_unsigned(source_simd_t const &src)
Upcasts the given vector into the target vector using unsigned extension of packed values.
Definition algorithm.hpp:102

seqan3::simd::fill
constexpr simd_t fill(typename simd_traits< simd_t >::scalar_type const scalar) noexcept
Fills a seqan3::simd::simd_type vector with a scalar value.
Definition algorithm.hpp:300

seqan3::detail::extract_half
constexpr simd_t extract_half(simd_t const &src)
Extracts one half of the given simd vector and stores the result in the lower half of the target vector.
Definition algorithm.hpp:140

seqan3::detail::upcast_signed
constexpr target_simd_t upcast_signed(source_simd_t const &src)
Upcasts the given vector into the target vector using signed extension of packed values.
Definition algorithm.hpp:78

std::index_sequence

seqan3::detail
The internal SeqAn3 namespace.
Definition aligned_sequence_concept.hpp:26

seqan3::detail::transpose_matrix_avx512
void transpose_matrix_avx512(std::array< simd_t, simd_traits< simd_t >::length > &matrix)
Transposes the given simd vector matrix.

seqan3::detail::transpose_matrix_sse4
void transpose_matrix_sse4(std::array< simd_t, simd_traits< simd_t >::length > &matrix)
Transposes the given simd vector matrix.

seqan3::detail::transpose_matrix_avx2
void transpose_matrix_avx2(std::array< simd_t, simd_traits< simd_t >::length > &matrix)
Transposes the given simd vector matrix.

seqan3
The main SeqAn3 namespace.
Definition aligned_sequence_concept.hpp:26

simd_algorithm_avx2.hpp
Provides specific algorithm implementations for AVX2 instruction set.

simd_algorithm_avx512.hpp
Provides specific algorithm implementations for AVX512 instruction set.

simd_algorithm_sse4.hpp
Provides specific algorithm implementations for SSE4 instruction set.

simd_traits.hpp
Provides seqan3::simd::simd_traits.

seqan3::simd_traits
seqan3::simd::simd_traits is the trait class that provides uniform interface to the properties of sim...
Definition simd_traits.hpp:38

seqan3::simd::simd_traits::length
static constexpr auto length
The number of packed values in a simd vector (is not defined if simd_t does not model seqan3::simd::simd_concept).
Definition simd_traits.hpp:46

seqan3::simd::simd_traits::max_length
static constexpr auto max_length
The maximum number of packable values in a simd vector, if the underlying type would be [u]int8_t (is not defined if simd_t does not model seqan3::simd::simd_concept).
Definition simd_traits.hpp:51

std::swap
T swap(T... args)

concept.hpp
Provides seqan3::simd::simd_concept.

utility