SeqAn3  3.0.1
The Modern C++ library for sequence analysis.
simd_algorithm.hpp
Go to the documentation of this file.
1 // -----------------------------------------------------------------------------------------------------
2 // Copyright (c) 2006-2020, Knut Reinert & Freie Universität Berlin
3 // Copyright (c) 2016-2020, Knut Reinert & MPI für molekulare Genetik
4 // This file may be used, modified and/or redistributed under the terms of the 3-clause BSD-License
5 // shipped with this file and also available at: https://github.com/seqan/seqan3/blob/master/LICENSE.md
6 // -----------------------------------------------------------------------------------------------------
7 
13 #pragma once
14 
15 #include <array>
16 #include <utility>
17 
24 #include <seqan3/std/concepts>
25 
26 namespace seqan3::detail
27 {
28 
31 template <simd::simd_concept simd_t, size_t... I>
32 constexpr simd_t fill_impl(typename simd_traits<simd_t>::scalar_type const scalar, std::index_sequence<I...>) noexcept
33 {
34  return simd_t{((void)I, scalar)...};
35 }
36 
39 template <simd::simd_concept simd_t, typename scalar_t, scalar_t... I>
40 constexpr simd_t iota_impl(scalar_t const offset, std::integer_sequence<scalar_t, I...>)
41 {
42  return simd_t{static_cast<scalar_t>(offset + I)...};
43 }
44 
59 template <size_t divisor, simd_concept simd_t>
60 constexpr simd_t extract_impl(simd_t const & src, uint8_t const mask)
61 {
62  simd_t dst{};
63  constexpr size_t chunk = simd_traits<simd_t>::length / divisor;
64  size_t offset = chunk * mask;
65  for (size_t i = 0; i < chunk; ++i)
66  dst[i] = src[i + offset];
67 
68  return dst;
69 }
70 
79 template <simd::simd_concept target_simd_t, simd::simd_concept source_simd_t>
80 constexpr target_simd_t upcast_signed(source_simd_t const & src)
81 {
82  static_assert(simd_traits<target_simd_t>::max_length == simd_traits<source_simd_t>::max_length,
83  "Target vector has different byte size.");
84 
85  if constexpr (simd_traits<source_simd_t>::max_length == 16) // SSE4
86  return upcast_signed_sse4<target_simd_t>(src);
87  else if constexpr (simd_traits<source_simd_t>::max_length == 32) // AVX2
88  return upcast_signed_avx2<target_simd_t>(src);
89  else if constexpr (simd_traits<source_simd_t>::max_length == 64) // AVX512
90  return upcast_signed_avx512<target_simd_t>(src);
91  else
92  static_assert(simd_traits<source_simd_t>::max_length <= 32, "simd type is not supported.");
93 }
94 
103 template <simd::simd_concept target_simd_t, simd::simd_concept source_simd_t>
104 constexpr target_simd_t upcast_unsigned(source_simd_t const & src)
105 {
106  static_assert(simd_traits<target_simd_t>::max_length == simd_traits<source_simd_t>::max_length,
107  "Target vector has different byte size.");
108 
109  if constexpr (simd_traits<source_simd_t>::max_length == 16) // SSE4
110  return upcast_unsigned_sse4<target_simd_t>(src);
111  else if constexpr (simd_traits<source_simd_t>::max_length == 32) // AVX2
112  return upcast_unsigned_avx2<target_simd_t>(src);
113  else if constexpr (simd_traits<source_simd_t>::max_length == 64) // AVX512
114  return upcast_unsigned_avx512<target_simd_t>(src);
115  else
116  static_assert(simd_traits<source_simd_t>::max_length <= 32, "simd type is not supported.");
117 }
118 
141 template <uint8_t index, simd::simd_concept simd_t>
142 constexpr simd_t extract_halve(simd_t const & src)
143 {
144  static_assert(index < 2, "The index must be in the range of [0, 1]");
145 
146  return detail::extract_impl<2>(src, index);
147 }
148 
150 template <uint8_t index, simd::simd_concept simd_t>
151  requires detail::is_builtin_simd_v<simd_t> &&
152  detail::is_native_builtin_simd_v<simd_t> &&
153  simd_traits<simd_t>::max_length == 16
154 constexpr simd_t extract_halve(simd_t const & src)
155 {
156  static_assert(index < 2, "The index must be in the range of [0, 1]");
157 
158  if constexpr (simd_traits<simd_t>::length < 2) // In case there are less elements available return unchanged value.
159  return src;
160  else // if constexpr (simd_traits<simd_t>::max_length == 16) // SSE4
161  return detail::extract_halve_sse4<index>(src);
162 }
164 
187 template <uint8_t index, simd::simd_concept simd_t>
188 constexpr simd_t extract_quarter(simd_t const & src)
189 {
190  static_assert(index < 4, "The index must be in the range of [0, 1, 2, 3]");
191 
192  return detail::extract_impl<4>(src, index);
193 }
194 
196 template <uint8_t index, simd::simd_concept simd_t>
197  requires detail::is_builtin_simd_v<simd_t> &&
198  detail::is_native_builtin_simd_v<simd_t> &&
199  simd_traits<simd_t>::max_length == 16
200 constexpr simd_t extract_quarter(simd_t const & src)
201 {
202  static_assert(index < 4, "The index must be in the range of [0, 1, 2, 3]");
203 
204  if constexpr (simd_traits<simd_t>::length < 4) // In case there are less elements available return unchanged value.
205  return src;
206  else // if constexpr (simd_traits<simd_t>::max_length == 16) // SSE4
207  return detail::extract_quarter_sse4<index>(src);
208 }
210 
233 template <uint8_t index, simd::simd_concept simd_t>
234 constexpr simd_t extract_eighth(simd_t const & src)
235 {
236  return detail::extract_impl<8>(src, index);
237 }
238 
240 template <uint8_t index, simd::simd_concept simd_t>
241  requires detail::is_builtin_simd_v<simd_t> &&
242  detail::is_native_builtin_simd_v<simd_t> &&
243  simd_traits<simd_t>::max_length == 16
244 constexpr simd_t extract_eighth(simd_t const & src)
245 {
246  static_assert(index < 8, "The index must be in the range of [0, 1, 2, 3, 4, 5, 6, 7]");
247 
248  if constexpr (simd_traits<simd_t>::length < 8) // In case there are less elements available return unchanged value.
249  return src;
250  else // if constexpr (simd_traits<simd_t>::max_length == 16) // SSE4
251  return detail::extract_eighth_sse4<index>(src);
252 }
254 
255 } // namespace seqan3::detail
256 
257 namespace seqan3
258 {
259 
260 inline namespace simd
261 {
262 
272 template <simd::simd_concept simd_t>
273 constexpr simd_t fill(typename simd_traits<simd_t>::scalar_type const scalar) noexcept
274 {
275  constexpr size_t length = simd_traits<simd_t>::length;
276  return detail::fill_impl<simd_t>(scalar, std::make_index_sequence<length>{});
277 }
278 
288 template <simd::simd_concept simd_t>
289 constexpr simd_t iota(typename simd_traits<simd_t>::scalar_type const offset)
290 {
291  constexpr size_t length = simd_traits<simd_t>::length;
292  using scalar_type = typename simd_traits<simd_t>::scalar_type;
293  return detail::iota_impl<simd_t>(offset, std::make_integer_sequence<scalar_type, length>{});
294 }
295 
305 template <simd::simd_concept simd_t>
306 constexpr simd_t load(void const * mem_addr)
307 {
308  assert(mem_addr != nullptr);
309  simd_t tmp{};
310 
311  for (size_t i = 0; i < simd_traits<simd_t>::length; ++i)
312  tmp[i] = *(static_cast<typename simd_traits<simd_t>::scalar_type const *>(mem_addr) + i);
313 
314  return tmp;
315 }
316 
318 template <simd::simd_concept simd_t>
319  requires detail::is_builtin_simd_v<simd_t> &&
320  detail::is_native_builtin_simd_v<simd_t>
321 constexpr simd_t load(void const * mem_addr)
322 {
323  assert(mem_addr != nullptr);
324 
325  if constexpr (simd_traits<simd_t>::max_length == 16)
326  return detail::load_sse4<simd_t>(mem_addr);
327  else if constexpr (simd_traits<simd_t>::max_length == 32)
328  return detail::load_avx2<simd_t>(mem_addr);
329  else if constexpr (simd_traits<simd_t>::max_length == 64)
330  return detail::load_avx512<simd_t>(mem_addr);
331  else
332  static_assert(simd_traits<simd_t>::max_length >= 16 && simd_traits<simd_t>::max_length <= 64,
333  "Unsupported simd type.");
334 }
336 
354 template <simd::simd_concept simd_t>
355 constexpr void transpose(std::array<simd_t, simd_traits<simd_t>::length> & matrix)
356 {
358 
359  for (size_t i = 0; i < matrix.size(); ++i)
360  for (size_t j = 0; j < matrix.size(); ++j)
361  tmp[j][i] = matrix[i][j];
362 
363  std::swap(tmp, matrix);
364 }
365 
367 template <simd::simd_concept simd_t>
368  requires detail::is_builtin_simd_v<simd_t> &&
369  detail::is_native_builtin_simd_v<simd_t> &&
370  simd_traits<simd_t>::max_length == 16 &&
371  simd_traits<simd_t>::length == 16
372 constexpr void transpose(std::array<simd_t, simd_traits<simd_t>::length> & matrix)
373 {
374  detail::transpose_matrix_sse4(matrix);
375 }
377 
386 template <simd::simd_concept target_simd_t, simd::simd_concept source_simd_t>
387 constexpr target_simd_t upcast(source_simd_t const & src)
388 {
389  static_assert(simd_traits<target_simd_t>::length <= simd_traits<source_simd_t>::length,
390  "The length of the target simd type must be greater or equal than the length of the source simd type.");
391 
392  target_simd_t tmp{};
393  for (unsigned i = 0; i < simd_traits<target_simd_t>::length; ++i)
394  tmp[i] = static_cast<typename simd_traits<target_simd_t>::scalar_type>(src[i]);
395 
396  return tmp;
397 }
398 
400 template <simd::simd_concept target_simd_t, simd::simd_concept source_simd_t>
401  requires detail::is_builtin_simd_v<target_simd_t> &&
402  detail::is_builtin_simd_v<source_simd_t> &&
403  detail::is_native_builtin_simd_v<source_simd_t>
404 constexpr target_simd_t upcast(source_simd_t const & src)
405 {
406  static_assert(simd_traits<target_simd_t>::length <= simd_traits<source_simd_t>::length,
407  "The length of the target simd type must be greater or equal than the length of the source simd type.");
408 
409  if constexpr (simd_traits<source_simd_t>::length == simd_traits<target_simd_t>::length)
410  {
411  static_assert(simd_traits<target_simd_t>::max_length == simd_traits<source_simd_t>::max_length,
412  "Target vector has a different byte size.");
413  return reinterpret_cast<target_simd_t>(src); // Same packing so we do not cast.
414  }
415  else if constexpr (std::signed_integral<typename simd_traits<source_simd_t>::scalar_type>)
416  {
417  return detail::upcast_signed<target_simd_t>(src);
418  }
419  else
420  {
421  static_assert(std::unsigned_integral<typename simd_traits<source_simd_t>::scalar_type>,
422  "Expected unsigned scalar type.");
423  return detail::upcast_unsigned<target_simd_t>(src);
424  }
425 }
427 
428 } // inline namespace simd
429 
430 } // namespace seqan3
simd_algorithm_avx2.hpp
Provides specific algorithm implementations for AVX2 instruction set.
unsigned_integral
The concept std::unsigned_integral is satisfied if and only if T is an integral type and std::is_sign...
utility
seqan3::field::offset
Sequence (SEQ) relative start position (0-based), unsigned value.
std::index_sequence
builtin_simd.hpp
Provides seqan3::detail::builtin_simd, seqan3::detail::is_builtin_simd and seqan3::simd::simd_traits<...
signed_integral
The concept std::signed_integral is satisfied if and only if T is an integral type and std::is_signed...
concept.hpp
Provides seqan3::simd::simd_concept.
std::fill
T fill(T... args)
concepts
The Concepts library.
simd_algorithm_sse4.hpp
Provides specific algorithm implementations for SSE4 instruction set.
simd_traits.hpp
Provides seqan3::simd::simd_traits.
array
seqan3
The main SeqAn3 namespace.
Definition: aligned_sequence_concept.hpp:36
std::swap
T swap(T... args)
simd_algorithm_avx512.hpp
Provides specific algorithm implementations for AVX512 instruction set.
std
SeqAn specific customisations in the standard namespace.
std::iota
T iota(T... args)