SeqAn3  3.0.2
The Modern C++ library for sequence analysis.
simd_algorithm.hpp
Go to the documentation of this file.
1 // -----------------------------------------------------------------------------------------------------
2 // Copyright (c) 2006-2020, Knut Reinert & Freie Universität Berlin
3 // Copyright (c) 2016-2020, Knut Reinert & MPI für molekulare Genetik
4 // This file may be used, modified and/or redistributed under the terms of the 3-clause BSD-License
5 // shipped with this file and also available at: https://github.com/seqan/seqan3/blob/master/LICENSE.md
6 // -----------------------------------------------------------------------------------------------------
7 
13 #pragma once
14 
15 #include <array>
16 #include <utility>
17 
24 #include <seqan3/std/concepts>
25 
26 namespace seqan3::detail
27 {
28 
31 template <simd::simd_concept simd_t, size_t... I>
32 constexpr simd_t fill_impl(typename simd_traits<simd_t>::scalar_type const scalar, std::index_sequence<I...>) noexcept
33 {
34  return simd_t{((void)I, scalar)...};
35 }
36 
39 template <simd::simd_concept simd_t, typename scalar_t, scalar_t... I>
40 constexpr simd_t iota_impl(scalar_t const offset, std::integer_sequence<scalar_t, I...>)
41 {
42  return simd_t{static_cast<scalar_t>(offset + I)...};
43 }
44 
59 template <size_t divisor, simd_concept simd_t>
60 constexpr simd_t extract_impl(simd_t const & src, uint8_t const mask)
61 {
62  simd_t dst{};
63  constexpr size_t chunk = simd_traits<simd_t>::length / divisor;
64  size_t offset = chunk * mask;
65  for (size_t i = 0; i < chunk; ++i)
66  dst[i] = src[i + offset];
67 
68  return dst;
69 }
70 
79 template <simd::simd_concept target_simd_t, simd::simd_concept source_simd_t>
80 constexpr target_simd_t upcast_signed(source_simd_t const & src)
81 {
82  static_assert(simd_traits<target_simd_t>::max_length == simd_traits<source_simd_t>::max_length,
83  "Target vector has different byte size.");
84 
85  if constexpr (simd_traits<source_simd_t>::max_length == 16) // SSE4
86  return upcast_signed_sse4<target_simd_t>(src);
87  else if constexpr (simd_traits<source_simd_t>::max_length == 32) // AVX2
88  return upcast_signed_avx2<target_simd_t>(src);
89  else if constexpr (simd_traits<source_simd_t>::max_length == 64) // AVX512
90  return upcast_signed_avx512<target_simd_t>(src);
91  else
92  static_assert(simd_traits<source_simd_t>::max_length <= 32, "simd type is not supported.");
93 }
94 
103 template <simd::simd_concept target_simd_t, simd::simd_concept source_simd_t>
104 constexpr target_simd_t upcast_unsigned(source_simd_t const & src)
105 {
106  static_assert(simd_traits<target_simd_t>::max_length == simd_traits<source_simd_t>::max_length,
107  "Target vector has different byte size.");
108 
109  if constexpr (simd_traits<source_simd_t>::max_length == 16) // SSE4
110  return upcast_unsigned_sse4<target_simd_t>(src);
111  else if constexpr (simd_traits<source_simd_t>::max_length == 32) // AVX2
112  return upcast_unsigned_avx2<target_simd_t>(src);
113  else if constexpr (simd_traits<source_simd_t>::max_length == 64) // AVX512
114  return upcast_unsigned_avx512<target_simd_t>(src);
115  else
116  static_assert(simd_traits<source_simd_t>::max_length <= 32, "simd type is not supported.");
117 }
118 
141 template <uint8_t index, simd::simd_concept simd_t>
142 constexpr simd_t extract_half(simd_t const & src)
143 {
144  static_assert(index < 2, "The index must be in the range of [0, 1]");
145 
146  return detail::extract_impl<2>(src, index);
147 }
148 
150 template <uint8_t index, simd::simd_concept simd_t>
151  requires detail::is_builtin_simd_v<simd_t> &&
152  detail::is_native_builtin_simd_v<simd_t>
153 constexpr simd_t extract_half(simd_t const & src)
154 {
155  static_assert(index < 2, "The index must be in the range of [0, 1]");
156 
157  if constexpr (simd_traits<simd_t>::length < 2) // In case there are less elements available return unchanged value.
158  return src;
159  else if constexpr (simd_traits<simd_t>::max_length == 16) // SSE4
160  return detail::extract_half_sse4<index>(src);
161  else if constexpr (simd_traits<simd_t>::max_length == 32) // AVX2
162  return detail::extract_half_avx2<index>(src);
163  else // Anything else
164  return detail::extract_impl<2>(src, index);
165 }
167 
190 template <uint8_t index, simd::simd_concept simd_t>
191 constexpr simd_t extract_quarter(simd_t const & src)
192 {
193  static_assert(index < 4, "The index must be in the range of [0, 1, 2, 3]");
194 
195  return detail::extract_impl<4>(src, index);
196 }
197 
199 template <uint8_t index, simd::simd_concept simd_t>
200  requires detail::is_builtin_simd_v<simd_t> &&
201  detail::is_native_builtin_simd_v<simd_t>
202 constexpr simd_t extract_quarter(simd_t const & src)
203 {
204  static_assert(index < 4, "The index must be in the range of [0, 1, 2, 3]");
205 
206  if constexpr (simd_traits<simd_t>::length < 4) // In case there are less elements available return unchanged value.
207  return src;
208  else if constexpr (simd_traits<simd_t>::max_length == 16) // SSE4
209  return detail::extract_quarter_sse4<index>(src);
210  else if constexpr (simd_traits<simd_t>::max_length == 32) // AVX2
211  return detail::extract_quarter_avx2<index>(src);
212  else // Anything else
213  return detail::extract_impl<4>(src, index);
214 }
216 
239 template <uint8_t index, simd::simd_concept simd_t>
240 constexpr simd_t extract_eighth(simd_t const & src)
241 {
242  return detail::extract_impl<8>(src, index);
243 }
244 
246 template <uint8_t index, simd::simd_concept simd_t>
247  requires detail::is_builtin_simd_v<simd_t> &&
248  detail::is_native_builtin_simd_v<simd_t>
249 constexpr simd_t extract_eighth(simd_t const & src)
250 {
251  static_assert(index < 8, "The index must be in the range of [0, 1, 2, 3, 4, 5, 6, 7]");
252 
253  if constexpr (simd_traits<simd_t>::length < 8) // In case there are less elements available return unchanged value.
254  return src;
255  else if constexpr (simd_traits<simd_t>::max_length == 16) // SSE4
256  return detail::extract_eighth_sse4<index>(src);
257  else if constexpr (simd_traits<simd_t>::max_length == 32) // AVX2
258  return detail::extract_eighth_avx2<index>(src);
259  else // Anything else
260  return detail::extract_impl<8>(src, index);
261 }
263 
264 } // namespace seqan3::detail
265 
266 namespace seqan3
267 {
268 
269 inline namespace simd
270 {
271 
281 template <simd::simd_concept simd_t>
282 constexpr simd_t fill(typename simd_traits<simd_t>::scalar_type const scalar) noexcept
283 {
284  constexpr size_t length = simd_traits<simd_t>::length;
285  return detail::fill_impl<simd_t>(scalar, std::make_index_sequence<length>{});
286 }
287 
297 template <simd::simd_concept simd_t>
298 constexpr simd_t iota(typename simd_traits<simd_t>::scalar_type const offset)
299 {
300  constexpr size_t length = simd_traits<simd_t>::length;
301  using scalar_type = typename simd_traits<simd_t>::scalar_type;
302  return detail::iota_impl<simd_t>(offset, std::make_integer_sequence<scalar_type, length>{});
303 }
304 
314 template <simd::simd_concept simd_t>
315 constexpr simd_t load(void const * mem_addr)
316 {
317  assert(mem_addr != nullptr);
318  simd_t tmp{};
319 
320  for (size_t i = 0; i < simd_traits<simd_t>::length; ++i)
321  tmp[i] = *(static_cast<typename simd_traits<simd_t>::scalar_type const *>(mem_addr) + i);
322 
323  return tmp;
324 }
325 
327 template <simd::simd_concept simd_t>
328  requires detail::is_builtin_simd_v<simd_t> &&
329  detail::is_native_builtin_simd_v<simd_t>
330 constexpr simd_t load(void const * mem_addr)
331 {
332  assert(mem_addr != nullptr);
333 
334  if constexpr (simd_traits<simd_t>::max_length == 16)
335  return detail::load_sse4<simd_t>(mem_addr);
336  else if constexpr (simd_traits<simd_t>::max_length == 32)
337  return detail::load_avx2<simd_t>(mem_addr);
338  else if constexpr (simd_traits<simd_t>::max_length == 64)
339  return detail::load_avx512<simd_t>(mem_addr);
340  else
341  static_assert(simd_traits<simd_t>::max_length >= 16 && simd_traits<simd_t>::max_length <= 64,
342  "Unsupported simd type.");
343 }
345 
363 template <simd::simd_concept simd_t>
364 constexpr void transpose(std::array<simd_t, simd_traits<simd_t>::length> & matrix)
365 {
367 
368  for (size_t i = 0; i < matrix.size(); ++i)
369  for (size_t j = 0; j < matrix.size(); ++j)
370  tmp[j][i] = matrix[i][j];
371 
372  std::swap(tmp, matrix);
373 }
374 
376 // Implementation for seqan builtin simd.
377 template <simd::simd_concept simd_t>
378  requires detail::is_builtin_simd_v<simd_t> &&
379  detail::is_native_builtin_simd_v<simd_t> &&
380  (simd_traits<simd_t>::max_length == simd_traits<simd_t>::length)
381 constexpr void transpose(std::array<simd_t, simd_traits<simd_t>::length> & matrix)
382 {
383  if constexpr (simd_traits<simd_t>::length == 16) // SSE4 implementation
384  detail::transpose_matrix_sse4(matrix);
385  else if constexpr (simd_traits<simd_t>::length == 32) // AVX2 implementation
386  detail::transpose_matrix_avx2(matrix);
387  else
388  transpose(matrix);
389 }
391 
400 template <simd::simd_concept target_simd_t, simd::simd_concept source_simd_t>
401 constexpr target_simd_t upcast(source_simd_t const & src)
402 {
403  static_assert(simd_traits<target_simd_t>::length <= simd_traits<source_simd_t>::length,
404  "The length of the target simd type must be greater or equal than the length of the source simd type.");
405 
406  target_simd_t tmp{};
407  for (unsigned i = 0; i < simd_traits<target_simd_t>::length; ++i)
408  tmp[i] = static_cast<typename simd_traits<target_simd_t>::scalar_type>(src[i]);
409 
410  return tmp;
411 }
412 
414 template <simd::simd_concept target_simd_t, simd::simd_concept source_simd_t>
415  requires detail::is_builtin_simd_v<target_simd_t> &&
416  detail::is_builtin_simd_v<source_simd_t> &&
417  detail::is_native_builtin_simd_v<source_simd_t>
418 constexpr target_simd_t upcast(source_simd_t const & src)
419 {
420  static_assert(simd_traits<target_simd_t>::length <= simd_traits<source_simd_t>::length,
421  "The length of the target simd type must be greater or equal than the length of the source simd type.");
422 
423  if constexpr (simd_traits<source_simd_t>::length == simd_traits<target_simd_t>::length)
424  {
425  static_assert(simd_traits<target_simd_t>::max_length == simd_traits<source_simd_t>::max_length,
426  "Target vector has a different byte size.");
427  return reinterpret_cast<target_simd_t>(src); // Same packing so we do not cast.
428  }
429  else if constexpr (std::signed_integral<typename simd_traits<source_simd_t>::scalar_type>)
430  {
431  return detail::upcast_signed<target_simd_t>(src);
432  }
433  else
434  {
435  static_assert(std::unsigned_integral<typename simd_traits<source_simd_t>::scalar_type>,
436  "Expected unsigned scalar type.");
437  return detail::upcast_unsigned<target_simd_t>(src);
438  }
439 }
441 
442 } // inline namespace simd
443 
444 } // namespace seqan3
simd_algorithm_avx2.hpp
Provides specific algorithm implementations for AVX2 instruction set.
utility
std::index_sequence
builtin_simd.hpp
Provides seqan3::detail::builtin_simd, seqan3::detail::is_builtin_simd and seqan3::simd::simd_traits<...
concept.hpp
Provides seqan3::simd::simd_concept.
std::fill
T fill(T... args)
concepts
The Concepts library.
simd_algorithm_sse4.hpp
Provides specific algorithm implementations for SSE4 instruction set.
simd_traits.hpp
Provides seqan3::simd::simd_traits.
array
seqan3
The main SeqAn3 namespace.
Definition: aligned_sequence_concept.hpp:29
std::swap
T swap(T... args)
simd_algorithm_avx512.hpp
Provides specific algorithm implementations for AVX512 instruction set.
std
SeqAn specific customisations in the standard namespace.
std::iota
T iota(T... args)