SeqAn3  3.0.3
The Modern C++ library for sequence analysis.
simd_algorithm_avx512.hpp
Go to the documentation of this file.
1 // -----------------------------------------------------------------------------------------------------
2 // Copyright (c) 2006-2021, Knut Reinert & Freie Universität Berlin
3 // Copyright (c) 2016-2021, Knut Reinert & MPI für molekulare Genetik
4 // This file may be used, modified and/or redistributed under the terms of the 3-clause BSD-License
5 // shipped with this file and also available at: https://github.com/seqan/seqan3/blob/master/LICENSE.md
6 // -----------------------------------------------------------------------------------------------------
7 
13 #pragma once
14 
15 #include <array>
16 
21 
22 //-----------------------------------------------------------------------------
23 // forward declare avx512 simd algorithms that use avx512 intrinsics
24 //-----------------------------------------------------------------------------
25 
26 namespace seqan3::detail
27 {
// Forward declaration; the definition further down in this file is only compiled when __AVX512F__ is defined.
// Returns a simd_t built from the data at `mem_addr` (the definition calls _mm512_loadu_si512).
31 template <simd::simd_concept simd_t>
32 constexpr simd_t load_avx512(void const * mem_addr);
33 
// Forward declaration; the definition further down in this file is only compiled when __AVX512F__ is defined.
// Writes `simd_vec` to `mem_addr` (the definition calls _mm512_storeu_si512).
37 template <simd::simd_concept simd_t>
38 constexpr void store_avx512(void * mem_addr, simd_t const & simd_vec);
39 
// Declaration only: this file contains no definition (the implementation section marks it as a TODO).
// Takes a std::array of simd_traits<simd_t>::length vectors by non-const reference.
43 template <simd::simd_concept simd_t>
44 inline void transpose_matrix_avx512(std::array<simd_t, simd_traits<simd_t>::length> & matrix);
45 
// Forward declaration; the definition further down in this file is only compiled when __AVX512F__ is defined.
// Converts `src` to a target_simd_t using the signed conversion intrinsics (_mm512_cvtepi*_epi*).
49 template <simd::simd_concept target_simd_t, simd::simd_concept source_simd_t>
50 constexpr target_simd_t upcast_signed_avx512(source_simd_t const & src);
51 
// Forward declaration; the definition further down in this file is only compiled when __AVX512F__ is defined.
// Converts `src` to a target_simd_t using the unsigned conversion intrinsics (_mm512_cvtepu*_epi*).
55 template <simd::simd_concept target_simd_t, simd::simd_concept source_simd_t>
56 constexpr target_simd_t upcast_unsigned_avx512(source_simd_t const & src);
57 
// Declaration only: this file contains no definition (the implementation section marks it as a TODO).
// `index` is a compile-time uint8_t; presumably it selects which half of `src` is extracted - TODO confirm.
61 template <uint8_t index, simd::simd_concept simd_t>
62 constexpr simd_t extract_half_avx512(simd_t const & src);
63 
// Declaration only: this file contains no definition (the implementation section marks it as a TODO).
// `index` is a compile-time uint8_t; presumably it selects which quarter of `src` is extracted - TODO confirm.
67 template <uint8_t index, simd::simd_concept simd_t>
68 constexpr simd_t extract_quarter_avx512(simd_t const & src);
69 
// Declaration only: this file contains no definition (the implementation section marks it as a TODO).
// `index` is a compile-time uint8_t; presumably it selects which eighth of `src` is extracted - TODO confirm.
73 template <uint8_t index, simd::simd_concept simd_t>
74 constexpr simd_t extract_eighth_avx512(simd_t const & src);
75 
76 }
77 
78 //-----------------------------------------------------------------------------
79 // implementation
80 //-----------------------------------------------------------------------------
81 
82 #ifdef __AVX512F__
83 
84 namespace seqan3::detail
85 {
86 
87 template <simd::simd_concept simd_t>
88 constexpr simd_t load_avx512(void const * mem_addr)
89 {
90  return reinterpret_cast<simd_t>(_mm512_loadu_si512(mem_addr));
91 }
92 
93 template <simd::simd_concept simd_t>
94 constexpr void store_avx512(void * mem_addr, simd_t const & simd_vec)
95 {
96  _mm512_storeu_si512(mem_addr, reinterpret_cast<__m512i const &>(simd_vec));
97 }
98 
99 // TODO: neither implemented nor used yet; if you implement it, don't forget to add it to seqan3::simd::transpose
// Declaration only: no definition is provided here. The parameter is a std::array holding
// simd_traits<simd_t>::length vectors, taken by non-const reference.
100 template <simd::simd_concept simd_t>
101 inline void transpose_matrix_avx512(std::array<simd_t, simd_traits<simd_t>::length> & matrix);
102 
103 template <simd::simd_concept target_simd_t, simd::simd_concept source_simd_t>
104 constexpr target_simd_t upcast_signed_avx512(source_simd_t const & src)
105 {
106  __m512i const & tmp = reinterpret_cast<__m512i const &>(src);
107  if constexpr (simd_traits<source_simd_t>::length == 64) // cast from epi8 ...
108  {
109  if constexpr (simd_traits<target_simd_t>::length == 32) // to epi16
110  return reinterpret_cast<target_simd_t>(_mm512_cvtepi8_epi16(_mm512_castsi512_si256(tmp)));
111  if constexpr (simd_traits<target_simd_t>::length == 16) // to epi32
112  return reinterpret_cast<target_simd_t>(_mm512_cvtepi8_epi32(_mm512_castsi512_si128(tmp)));
113  if constexpr (simd_traits<target_simd_t>::length == 8) // to epi64
114  return reinterpret_cast<target_simd_t>(_mm512_cvtepi8_epi64(_mm512_castsi512_si128(tmp)));
115  }
116  else if constexpr (simd_traits<source_simd_t>::length == 32) // cast from epi16 ...
117  {
118  if constexpr (simd_traits<target_simd_t>::length == 16) // to epi32
119  return reinterpret_cast<target_simd_t>(_mm512_cvtepi16_epi32(_mm512_castsi512_si256(tmp)));
120  if constexpr (simd_traits<target_simd_t>::length == 8) // to epi64
121  return reinterpret_cast<target_simd_t>(_mm512_cvtepi16_epi64(_mm512_castsi512_si128(tmp)));
122  }
123  else // cast from epi32 to epi64
124  {
125  static_assert(simd_traits<source_simd_t>::length == 16, "Expected 32 bit scalar type.");
126  return reinterpret_cast<target_simd_t>(_mm512_cvtepi32_epi64(_mm512_castsi512_si256(tmp)));
127  }
128 }
129 
130 template <simd::simd_concept target_simd_t, simd::simd_concept source_simd_t>
131 constexpr target_simd_t upcast_unsigned_avx512(source_simd_t const & src)
132 {
133  __m512i const & tmp = reinterpret_cast<__m512i const &>(src);
134  if constexpr (simd_traits<source_simd_t>::length == 64) // cast from epi8 ...
135  {
136  if constexpr (simd_traits<target_simd_t>::length == 32) // to epi16
137  return reinterpret_cast<target_simd_t>(_mm512_cvtepu8_epi16(_mm512_castsi512_si256(tmp)));
138  if constexpr (simd_traits<target_simd_t>::length == 16) // to epi32
139  return reinterpret_cast<target_simd_t>(_mm512_cvtepu8_epi32(_mm512_castsi512_si128(tmp)));
140  if constexpr (simd_traits<target_simd_t>::length == 8) // to epi64
141  return reinterpret_cast<target_simd_t>(_mm512_cvtepu8_epi64(_mm512_castsi512_si128(tmp)));
142  }
143  else if constexpr (simd_traits<source_simd_t>::length == 32) // cast from epi16 ...
144  {
145  if constexpr (simd_traits<target_simd_t>::length == 16) // to epi32
146  return reinterpret_cast<target_simd_t>(_mm512_cvtepu16_epi32(_mm512_castsi512_si256(tmp)));
147  if constexpr (simd_traits<target_simd_t>::length == 8) // to epi64
148  return reinterpret_cast<target_simd_t>(_mm512_cvtepu16_epi64(_mm512_castsi512_si128(tmp)));
149  }
150  else // cast from epi32 to epi64
151  {
152  static_assert(simd_traits<source_simd_t>::length == 16, "Expected 32 bit scalar type.");
153  return reinterpret_cast<target_simd_t>(_mm512_cvtepu32_epi64(_mm512_castsi512_si256(tmp)));
154  }
155 }
156 
157 // TODO: neither implemented nor used yet; if you implement it, don't forget to add it to seqan3::detail::extract_half
// Declaration only: no definition is provided here. `index` is a compile-time uint8_t template
// argument; presumably it selects which half of `src` is returned - TODO confirm once implemented.
158 template <uint8_t index, simd::simd_concept simd_t>
159 constexpr simd_t extract_half_avx512(simd_t const & src);
160 
161 // TODO: neither implemented nor used yet; if you implement it, don't forget to add it to seqan3::detail::extract_quarter
// Declaration only: no definition is provided here. `index` is a compile-time uint8_t template
// argument; presumably it selects which quarter of `src` is returned - TODO confirm once implemented.
162 template <uint8_t index, simd::simd_concept simd_t>
163 constexpr simd_t extract_quarter_avx512(simd_t const & src);
164 
165 // TODO: neither implemented nor used yet; if you implement it, don't forget to add it to seqan3::detail::extract_eighth
// Declaration only: no definition is provided here. `index` is a compile-time uint8_t template
// argument; presumably it selects which eighth of `src` is returned - TODO confirm once implemented.
166 template <uint8_t index, simd::simd_concept simd_t>
167 constexpr simd_t extract_eighth_avx512(simd_t const & src);
168 
169 } // namespace seqan3::detail
170 
171 #endif // __AVX512F__
Provides seqan3::simd::simd_concept.
Provides seqan3::detail::builtin_simd, seqan3::detail::is_builtin_simd and seqan3::simd::simd_traits<...
Provides intrinsics include for builtin simd.
Provides seqan3::simd::simd_traits.