SeqAn3  3.1.0
The Modern C++ library for sequence analysis.
algorithm.hpp
Go to the documentation of this file.
1 // -----------------------------------------------------------------------------------------------------
2 // Copyright (c) 2006-2021, Knut Reinert & Freie Universität Berlin
3 // Copyright (c) 2016-2021, Knut Reinert & MPI für molekulare Genetik
4 // This file may be used, modified and/or redistributed under the terms of the 3-clause BSD-License
5 // shipped with this file and also available at: https://github.com/seqan/seqan3/blob/master/LICENSE.md
6 // -----------------------------------------------------------------------------------------------------
7 
13 #pragma once
14 
15 #include <array>
16 #include <cassert>
17 #include <seqan3/std/concepts>
18 #include <utility>
19 
26 
27 namespace seqan3::detail
28 {
29 
32 template <simd::simd_concept simd_t, size_t... I>
33 constexpr simd_t fill_impl(typename simd_traits<simd_t>::scalar_type const scalar, std::index_sequence<I...>) noexcept
34 {
35  return simd_t{((void)I, scalar)...};
36 }
37 
40 template <simd::simd_concept simd_t, typename scalar_t, scalar_t... I>
41 constexpr simd_t iota_impl(scalar_t const offset, std::integer_sequence<scalar_t, I...>)
42 {
43  return simd_t{static_cast<scalar_t>(offset + I)...};
44 }
45 
60 template <size_t divisor, simd_concept simd_t>
61 constexpr simd_t extract_impl(simd_t const & src, uint8_t const mask)
62 {
63  simd_t dst{};
64  constexpr size_t chunk = simd_traits<simd_t>::length / divisor;
65  size_t offset = chunk * mask;
66  for (size_t i = 0; i < chunk; ++i)
67  dst[i] = src[i + offset];
68 
69  return dst;
70 }
71 
80 template <simd::simd_concept target_simd_t, simd::simd_concept source_simd_t>
81 constexpr target_simd_t upcast_signed(source_simd_t const & src)
82 {
83  static_assert(simd_traits<target_simd_t>::max_length == simd_traits<source_simd_t>::max_length,
84  "Target vector has different byte size.");
85 
86  if constexpr (simd_traits<source_simd_t>::max_length == 16) // SSE4
87  return upcast_signed_sse4<target_simd_t>(src);
88  else if constexpr (simd_traits<source_simd_t>::max_length == 32) // AVX2
89  return upcast_signed_avx2<target_simd_t>(src);
90  else if constexpr (simd_traits<source_simd_t>::max_length == 64) // AVX512
91  return upcast_signed_avx512<target_simd_t>(src);
92  else
93  static_assert(simd_traits<source_simd_t>::max_length <= 32, "simd type is not supported.");
94 }
95 
104 template <simd::simd_concept target_simd_t, simd::simd_concept source_simd_t>
105 constexpr target_simd_t upcast_unsigned(source_simd_t const & src)
106 {
107  static_assert(simd_traits<target_simd_t>::max_length == simd_traits<source_simd_t>::max_length,
108  "Target vector has different byte size.");
109 
110  if constexpr (simd_traits<source_simd_t>::max_length == 16) // SSE4
111  return upcast_unsigned_sse4<target_simd_t>(src);
112  else if constexpr (simd_traits<source_simd_t>::max_length == 32) // AVX2
113  return upcast_unsigned_avx2<target_simd_t>(src);
114  else if constexpr (simd_traits<source_simd_t>::max_length == 64) // AVX512
115  return upcast_unsigned_avx512<target_simd_t>(src);
116  else
117  static_assert(simd_traits<source_simd_t>::max_length <= 32, "simd type is not supported.");
118 }
119 
142 template <uint8_t index, simd::simd_concept simd_t>
143 constexpr simd_t extract_half(simd_t const & src)
144 {
145  static_assert(index < 2, "The index must be in the range of [0, 1]");
146 
147  return detail::extract_impl<2>(src, index);
148 }
149 
151 template <uint8_t index, simd::simd_concept simd_t>
152  requires detail::is_builtin_simd_v<simd_t> &&
153  detail::is_native_builtin_simd_v<simd_t>
154 constexpr simd_t extract_half(simd_t const & src)
155 {
156  static_assert(index < 2, "The index must be in the range of [0, 1]");
157 
158  if constexpr (simd_traits<simd_t>::length < 2) // In case there are less elements available return unchanged value.
159  return src;
160  else if constexpr (simd_traits<simd_t>::max_length == 16) // SSE4
161  return detail::extract_half_sse4<index>(src);
162  else if constexpr (simd_traits<simd_t>::max_length == 32) // AVX2
163  return detail::extract_half_avx2<index>(src);
164  else // Anything else
165  return detail::extract_impl<2>(src, index);
166 }
168 
191 template <uint8_t index, simd::simd_concept simd_t>
192 constexpr simd_t extract_quarter(simd_t const & src)
193 {
194  static_assert(index < 4, "The index must be in the range of [0, 1, 2, 3]");
195 
196  return detail::extract_impl<4>(src, index);
197 }
198 
200 template <uint8_t index, simd::simd_concept simd_t>
201  requires detail::is_builtin_simd_v<simd_t> &&
202  detail::is_native_builtin_simd_v<simd_t>
203 constexpr simd_t extract_quarter(simd_t const & src)
204 {
205  static_assert(index < 4, "The index must be in the range of [0, 1, 2, 3]");
206 
207  if constexpr (simd_traits<simd_t>::length < 4) // In case there are less elements available return unchanged value.
208  return src;
209  else if constexpr (simd_traits<simd_t>::max_length == 16) // SSE4
210  return detail::extract_quarter_sse4<index>(src);
211  else if constexpr (simd_traits<simd_t>::max_length == 32) // AVX2
212  return detail::extract_quarter_avx2<index>(src);
213  else // Anything else
214  return detail::extract_impl<4>(src, index);
215 }
217 
240 template <uint8_t index, simd::simd_concept simd_t>
241 constexpr simd_t extract_eighth(simd_t const & src)
242 {
243  return detail::extract_impl<8>(src, index);
244 }
245 
247 template <uint8_t index, simd::simd_concept simd_t>
248  requires detail::is_builtin_simd_v<simd_t> &&
249  detail::is_native_builtin_simd_v<simd_t>
250 constexpr simd_t extract_eighth(simd_t const & src)
251 {
252  static_assert(index < 8, "The index must be in the range of [0, 1, 2, 3, 4, 5, 6, 7]");
253 
254  if constexpr (simd_traits<simd_t>::length < 8) // In case there are less elements available return unchanged value.
255  return src;
256  else if constexpr (simd_traits<simd_t>::max_length == 16) // SSE4
257  return detail::extract_eighth_sse4<index>(src);
258  else if constexpr (simd_traits<simd_t>::max_length == 32) // AVX2
259  return detail::extract_eighth_avx2<index>(src);
260  else // Anything else
261  return detail::extract_impl<8>(src, index);
262 }
264 
265 } // namespace seqan3::detail
266 
267 namespace seqan3
268 {
269 
270 inline namespace simd
271 {
272 
282 template <simd::simd_concept simd_t>
283 constexpr simd_t fill(typename simd_traits<simd_t>::scalar_type const scalar) noexcept
284 {
285  constexpr size_t length = simd_traits<simd_t>::length;
286  return detail::fill_impl<simd_t>(scalar, std::make_index_sequence<length>{});
287 }
288 
298 template <simd::simd_concept simd_t>
299 constexpr simd_t iota(typename simd_traits<simd_t>::scalar_type const offset)
300 {
301  constexpr size_t length = simd_traits<simd_t>::length;
302  using scalar_type = typename simd_traits<simd_t>::scalar_type;
303  return detail::iota_impl<simd_t>(offset, std::make_integer_sequence<scalar_type, length>{});
304 }
305 
315 template <simd::simd_concept simd_t>
316 constexpr simd_t load(void const * mem_addr)
317 {
318  assert(mem_addr != nullptr);
319  simd_t tmp{};
320 
321  for (size_t i = 0; i < simd_traits<simd_t>::length; ++i)
322  tmp[i] = *(static_cast<typename simd_traits<simd_t>::scalar_type const *>(mem_addr) + i);
323 
324  return tmp;
325 }
326 
328 template <simd::simd_concept simd_t>
329  requires detail::is_builtin_simd_v<simd_t> &&
330  detail::is_native_builtin_simd_v<simd_t>
331 constexpr simd_t load(void const * mem_addr)
332 {
333  assert(mem_addr != nullptr);
334 
335  if constexpr (simd_traits<simd_t>::max_length == 16)
336  return detail::load_sse4<simd_t>(mem_addr);
337  else if constexpr (simd_traits<simd_t>::max_length == 32)
338  return detail::load_avx2<simd_t>(mem_addr);
339  else if constexpr (simd_traits<simd_t>::max_length == 64)
340  return detail::load_avx512<simd_t>(mem_addr);
341  else
342  static_assert(simd_traits<simd_t>::max_length >= 16 && simd_traits<simd_t>::max_length <= 64,
343  "Unsupported simd type.");
344 }
346 
357 template <simd::simd_concept simd_t>
358 constexpr void store(void * mem_addr, simd_t const & simd_vec)
359 {
360  assert(mem_addr != nullptr);
361  using scalar_t = typename simd_traits<simd_t>::scalar_type;
362 
363  for (size_t i = 0; i < simd_traits<simd_t>::length; ++i)
364  *(static_cast<scalar_t *>(mem_addr) + i) = simd_vec[i];
365 }
366 
368 template <simd::simd_concept simd_t>
369  requires detail::is_builtin_simd_v<simd_t> &&
370  detail::is_native_builtin_simd_v<simd_t>
371 constexpr void store(void * mem_addr, simd_t const & simd_vec)
372 {
373  assert(mem_addr != nullptr);
374 
375  if constexpr (simd_traits<simd_t>::max_length == 16)
376  detail::store_sse4<simd_t>(mem_addr, simd_vec);
377  else if constexpr (simd_traits<simd_t>::max_length == 32)
378  detail::store_avx2<simd_t>(mem_addr, simd_vec);
379  else if constexpr (simd_traits<simd_t>::max_length == 64)
380  detail::store_avx512<simd_t>(mem_addr, simd_vec);
381  else
382  static_assert(simd_traits<simd_t>::max_length >= 16 && simd_traits<simd_t>::max_length <= 64,
383  "Unsupported simd type.");
384 }
386 
404 template <simd::simd_concept simd_t>
405 constexpr void transpose(std::array<simd_t, simd_traits<simd_t>::length> & matrix)
406 {
408 
409  for (size_t i = 0; i < matrix.size(); ++i)
410  for (size_t j = 0; j < matrix.size(); ++j)
411  tmp[j][i] = matrix[i][j];
412 
413  std::swap(tmp, matrix);
414 }
415 
417 // Implementation for seqan builtin simd.
418 template <simd::simd_concept simd_t>
419  requires detail::is_builtin_simd_v<simd_t> &&
420  detail::is_native_builtin_simd_v<simd_t> &&
421  (simd_traits<simd_t>::max_length == simd_traits<simd_t>::length)
422 constexpr void transpose(std::array<simd_t, simd_traits<simd_t>::length> & matrix)
423 {
424  if constexpr (simd_traits<simd_t>::length == 16) // SSE4 implementation
425  detail::transpose_matrix_sse4(matrix);
426  else if constexpr (simd_traits<simd_t>::length == 32) // AVX2 implementation
427  detail::transpose_matrix_avx2(matrix);
428  else
429  transpose(matrix);
430 }
432 
441 template <simd::simd_concept target_simd_t, simd::simd_concept source_simd_t>
442 constexpr target_simd_t upcast(source_simd_t const & src)
443 {
444  static_assert(simd_traits<target_simd_t>::length <= simd_traits<source_simd_t>::length,
445  "The length of the target simd type must be greater or equal than the length of the source simd type.");
446 
447  target_simd_t tmp{};
448  for (unsigned i = 0; i < simd_traits<target_simd_t>::length; ++i)
449  tmp[i] = static_cast<typename simd_traits<target_simd_t>::scalar_type>(src[i]);
450 
451  return tmp;
452 }
453 
455 template <simd::simd_concept target_simd_t, simd::simd_concept source_simd_t>
456  requires detail::is_builtin_simd_v<target_simd_t> &&
457  detail::is_builtin_simd_v<source_simd_t> &&
458  detail::is_native_builtin_simd_v<source_simd_t>
459 constexpr target_simd_t upcast(source_simd_t const & src)
460 {
461  static_assert(simd_traits<target_simd_t>::length <= simd_traits<source_simd_t>::length,
462  "The length of the target simd type must be greater or equal than the length of the source simd type.");
463 
464  if constexpr (simd_traits<source_simd_t>::length == simd_traits<target_simd_t>::length)
465  {
466  static_assert(simd_traits<target_simd_t>::max_length == simd_traits<source_simd_t>::max_length,
467  "Target vector has a different byte size.");
468  return reinterpret_cast<target_simd_t>(src); // Same packing so we do not cast.
469  }
470  else if constexpr (std::signed_integral<typename simd_traits<source_simd_t>::scalar_type>)
471  {
472  return detail::upcast_signed<target_simd_t>(src);
473  }
474  else
475  {
476  static_assert(std::unsigned_integral<typename simd_traits<source_simd_t>::scalar_type>,
477  "Expected unsigned scalar type.");
478  return detail::upcast_unsigned<target_simd_t>(src);
479  }
480 }
482 
483 } // inline namespace simd
484 
485 } // namespace seqan3
Provides seqan3::detail::builtin_simd, seqan3::detail::is_builtin_simd and seqan3::simd::simd_traits<...
The <concepts> header from C++20's standard library.
T fill(T... args)
@ offset
Sequence (seqan3::field::seq) relative start position (0-based), unsigned value.
constexpr auto chunk
A chunk view.
Definition: chunk.hpp:29
T iota(T... args)
The main SeqAn3 namespace.
Definition: aligned_sequence_concept.hpp:29
SeqAn specific customisations in the standard namespace.
Provides specific algorithm implementations for AVX2 instruction set.
Provides specific algorithm implementations for AVX512 instruction set.
Provides specific algorithm implementations for SSE4 instruction set.
Provides seqan3::simd::simd_traits.
T swap(T... args)
Provides seqan3::simd::simd_concept.