PointerVector.hpp
Go to the documentation of this file.
1 // Distributed under the MIT License.
2 // See LICENSE.txt for details.
3 
4 /// \file
5 /// Defines class PointerVector
6 
7 #pragma once
8 
#include <algorithm>
#include <cmath>
#include <cstddef>
#include <functional>
#include <initializer_list>
#include <type_traits>

// The Utilities/Blaze.hpp configures Blaze
#include "ErrorHandling/Assert.hpp"
#include "Utilities/Blaze.hpp"

#include <blaze/math/CustomVector.h>
#include <blaze/system/Version.h>
#include <blaze/util/typetraits/RemoveConst.h>
19 
// Array allocation expression and the matching deleter object.
//
// The macro parameters intentionally do NOT start with an underscore followed
// by an uppercase letter: such identifiers are reserved for the implementation
// and must not be used as macro parameter names in user code.
//
// The NOLINT marker silences clang-tidy on the raw array `new`.
#define SPECTRE_BLAZE_ALLOCATOR(TYPE_T, SIZE_V) \
  new TYPE_T[SIZE_V]  // NOLINT
// NOTE(review): blaze::ArrayDelete is presumably the `delete[]` counterpart of
// the allocation above -- confirm against the Blaze version in use.
#define SPECTRE_BLAZE_DEALLOCATOR blaze::ArrayDelete()
24 
25 // Blaze version compatibility definitions:
26 // between Blaze 3.2 and 3.4, there have been several minor changes to type
27 // definitions. Here, we define the aliases to the appropriate tokens for the
28 // respective versions.
29 #if ((BLAZE_MAJOR_VERSION == 3) && (BLAZE_MINOR_VERSION == 2))
30 const bool blaze_unaligned = blaze::unaligned;
31 template <typename T>
32 using BlazePow = blaze::Pow<T>;
33 #else // we only support blaze 3.2+, so this is all later versions
34 const bool blaze_unaligned = blaze::unaligned != 0;
35 template <typename T>
36 using BlazePow = blaze::UnaryPow<T>;
37 #endif
38 
39 #if ((BLAZE_MAJOR_VERSION == 3) && (BLAZE_MINOR_VERSION <= 3))
40 template <typename T>
41 using blaze_enable_if_t = blaze::EnableIf_<T>;
42 template <typename T>
43 using blaze_remove_const_t = blaze::RemoveConst_<T>;
44 template <typename T>
45 using blaze_simd_trait_t = blaze::SIMDTrait_<T>;
46 template <typename T>
47 using blaze_element_type_t = blaze::ElementType_<T>;
48 template <typename T>
49 using blaze_result_type_t = blaze::ResultType_<T>;
50 template <typename T1, typename T2>
51 using blaze_mult_trait_t = blaze::MultTrait_<T1, T2>;
52 template <typename T1, typename T2>
53 using blaze_div_trait_t = blaze::DivTrait_<T1, T2>;
54 template <typename T1, typename T2>
55 using blaze_cross_trait_t = blaze::CrossTrait_<T1, T2>;
56 template <typename T>
57 using blaze_const_iterator_t = blaze::ConstIterator_<T>;
58 template <typename T>
59 using blaze_is_numeric = blaze::IsNumeric<T>;
60 template <typename T>
61 const bool blaze_is_numeric_v = blaze_is_numeric<T>::value;
62 
63 #else // we only support blaze 3.2+, so this is all later versions
64 template <bool B>
65 using blaze_enable_if_t = blaze::EnableIf_t<B>;
66 template <typename T>
67 using blaze_remove_const_t = blaze::RemoveConst_t<T>;
68 template <typename T>
69 using blaze_simd_trait_t = blaze::SIMDTrait_t<T>;
70 template <typename T>
71 using blaze_element_type_t = blaze::ElementType_t<T>;
72 template <typename T>
73 using blaze_result_type_t = blaze::ResultType_t<T>;
74 template <typename T1, typename T2>
75 using blaze_mult_trait_t = blaze::MultTrait_t<T1, T2>;
76 template <typename T1, typename T2>
77 using blaze_div_trait_t = blaze::DivTrait_t<T1, T2>;
78 template <typename T1, typename T2>
79 using blaze_cross_trait_t = blaze::CrossTrait_t<T1, T2>;
80 template <typename T>
81 using blaze_const_iterator_t = blaze::ConstIterator_t<T>;
82 template <typename T>
83 const bool blaze_is_numeric = blaze::IsNumeric_v<T>;
84 template <typename T>
85 const bool blaze_is_numeric_v = blaze_is_numeric<T>;
86 #endif // ((BLAZE_MAJOR_VERSION == 3) && (BLAZE_MINOR_VERSION <= 3))
87 
88 namespace blaze {
89 template <typename T>
90 BLAZE_ALWAYS_INLINE SIMDdouble step_function(const SIMDf64<T>& v) noexcept
91 #if BLAZE_AVX512F_MODE || BLAZE_MIC_MODE
92 {
93  return _mm512_set_pd((~v).eval().value[7] < 0.0 ? 0.0 : 1.0,
94  (~v).eval().value[6] < 0.0 ? 0.0 : 1.0,
95  (~v).eval().value[5] < 0.0 ? 0.0 : 1.0,
96  (~v).eval().value[4] < 0.0 ? 0.0 : 1.0,
97  (~v).eval().value[3] < 0.0 ? 0.0 : 1.0,
98  (~v).eval().value[2] < 0.0 ? 0.0 : 1.0,
99  (~v).eval().value[1] < 0.0 ? 0.0 : 1.0,
100  (~v).eval().value[0] < 0.0 ? 0.0 : 1.0);
101 }
102 #elif BLAZE_AVX_MODE
103 {
104  return _mm256_set_pd((~v).eval().value[3] < 0.0 ? 0.0 : 1.0,
105  (~v).eval().value[2] < 0.0 ? 0.0 : 1.0,
106  (~v).eval().value[1] < 0.0 ? 0.0 : 1.0,
107  (~v).eval().value[0] < 0.0 ? 0.0 : 1.0);
108 }
109 #elif BLAZE_SSE2_MODE
110 {
111  return _mm_set_pd((~v).eval().value[1] < 0.0 ? 0.0 : 1.0,
112  (~v).eval().value[0] < 0.0 ? 0.0 : 1.0);
113 }
114 #else
115 {
116  return SIMDdouble{(~v).value < 0.0 ? 0.0 : 1.0};
117 }
118 #endif
119 
120 BLAZE_ALWAYS_INLINE double step_function(const double& v) noexcept {
121  return v < 0.0 ? 0.0 : 1.0;
122 }
123 
124 struct StepFunction {
125  explicit inline StepFunction() = default;
126 
127  template <typename T>
128  BLAZE_ALWAYS_INLINE decltype(auto) operator()(const T& a) const noexcept {
129  return step_function(a);
130  }
131 
132  template <typename T>
133  BLAZE_ALWAYS_INLINE decltype(auto) load(const T& a) const noexcept {
134  BLAZE_CONSTRAINT_MUST_BE_SIMD_PACK(T);
135  return step_function(a);
136  }
137 };
138 } // namespace blaze
139 
140 template <typename VT, bool TF>
141 BLAZE_ALWAYS_INLINE decltype(auto) step_function(
142  const blaze::DenseVector<VT, TF>& vec) noexcept {
143  return map(~vec, blaze::StepFunction{});
144 }
145 
146 template <typename VT, bool TF>
147 BLAZE_ALWAYS_INLINE decltype(auto) StepFunction(
148  const blaze::DenseVector<VT, TF>& vec) noexcept {
149  return map(~vec, blaze::StepFunction{});
150 }
151 
// Blaze 3.3 and newer already has atan2 implemented
#if ((BLAZE_MAJOR_VERSION == 3) && (BLAZE_MINOR_VERSION == 2))
namespace blaze {
// atan2 on single-precision SIMD packs. Only defined when SVML intrinsics are
// available for the active SIMD mode; otherwise the overload is deleted.
template <typename T0, typename T1>
BLAZE_ALWAYS_INLINE const SIMDfloat atan2(const SIMDf32<T0>& a,
                                          const SIMDf32<T1>& b) noexcept
#if BLAZE_SVML_MODE && (BLAZE_AVX512F_MODE || BLAZE_MIC_MODE)
{
  return _mm512_atan2_ps((~a).eval().value, (~b).eval().value);
}
#elif BLAZE_SVML_MODE && BLAZE_AVX_MODE
{
  return _mm256_atan2_ps((~a).eval().value, (~b).eval().value);
}
#elif BLAZE_SVML_MODE && BLAZE_SSE_MODE
{
  return _mm_atan2_ps((~a).eval().value, (~b).eval().value);
}
#else
    = delete;
#endif

// atan2 on double-precision SIMD packs; same availability rules as above.
template <typename T0, typename T1>
BLAZE_ALWAYS_INLINE const SIMDdouble atan2(const SIMDf64<T0>& a,
                                           const SIMDf64<T1>& b) noexcept
#if BLAZE_SVML_MODE && (BLAZE_AVX512F_MODE || BLAZE_MIC_MODE)
{
  return _mm512_atan2_pd((~a).eval().value, (~b).eval().value);
}
#elif BLAZE_SVML_MODE && BLAZE_AVX_MODE
{
  return _mm256_atan2_pd((~a).eval().value, (~b).eval().value);
}
#elif BLAZE_SVML_MODE && BLAZE_SSE_MODE
{
  return _mm_atan2_pd((~a).eval().value, (~b).eval().value);
}
#else
    = delete;
#endif

// Compile-time flag telling whether a SIMD atan2 can be used for the element
// types T0 and T1.
// NOTE(review): the first template-argument line of this alias was missing
// from the listing. The `bool,` value type is required by
// std::integral_constant; the same-type requirement on T0/T1 is reconstructed
// (the SIMD overloads above only pair packs of equal precision) -- confirm
// against the upstream file.
template <typename T0, typename T1>
using HasSIMDAtan2 = std::integral_constant<
    bool, std::is_same<std::decay_t<T0>, std::decay_t<T1>>::value and
              std::is_arithmetic<std::decay_t<T0>>::value and
              bool(BLAZE_SVML_MODE) and                         // NOLINT
              (bool(BLAZE_SSE_MODE) || bool(BLAZE_AVX_MODE) ||  // NOLINT
               bool(BLAZE_MIC_MODE) || bool(BLAZE_AVX512F_MODE))>;  // NOLINT

// Binary functor for `map`: `operator()` evaluates atan2 on a pair of
// elements, `load` on a pair of SIMD packs.
struct Atan2 {
  template <typename T1, typename T2>
  BLAZE_ALWAYS_INLINE decltype(auto) operator()(const T1& a, const T2& b) const
      noexcept {
    // Bring std::atan2 into scope so scalars use it while SIMD packs are
    // resolved through the overloads above.
    using std::atan2;
    return atan2(a, b);
  }

  template <typename T1, typename T2>
  static constexpr bool simdEnabled() noexcept {
    return HasSIMDAtan2<T1, T2>::value;
  }

  template <typename T1, typename T2>
  BLAZE_ALWAYS_INLINE decltype(auto) load(const T1& a, const T2& b) const
      noexcept {
    using std::atan2;
    BLAZE_CONSTRAINT_MUST_BE_SIMD_PACK(T1);
    BLAZE_CONSTRAINT_MUST_BE_SIMD_PACK(T2);
    return atan2(a, b);
  }
};
}  // namespace blaze

// Elementwise atan2(y, x) of two dense vectors with the same transpose flag.
template <typename VT0, typename VT1, bool TF>
BLAZE_ALWAYS_INLINE decltype(auto) atan2(
    const blaze::DenseVector<VT0, TF>& y,
    const blaze::DenseVector<VT1, TF>& x) noexcept {
  return map(~y, ~x, blaze::Atan2{});
}
#endif  // ((BLAZE_MAJOR_VERSION == 3) && (BLAZE_MINOR_VERSION == 2))
232 
// hypot function support
// Blaze 3.3 and newer already has hypot implemented
#if ((BLAZE_MAJOR_VERSION == 3) && (BLAZE_MINOR_VERSION == 2))
namespace blaze {
// hypot on single-precision SIMD packs. Only defined when SVML intrinsics are
// available for the active SIMD mode; otherwise the overload is deleted.
template <typename T0, typename T1>
BLAZE_ALWAYS_INLINE const SIMDfloat hypot(const SIMDf32<T0>& a,
                                          const SIMDf32<T1>& b) noexcept
#if BLAZE_SVML_MODE && (BLAZE_AVX512F_MODE || BLAZE_MIC_MODE)
{
  return _mm512_hypot_ps((~a).eval().value, (~b).eval().value);
}
#elif BLAZE_SVML_MODE && BLAZE_AVX_MODE
{
  return _mm256_hypot_ps((~a).eval().value, (~b).eval().value);
}
#elif BLAZE_SVML_MODE && BLAZE_SSE_MODE
{
  return _mm_hypot_ps((~a).eval().value, (~b).eval().value);
}
#else
    = delete;
#endif

// hypot on double-precision SIMD packs; same availability rules as above.
template <typename T0, typename T1>
BLAZE_ALWAYS_INLINE const SIMDdouble hypot(const SIMDf64<T0>& a,
                                           const SIMDf64<T1>& b) noexcept
#if BLAZE_SVML_MODE && (BLAZE_AVX512F_MODE || BLAZE_MIC_MODE)
{
  return _mm512_hypot_pd((~a).eval().value, (~b).eval().value);
}
#elif BLAZE_SVML_MODE && BLAZE_AVX_MODE
{
  return _mm256_hypot_pd((~a).eval().value, (~b).eval().value);
}
#elif BLAZE_SVML_MODE && BLAZE_SSE_MODE
{
  return _mm_hypot_pd((~a).eval().value, (~b).eval().value);
}
#else
    = delete;
#endif

// Compile-time flag telling whether a SIMD hypot can be used for the element
// types T0 and T1.
// NOTE(review): the first template-argument line of this alias was missing
// from the listing. The `bool,` value type is required by
// std::integral_constant; the same-type requirement on T0/T1 is reconstructed
// (the SIMD overloads above only pair packs of equal precision) -- confirm
// against the upstream file.
template <typename T0, typename T1>
using HasSIMDHypot = std::integral_constant<
    bool, std::is_same<std::decay_t<T0>, std::decay_t<T1>>::value and
              std::is_arithmetic<std::decay_t<T0>>::value and
              bool(BLAZE_SVML_MODE) and                         // NOLINT
              (bool(BLAZE_SSE_MODE) || bool(BLAZE_AVX_MODE) ||  // NOLINT
               bool(BLAZE_MIC_MODE) || bool(BLAZE_AVX512F_MODE))>;  // NOLINT

// Binary functor for `map`: `operator()` evaluates hypot on a pair of
// elements, `load` on a pair of SIMD packs.
struct Hypot {
  template <typename T1, typename T2>
  BLAZE_ALWAYS_INLINE decltype(auto) operator()(const T1& a, const T2& b) const
      noexcept {
    // Bring std::hypot into scope so scalars use it while SIMD packs are
    // resolved through the overloads above.
    using std::hypot;
    return hypot(a, b);
  }

  template <typename T1, typename T2>
  static constexpr bool simdEnabled() noexcept {
    return HasSIMDHypot<T1, T2>::value;
  }

  template <typename T1, typename T2>
  BLAZE_ALWAYS_INLINE decltype(auto) load(const T1& a, const T2& b) const
      noexcept {
    using std::hypot;
    BLAZE_CONSTRAINT_MUST_BE_SIMD_PACK(T1);
    BLAZE_CONSTRAINT_MUST_BE_SIMD_PACK(T2);
    return hypot(a, b);
  }
};
}  // namespace blaze

// Elementwise hypot(x, y) of two dense vectors with the same transpose flag.
template <typename VT0, typename VT1, bool TF>
BLAZE_ALWAYS_INLINE decltype(auto) hypot(
    const blaze::DenseVector<VT0, TF>& x,
    const blaze::DenseVector<VT1, TF>& y) noexcept {
  return map(~x, ~y, blaze::Hypot{});
}
#endif  // ((BLAZE_MAJOR_VERSION == 3) && (BLAZE_MINOR_VERSION == 2))
314 
315 namespace blaze {
316 template <typename ST>
318  public:
319  explicit inline DivideScalarByVector(ST scalar) : scalar_(scalar) {}
320 
321  template <typename T>
322  BLAZE_ALWAYS_INLINE decltype(auto) operator()(const T& a) const {
323  return scalar_ / a;
324  }
325 
326  template <typename T>
327  static constexpr bool simdEnabled() {
328  return blaze::HasSIMDDiv<T, ST>::value;
329  }
330 
331  template <typename T>
332  BLAZE_ALWAYS_INLINE decltype(auto) load(const T& a) const {
333  BLAZE_CONSTRAINT_MUST_BE_SIMD_PACK(T);
334  return set(scalar_) / a;
335  }
336 
337  private:
338  ST scalar_;
339 };
340 
341 template <typename Scalar, typename VT, bool TF,
342  typename = blaze_enable_if_t<blaze_is_numeric<Scalar>>>
343 BLAZE_ALWAYS_INLINE decltype(auto) operator/(
344  Scalar scalar, const blaze::DenseVector<VT, TF>& vec) {
345  return forEach(~vec, DivideScalarByVector<Scalar>(scalar));
346 }
347 
348 template <typename ST>
349 struct AddScalar {
350  public:
351  explicit inline AddScalar(ST scalar) : scalar_(scalar) {}
352 
353  template <typename T>
354  BLAZE_ALWAYS_INLINE decltype(auto) operator()(const T& a) const {
355  return a + scalar_;
356  }
357 
358  template <typename T>
359  static constexpr bool simdEnabled() {
360  return blaze::HasSIMDAdd<T, ST>::value;
361  }
362 
363  template <typename T>
364  BLAZE_ALWAYS_INLINE decltype(auto) load(const T& a) const {
365  BLAZE_CONSTRAINT_MUST_BE_SIMD_PACK(T);
366  return a + set(scalar_);
367  }
368 
369  private:
370  ST scalar_;
371 };
372 
373 template <typename VT, bool TF, typename Scalar,
374  typename = blaze_enable_if_t<blaze_is_numeric<Scalar>>>
375 decltype(auto) operator+(const blaze::DenseVector<VT, TF>& vec, Scalar scalar) {
376  return forEach(~vec, AddScalar<Scalar>(scalar));
377 }
378 
379 template <typename Scalar, typename VT, bool TF,
380  typename = blaze_enable_if_t<blaze_is_numeric<Scalar>>>
381 decltype(auto) operator+(Scalar scalar, const blaze::DenseVector<VT, TF>& vec) {
382  return forEach(~vec, AddScalar<Scalar>(scalar));
383 }
384 
385 template <typename VT, bool TF, typename Scalar,
386  typename = blaze_enable_if_t<blaze_is_numeric<Scalar>>>
387 VT& operator+=(blaze::DenseVector<VT, TF>& vec, Scalar scalar) {
388  (~vec) = (~vec) + scalar;
389  return ~vec;
390 }
391 
392 template <typename ST>
393 struct SubScalarRhs {
394  public:
395  explicit inline SubScalarRhs(ST scalar) : scalar_(scalar) {}
396 
397  template <typename T>
398  BLAZE_ALWAYS_INLINE decltype(auto) operator()(const T& a) const {
399  return a - scalar_;
400  }
401 
402  template <typename T>
403  static constexpr bool simdEnabled() {
404  return blaze::HasSIMDSub<T, ST>::value;
405  }
406 
407  template <typename T>
408  BLAZE_ALWAYS_INLINE decltype(auto) load(const T& a) const {
409  BLAZE_CONSTRAINT_MUST_BE_SIMD_PACK(T);
410  return a - set(scalar_);
411  }
412 
413  private:
414  ST scalar_;
415 };
416 
417 template <typename ST>
418 struct SubScalarLhs {
419  public:
420  explicit inline SubScalarLhs(ST scalar) : scalar_(scalar) {}
421 
422  template <typename T>
423  BLAZE_ALWAYS_INLINE decltype(auto) operator()(const T& a) const {
424  return scalar_ - a;
425  }
426 
427  template <typename T>
428  static constexpr bool simdEnabled() {
429  return blaze::HasSIMDSub<T, ST>::value;
430  }
431 
432  template <typename T>
433  BLAZE_ALWAYS_INLINE decltype(auto) load(const T& a) const {
434  BLAZE_CONSTRAINT_MUST_BE_SIMD_PACK(T);
435  return set(scalar_) - a;
436  }
437 
438  private:
439  ST scalar_;
440 };
441 
442 template <typename VT, bool TF, typename Scalar,
443  typename = blaze_enable_if_t<blaze_is_numeric<Scalar>>>
444 decltype(auto) operator-(const blaze::DenseVector<VT, TF>& vec, Scalar scalar) {
445  return forEach(~vec, SubScalarRhs<Scalar>(scalar));
446 }
447 
448 template <typename VT, bool TF, typename Scalar,
449  typename = blaze_enable_if_t<blaze_is_numeric<Scalar>>>
450 decltype(auto) operator-(Scalar scalar, const blaze::DenseVector<VT, TF>& vec) {
451  return forEach(~vec, SubScalarLhs<Scalar>(scalar));
452 }
453 
454 template <typename VT, bool TF, typename Scalar,
455  typename = blaze_enable_if_t<blaze_is_numeric<Scalar>>>
456 VT& operator-=(blaze::DenseVector<VT, TF>& vec, Scalar scalar) {
457  (~vec) = (~vec) - scalar;
458  return ~vec;
459 }
460 } // namespace blaze
461 /// \endcond
462 
463 namespace blaze {
464 // Enable support for reference wrappers with Blaze
465 template <typename T>
466 struct UnderlyingElement<std::reference_wrapper<T>> {
467  using Type = typename UnderlyingElement<std::decay_t<T>>::Type;
468 };
469 } // namespace blaze
470 
471 /*!
472  * \ingroup UtilitiesGroup
473  * \brief A raw pointer endowed with expression template support via the Blaze
474  * library
475  *
476  * PointerVector can be used instead of a raw pointer to pass around size
477  * information and to be able to have the pointer array support expression
478  * templates. The primary use case for PointerVector is inside the Data class
479  * so that Data has support for expression templates but not incurring any
480  * overhead for them.
481  *
482  * See the Blaze documentation for CustomVector for details on the template
483  * parameters to PointerVector since CustomVector is what PointerVector is
484  * modeled after.
485  *
486  * One additional feature that Blaze's CustomVector (currently) does not support
487  * is the ability to change the result type so that CustomVector can be used for
488  * the expression template backend for different vector types. PointerVector
489  * allows this by passing the `ExprResultType` template parameter. For example,
490  * `DataVector` sets `ExprResultType = DataVector`.
491  */
492 template <typename Type, bool AF = blaze_unaligned, bool PF = blaze::unpadded,
493  bool TF = blaze::defaultTransposeFlag,
494  typename ExprResultType =
495  blaze::DynamicVector<blaze_remove_const_t<Type>, TF>>
497  : public blaze::DenseVector<PointerVector<Type, AF, PF, TF, ExprResultType>,
498  TF> {
499  /// \cond
500  public:
502  using BaseType = blaze::DenseVector<This, TF>;
503  using ResultType = ExprResultType;
505  using ElementType = Type;
506  using SIMDType = blaze_simd_trait_t<ElementType>;
507  using ReturnType = const Type&;
508  using CompositeType = const PointerVector&;
509 
510  using Reference = Type&;
511  using ConstReference = const Type&;
512  using Pointer = Type*;
513  using ConstPointer = const Type*;
514 
515  using Iterator = blaze::DenseIterator<Type, AF>;
516  using ConstIterator = blaze::DenseIterator<const Type, AF>;
517 
518  enum : bool { simdEnabled = blaze::IsVectorizable<Type>::value };
519  enum : bool { smpAssignable = !blaze::IsSMPAssignable<Type>::value };
520 
521  PointerVector() = default;
522  PointerVector(Type* ptr, size_t size) : v_(ptr), size_(size) {}
523  PointerVector(const PointerVector& /*rhs*/) = default;
524  PointerVector& operator=(const PointerVector& /*rhs*/) = default;
525  PointerVector(PointerVector&& /*rhs*/) = default;
526  PointerVector& operator=(PointerVector&& /*rhs*/) = default;
527  ~PointerVector() = default;
528 
529  /*!\name Data access functions */
530  //@{
531  Type& operator[](const size_t i) noexcept {
532  ASSERT(i < size(), "i = " << i << ", size = " << size());
533  // clang-tidy: do not use pointer arithmetic
534  return v_[i]; // NOLINT
535  }
536  const Type& operator[](const size_t i) const noexcept {
537  ASSERT(i < size(), "i = " << i << ", size = " << size());
538  // clang-tidy: do not use pointer arithmetic
539  return v_[i]; // NOLINT
540  }
541  Reference at(size_t index);
542  ConstReference at(size_t index) const;
543  Pointer data() noexcept { return v_; }
544  ConstPointer data() const noexcept { return v_; }
545  Iterator begin() noexcept { return Iterator(v_); }
546  ConstIterator begin() const noexcept { return ConstIterator(v_); }
547  ConstIterator cbegin() const noexcept { return ConstIterator(v_); }
548  Iterator end() noexcept {
549  // clang-tidy: do not use pointer arithmetic
550  return Iterator(v_ + size_); // NOLINT
551  }
552  ConstIterator end() const noexcept {
553  // clang-tidy: do not use pointer arithmetic
554  return ConstIterator(v_ + size_); // NOLINT
555  }
556  ConstIterator cend() const noexcept { return ConstIterator(v_ + size_); }
557  //@}
558 
559  /*!\name Assignment operators */
560  //@{
561  PointerVector& operator=(const Type& rhs);
563 
564  template <typename Other, size_t N>
565  PointerVector& operator=(const Other (&array)[N]);
566 
567  template <typename VT>
568  PointerVector& operator=(const blaze::Vector<VT, TF>& rhs);
569  template <typename VT>
570  PointerVector& operator+=(const blaze::Vector<VT, TF>& rhs);
571  template <typename VT>
572  PointerVector& operator-=(const blaze::Vector<VT, TF>& rhs);
573  template <typename VT>
574  PointerVector& operator*=(const blaze::Vector<VT, TF>& rhs);
575  template <typename VT>
576  PointerVector& operator/=(const blaze::Vector<VT, TF>& rhs);
577  template <typename VT>
578  PointerVector& operator%=(const blaze::Vector<VT, TF>& rhs);
579 
580  template <typename Other>
581  std::enable_if_t<blaze_is_numeric_v<Other>, This>& operator*=(Other rhs);
582 
583  template <typename Other>
584  std::enable_if_t<blaze_is_numeric_v<Other>, This>& operator/=(Other rhs);
585  //@}
586 
587  /*!\name Utility functions */
588  //@{
589  void clear() noexcept {
590  size_ = 0;
591  v_ = nullptr;
592  }
593 
594  size_t spacing() const noexcept { return size_; }
595 
596  size_t size() const noexcept { return size_; }
597  //@}
598 
599  /*!\name Resource management functions */
600  //@{
601  void reset() { clear(); }
602 
603  void reset(Type* ptr, size_t n) noexcept {
604  v_ = ptr;
605  size_ = n;
606  }
607  //@}
608 
609  private:
610  template <typename VT>
611  using VectorizedAssign = std::integral_constant<
612  bool, blaze::useOptimizedKernels && simdEnabled && VT::simdEnabled &&
613  blaze::IsSIMDCombinable<Type, blaze_element_type_t<VT>>::value>;
614 
615  template <typename VT>
616  using VectorizedAddAssign = std::integral_constant<
617  bool,
618  blaze::useOptimizedKernels && simdEnabled && VT::simdEnabled &&
619  blaze::IsSIMDCombinable<Type, blaze_element_type_t<VT>>::value &&
620  blaze::HasSIMDAdd<Type, blaze_element_type_t<VT>>::value>;
621 
622  template <typename VT>
623  using VectorizedSubAssign = std::integral_constant<
624  bool,
625  blaze::useOptimizedKernels && simdEnabled && VT::simdEnabled &&
626  blaze::IsSIMDCombinable<Type, blaze_element_type_t<VT>>::value &&
627  blaze::HasSIMDSub<Type, blaze_element_type_t<VT>>::value>;
628 
629  template <typename VT>
630  using VectorizedMultAssign = std::integral_constant<
631  bool,
632  blaze::useOptimizedKernels && simdEnabled && VT::simdEnabled &&
633  blaze::IsSIMDCombinable<Type, blaze_element_type_t<VT>>::value &&
634  blaze::HasSIMDMult<Type, blaze_element_type_t<VT>>::value>;
635 
636  template <typename VT>
637  using VectorizedDivAssign = std::integral_constant<
638  bool,
639  blaze::useOptimizedKernels && simdEnabled && VT::simdEnabled &&
640  blaze::IsSIMDCombinable<Type, blaze_element_type_t<VT>>::value &&
641  blaze::HasSIMDDiv<Type, blaze_element_type_t<VT>>::value>;
642 
643  //! The number of elements packed within a single SIMD element.
644  enum : size_t { SIMDSIZE = blaze::SIMDTrait<ElementType>::size };
645 
646  public:
647  /*!\name Expression template evaluation functions */
648  //@{
649  template <typename Other>
650  bool canAlias(const Other* alias) const noexcept;
651  template <typename Other>
652  bool isAliased(const Other* alias) const noexcept;
653 
654  bool isAligned() const noexcept;
655  bool canSMPAssign() const noexcept;
656 
657  BLAZE_ALWAYS_INLINE SIMDType load(size_t index) const noexcept;
658  BLAZE_ALWAYS_INLINE SIMDType loada(size_t index) const noexcept;
659  BLAZE_ALWAYS_INLINE SIMDType loadu(size_t index) const noexcept;
660 
661  BLAZE_ALWAYS_INLINE void store(size_t index, const SIMDType& value) noexcept;
662  BLAZE_ALWAYS_INLINE void storea(size_t index, const SIMDType& value) noexcept;
663  BLAZE_ALWAYS_INLINE void storeu(size_t index, const SIMDType& value) noexcept;
664  BLAZE_ALWAYS_INLINE void stream(size_t index, const SIMDType& value) noexcept;
665 
666  template <typename VT>
668  const blaze::DenseVector<VT, TF>& rhs);
669 
670  template <typename VT>
672  const blaze::DenseVector<VT, TF>& rhs);
673 
674  template <typename VT>
676  addAssign(const blaze::DenseVector<VT, TF>& rhs);
677 
678  template <typename VT>
680  const blaze::DenseVector<VT, TF>& rhs);
681 
682  template <typename VT>
683  void addAssign(const blaze::SparseVector<VT, TF>& rhs);
684 
685  template <typename VT>
687  subAssign(const blaze::DenseVector<VT, TF>& rhs);
688 
689  template <typename VT>
691  const blaze::DenseVector<VT, TF>& rhs);
692 
693  template <typename VT>
694  void subAssign(const blaze::SparseVector<VT, TF>& rhs);
695 
696  template <typename VT>
698  template VectorizedMultAssign<VT>::value)>
699  multAssign(const blaze::DenseVector<VT, TF>& rhs);
700 
701  template <typename VT>
703  const blaze::DenseVector<VT, TF>& rhs);
704 
705  template <typename VT>
706  void multAssign(const blaze::SparseVector<VT, TF>& rhs);
707 
708  template <typename VT>
710  divAssign(const blaze::DenseVector<VT, TF>& rhs);
711 
712  template <typename VT>
714  const blaze::DenseVector<VT, TF>& rhs);
715  //@}
716 
717  private:
718  Type* v_ = nullptr;
719  size_t size_ = 0;
720  /// \endcond
721 };
722 
723 /// \cond
724 template <typename Type, bool AF, bool PF, bool TF, typename ExprResultType>
727  if (index >= size_) {
728  BLAZE_THROW_OUT_OF_RANGE("Invalid vector access index");
729  }
730  return (*this)[index];
731 }
732 
733 template <typename Type, bool AF, bool PF, bool TF, typename ExprResultType>
736  if (index >= size_) {
737  BLAZE_THROW_OUT_OF_RANGE("Invalid vector access index");
738  }
739  return (*this)[index];
740 }
741 
742 template <typename Type, bool AF, bool PF, bool TF, typename ExprResultType>
745  for (size_t i = 0; i < size_; ++i) {
746  // clang-tidy: do not use pointer arithmetic
747  v_[i] = rhs; // NOLINT
748  }
749  return *this;
750 }
751 
752 template <typename Type, bool AF, bool PF, bool TF, typename ExprResultType>
756  ASSERT(list.size() <= size_, "Invalid assignment to custom vector");
757  std::fill(std::copy(list.begin(), list.end(), v_), v_ + size_, Type());
758  return *this;
759 }
760 
761 template <typename Type, bool AF, bool PF, bool TF, typename ExprResultType>
762 template <typename Other, size_t N>
765  const Other (&array)[N]) {
766  ASSERT(size_ == N, "Invalid array size");
767  for (size_t i = 0UL; i < N; ++i) {
768  // clang-tidy: do not use pointer arithmetic
769  v_[i] = array[i]; // NOLINT
770  }
771  return *this;
772 }
773 
774 template <typename Type, bool AF, bool PF, bool TF, typename ExprResultType>
775 template <typename VT>
778  const blaze::Vector<VT, TF>& rhs) {
779  ASSERT((~rhs).size() == size_, "Vector sizes do not match");
780  blaze::smpAssign(*this, ~rhs);
781  return *this;
782 }
783 
784 template <typename Type, bool AF, bool PF, bool TF, typename ExprResultType>
785 template <typename VT>
788  const blaze::Vector<VT, TF>& rhs) {
789  ASSERT((~rhs).size() == size_, "Vector sizes do not match");
790  blaze::smpAddAssign(*this, ~rhs);
791  return *this;
792 }
793 
794 template <typename Type, bool AF, bool PF, bool TF, typename ExprResultType>
795 template <typename VT>
798  const blaze::Vector<VT, TF>& rhs) {
799  ASSERT((~rhs).size() == size_, "Vector sizes do not match");
800  blaze::smpSubAssign(*this, ~rhs);
801  return *this;
802 }
803 
804 template <typename Type, bool AF, bool PF, bool TF, typename ExprResultType>
805 template <typename VT>
808  const blaze::Vector<VT, TF>& rhs) {
809  BLAZE_CONSTRAINT_MUST_BE_VECTOR_WITH_TRANSPOSE_FLAG(VT, TF);
810  BLAZE_CONSTRAINT_MUST_NOT_REQUIRE_EVALUATION(blaze_result_type_t<VT>);
811 
812  using MultType = blaze_mult_trait_t<ResultType, blaze_result_type_t<VT>>;
813 
814  BLAZE_CONSTRAINT_MUST_BE_VECTOR_WITH_TRANSPOSE_FLAG(MultType, TF);
815  BLAZE_CONSTRAINT_MUST_NOT_REQUIRE_EVALUATION(MultType);
816 
817  ASSERT((~rhs).size() == size_, "Vector sizes do not match");
818  blaze::smpMultAssign(*this, ~rhs);
819  return *this;
820 }
821 
822 template <typename Type, bool AF, bool PF, bool TF, typename ExprResultType>
823 template <typename VT>
826  const blaze::Vector<VT, TF>& rhs) {
827  BLAZE_CONSTRAINT_MUST_BE_VECTOR_WITH_TRANSPOSE_FLAG(VT, TF);
828  BLAZE_CONSTRAINT_MUST_NOT_REQUIRE_EVALUATION(blaze_result_type_t<VT>);
829 
830  using DivType = blaze_div_trait_t<ResultType, blaze_result_type_t<VT>>;
831 
832  BLAZE_CONSTRAINT_MUST_BE_VECTOR_WITH_TRANSPOSE_FLAG(DivType, TF);
833  BLAZE_CONSTRAINT_MUST_NOT_REQUIRE_EVALUATION(DivType);
834 
835  ASSERT((~rhs).size() == size_, "Vector sizes do not match");
836  blaze::smpDivAssign(*this, ~rhs);
837  return *this;
838 }
839 
840 template <typename Type, bool AF, bool PF, bool TF, typename ExprResultType>
841 template <typename VT>
844  const blaze::Vector<VT, TF>& rhs) {
845  using blaze::assign;
846 
847  BLAZE_CONSTRAINT_MUST_BE_VECTOR_WITH_TRANSPOSE_FLAG(VT, TF);
848  BLAZE_CONSTRAINT_MUST_NOT_REQUIRE_EVALUATION(blaze_result_type_t<VT>);
849 
850  using CrossType = blaze_cross_trait_t<ResultType, blaze_result_type_t<VT>>;
851 
852  BLAZE_CONSTRAINT_MUST_BE_DENSE_VECTOR_TYPE(CrossType);
853  BLAZE_CONSTRAINT_MUST_BE_VECTOR_WITH_TRANSPOSE_FLAG(CrossType, TF);
854  BLAZE_CONSTRAINT_MUST_NOT_REQUIRE_EVALUATION(CrossType);
855 
856  if (size_ != 3UL || (~rhs).size() != 3UL) {
857  BLAZE_THROW_INVALID_ARGUMENT("Invalid vector size for cross product");
858  }
859 
860  const CrossType tmp(*this % (~rhs));
861  assign(*this, tmp);
862 
863  return *this;
864 }
865 
866 template <typename Type, bool AF, bool PF, bool TF, typename ExprResultType>
867 template <typename Other>
871  blaze::smpAssign(*this, (*this) * rhs);
872  return *this;
873 }
874 
875 template <typename Type, bool AF, bool PF, bool TF, typename ExprResultType>
876 template <typename Other>
877 inline std::enable_if_t<blaze_is_numeric_v<Other>,
880  BLAZE_USER_ASSERT(rhs != Other(0), "Division by zero detected");
881  blaze::smpAssign(*this, (*this) / rhs);
882  return *this;
883 }
884 
885 template <typename Type, bool AF, bool PF, bool TF, typename ExprResultType>
886 template <typename Other>
888  const Other* alias) const noexcept {
889  return static_cast<const void*>(this) == static_cast<const void*>(alias);
890 }
891 
892 template <typename Type, bool AF, bool PF, bool TF, typename ExprResultType>
893 template <typename Other>
895  const Other* alias) const noexcept {
896  return static_cast<const void*>(this) == static_cast<const void*>(alias);
897 }
898 
899 template <typename Type, bool AF, bool PF, bool TF, typename ExprResultType>
901  noexcept {
902  return (AF || checkAlignment(v_));
903 }
904 
905 template <typename Type, bool AF, bool PF, bool TF, typename ExprResultType>
907  const noexcept {
908  return (size() > blaze::SMP_DVECASSIGN_THRESHOLD);
909 }
910 
911 template <typename Type, bool AF, bool PF, bool TF, typename ExprResultType>
912 BLAZE_ALWAYS_INLINE
915  noexcept {
916  if (AF) {
917  return loada(index);
918  }
919  return loadu(index);
920 }
921 
922 template <typename Type, bool AF, bool PF, bool TF, typename ExprResultType>
923 BLAZE_ALWAYS_INLINE
926  noexcept {
927  using blaze::loada;
928 
929  BLAZE_CONSTRAINT_MUST_BE_VECTORIZABLE_TYPE(Type);
930 
931  BLAZE_INTERNAL_ASSERT(index < size_, "Invalid vector access index");
932  BLAZE_INTERNAL_ASSERT(index + SIMDSIZE <= size_,
933  "Invalid vector access index");
934  BLAZE_INTERNAL_ASSERT(!AF || index % SIMDSIZE == 0UL,
935  "Invalid vector access index");
936  BLAZE_INTERNAL_ASSERT(checkAlignment(v_ + index),
937  "Invalid vector access index");
938  // clang-tidy: do not use pointer arithmetic
939  return loada(v_ + index); // NOLINT
940 }
941 
942 template <typename Type, bool AF, bool PF, bool TF, typename ExprResultType>
943 BLAZE_ALWAYS_INLINE
946  noexcept {
947  using blaze::loadu;
948 
949  BLAZE_CONSTRAINT_MUST_BE_VECTORIZABLE_TYPE(Type);
950 
951  BLAZE_INTERNAL_ASSERT(index < size_, "Invalid vector access index");
952  BLAZE_INTERNAL_ASSERT(index + SIMDSIZE <= size_,
953  "Invalid vector access index");
954  // clang-tidy: do not use pointer arithmetic
955  return loadu(v_ + index); // NOLINT
956 }
957 
958 template <typename Type, bool AF, bool PF, bool TF, typename ExprResultType>
960  size_t index, const SIMDType& value) noexcept {
961  if (AF) {
962  storea(index, value);
963  } else {
964  storeu(index, value);
965  }
966 }
967 
// Writes the SIMD pack `value` to the wrapped memory at `v_ + index` using
// `blaze::storea`.
// NOTE(review): listing line 970 (qualified function name) is absent from this
// extract; presumably this is the aligned-store member `storea` -- confirm
// against the original header.
968 template <typename Type, bool AF, bool PF, bool TF, typename ExprResultType>
969 BLAZE_ALWAYS_INLINE void
971  size_t index, const SIMDType& value) noexcept {
972  using blaze::storea;
973
974  BLAZE_CONSTRAINT_MUST_BE_VECTORIZABLE_TYPE(Type);
975
     // Asserted preconditions: the whole pack [index, index + SIMDSIZE) lies
     // inside the vector, and when AF is set `index` is a multiple of SIMDSIZE.
976  BLAZE_INTERNAL_ASSERT(index < size_, "Invalid vector access index");
977  BLAZE_INTERNAL_ASSERT(index + SIMDSIZE <= size_,
978  "Invalid vector access index");
979  BLAZE_INTERNAL_ASSERT(!AF || index % SIMDSIZE == 0UL,
980  "Invalid vector access index");
     // NOTE(review): alignment check that reuses the "access index" message.
981  BLAZE_INTERNAL_ASSERT(checkAlignment(v_ + index),
982  "Invalid vector access index");
983  // clang-tidy: do not use pointer arithmetic
984  storea(v_ + index, value); // NOLINT
985 }
986 
// Writes the SIMD pack `value` to the wrapped memory at `v_ + index` using
// `blaze::storeu`. No alignment precondition is asserted.
// NOTE(review): listing line 989 (qualified function name) is absent from this
// extract; presumably this is the unaligned-store member `storeu` -- confirm
// against the original header.
987 template <typename Type, bool AF, bool PF, bool TF, typename ExprResultType>
988 BLAZE_ALWAYS_INLINE void
990  size_t index, const SIMDType& value) noexcept {
991  using blaze::storeu;
992
993  BLAZE_CONSTRAINT_MUST_BE_VECTORIZABLE_TYPE(Type);
994
     // The whole pack [index, index + SIMDSIZE) must lie inside the vector.
995  BLAZE_INTERNAL_ASSERT(index < size_, "Invalid vector access index");
996  BLAZE_INTERNAL_ASSERT(index + SIMDSIZE <= size_,
997  "Invalid vector access index");
998  // clang-tidy: do not use pointer arithmetic
999  storeu(v_ + index, value); // NOLINT
1000 }
1001 
// Writes the SIMD pack `value` to the wrapped memory at `v_ + index` using
// `blaze::stream`. The asserted preconditions match those of the aligned store.
// NOTE(review): listing line 1004 (qualified function name) is absent from
// this extract; presumably this is the `stream` member -- confirm against the
// original header.
1002 template <typename Type, bool AF, bool PF, bool TF, typename ExprResultType>
1003 BLAZE_ALWAYS_INLINE void
1005  size_t index, const SIMDType& value) noexcept {
1006  using blaze::stream;
1007
1008  BLAZE_CONSTRAINT_MUST_BE_VECTORIZABLE_TYPE(Type);
1009
      // Asserted preconditions: the whole pack [index, index + SIMDSIZE) lies
      // inside the vector, and when AF is set `index` is a multiple of SIMDSIZE.
1010  BLAZE_INTERNAL_ASSERT(index < size_, "Invalid vector access index");
1011  BLAZE_INTERNAL_ASSERT(index + SIMDSIZE <= size_,
1012  "Invalid vector access index");
1013  BLAZE_INTERNAL_ASSERT(!AF || index % SIMDSIZE == 0UL,
1014  "Invalid vector access index");
      // NOTE(review): alignment check that reuses the "access index" message.
1015  BLAZE_INTERNAL_ASSERT(checkAlignment(v_ + index),
1016  "Invalid vector access index");
1017  // clang-tidy: do not use pointer arithmetic
1018  stream(v_ + index, value); // NOLINT
1019 }
1020 
// Element-wise assignment from a dense vector expression: copies `(~rhs)[i]`
// into `v_[i]` for every index of the vector.
// NOTE(review): listing lines 1024 and 1027 (start of the enable_if condition
// and the qualified function name) are absent from this extract. The visible
// condition references `VectorizedAssign<VT>::value`; presumably this is the
// non-vectorized `assign` overload -- confirm against the original header.
1021 template <typename Type, bool AF, bool PF, bool TF, typename ExprResultType>
1022 template <typename VT>
1023 inline std::enable_if_t<
1025  Type, AF, PF, TF, ExprResultType>::BLAZE_TEMPLATE
1026  VectorizedAssign<VT>::value)>
1028  const blaze::DenseVector<VT, TF>& rhs) {
1029  BLAZE_INTERNAL_ASSERT(size_ == (~rhs).size(), "Invalid vector sizes");
1030
      // ipos is size_ with its lowest bit cleared, i.e. the largest even count
      // not exceeding size_ (the assertion below checks exactly that).
1031  const size_t ipos(size_ & size_t(-2));
1032  BLAZE_INTERNAL_ASSERT((size_ - (size_ % 2UL)) == ipos,
1033  "Invalid end calculation");
1034
      // Main loop handles two elements per iteration.
1035  for (size_t i = 0UL; i < ipos; i += 2UL) {
1036  // clang-tidy: do not use pointer arithmetic
1037  v_[i] = (~rhs)[i]; // NOLINT
1038  v_[i + 1UL] = (~rhs)[i + 1UL]; // NOLINT
1039  }
      // One trailing element remains when the size is odd.
1040  if (ipos < (~rhs).size()) {
1041  // clang-tidy: do not use pointer arithmetic
1042  v_[ipos] = (~rhs)[ipos]; // NOLINT
1043  }
1044 }
1045 
// SIMD assignment from a dense vector expression: copies `rhs` into this
// vector one SIMD pack at a time, finishing with a scalar loop for the
// elements that do not fill a whole pack.
// NOTE(review): listing lines 1048 and 1050 (start of the enable_if return
// type and the qualified function name) are absent from this extract. The
// visible condition references `VectorizedAssign<VT>::value`; presumably this
// is the vectorized `assign` overload -- confirm against the original header.
1046 template <typename Type, bool AF, bool PF, bool TF, typename ExprResultType>
1047 template <typename VT>
1049  BLAZE_TEMPLATE VectorizedAssign<VT>::value)>
1051  const blaze::DenseVector<VT, TF>& rhs) {
1052  BLAZE_CONSTRAINT_MUST_BE_VECTORIZABLE_TYPE(Type);
1053
1054  BLAZE_INTERNAL_ASSERT(size_ == (~rhs).size(), "Invalid vector sizes");
1055
      // ipos: end of the range that can be processed in whole SIMD packs; the
      // assertion checks it equals size_ rounded down to a multiple of SIMDSIZE.
1056  const size_t ipos(size_ & size_t(-SIMDSIZE));
1057  BLAZE_INTERNAL_ASSERT((size_ - (size_ % SIMDSIZE)) == ipos,
1058  "Invalid end calculation");
1059
      // Streaming path: taken only when AF is set, blaze::useStreaming is
      // enabled, the vector exceeds the cache-size-derived threshold, and rhs
      // does not report aliasing with this vector.
1060  if (AF && blaze::useStreaming &&
1061  size_ > (blaze::cacheSize / (sizeof(Type) * 3UL)) &&
1062  !(~rhs).isAliased(this)) {
1063  size_t i(0UL);
1064
1065  for (; i < ipos; i += SIMDSIZE) {
1066  stream(i, (~rhs).load(i));
1067  }
      // Scalar tail for the elements past the last whole pack.
1068  for (; i < size_; ++i) {
1069  // clang-tidy: do not use pointer arithmetic
1070  v_[i] = (~rhs)[i]; // NOLINT
1071  }
1072  } else {
      // i4way: end of the range covered by the 4-packs-per-iteration loop; the
      // assertion checks it equals size_ rounded down to a multiple of
      // 4 * SIMDSIZE.
1073  const size_t i4way(size_ & size_t(-SIMDSIZE * 4));
1074  BLAZE_INTERNAL_ASSERT((size_ - (size_ % (SIMDSIZE * 4UL))) == i4way,
1075  "Invalid end calculation");
1076  BLAZE_INTERNAL_ASSERT(i4way <= ipos, "Invalid end calculation");
1077
1078  size_t i(0UL);
1079  blaze_const_iterator_t<VT> it((~rhs).begin());
1080
      // Unrolled loop: four packs per iteration, advancing the rhs iterator by
      // SIMDSIZE after each load.
1081  for (; i < i4way; i += SIMDSIZE * 4UL) {
1082  store(i, it.load());
1083  it += SIMDSIZE;
1084  store(i + SIMDSIZE, it.load());
1085  it += SIMDSIZE;
1086  store(i + SIMDSIZE * 2UL, it.load());
1087  it += SIMDSIZE;
1088  store(i + SIMDSIZE * 3UL, it.load());
1089  it += SIMDSIZE;
1090  }
      // Remaining whole packs, one per iteration.
1091  for (; i < ipos; i += SIMDSIZE, it += SIMDSIZE) {
1092  store(i, it.load());
1093  }
      // Scalar tail for the elements past the last whole pack.
1094  for (; i < size_; ++i, ++it) {
1095  // clang-tidy: do not use pointer arithmetic
1096  v_[i] = *it; // NOLINT
1097  }
1098  }
1099 }
1100 
// Element-wise addition assignment from a dense vector expression: performs
// `v_[i] += (~rhs)[i]` for every index of the vector.
// NOTE(review): listing lines 1104 and 1107 (start of the enable_if condition
// and the qualified function name) are absent from this extract. The visible
// condition references `VectorizedAddAssign<VT>::value`; presumably this is
// the non-vectorized `addAssign` overload -- confirm against the original
// header.
1101 template <typename Type, bool AF, bool PF, bool TF, typename ExprResultType>
1102 template <typename VT>
1103 inline std::enable_if_t<
1105  Type, AF, PF, TF, ExprResultType>::BLAZE_TEMPLATE
1106  VectorizedAddAssign<VT>::value)>
1108  const blaze::DenseVector<VT, TF>& rhs) {
1109  BLAZE_INTERNAL_ASSERT(size_ == (~rhs).size(), "Invalid vector sizes");
1110
      // ipos is size_ with its lowest bit cleared, i.e. the largest even count
      // not exceeding size_ (the assertion below checks exactly that).
1111  const size_t ipos(size_ & size_t(-2));
1112  BLAZE_INTERNAL_ASSERT((size_ - (size_ % 2UL)) == ipos,
1113  "Invalid end calculation");
1114
      // Main loop handles two elements per iteration.
1115  for (size_t i = 0UL; i < ipos; i += 2UL) {
1116  // clang-tidy: do not use pointer arithmetic
1117  v_[i] += (~rhs)[i]; // NOLINT
1118  v_[i + 1UL] += (~rhs)[i + 1UL]; // NOLINT
1119  }
      // One trailing element remains when the size is odd.
1120  if (ipos < (~rhs).size()) {
1121  // clang-tidy: do not use pointer arithmetic
1122  v_[ipos] += (~rhs)[ipos]; // NOLINT
1123  }
1124 }
1125 
// SIMD addition assignment from a dense vector expression: for each SIMD pack
// it loads the current contents, adds the matching pack of `rhs`, and stores
// the sum back; a scalar loop handles the elements past the last whole pack.
// NOTE(review): listing lines 1128 and 1130 (start of the enable_if return
// type and the qualified function name) are absent from this extract. The
// visible condition references `VectorizedAddAssign<VT>::value`; presumably
// this is the vectorized `addAssign` overload -- confirm against the original
// header.
1126 template <typename Type, bool AF, bool PF, bool TF, typename ExprResultType>
1127 template <typename VT>
1129  BLAZE_TEMPLATE VectorizedAddAssign<VT>::value)>
1131  const blaze::DenseVector<VT, TF>& rhs) {
1132  BLAZE_CONSTRAINT_MUST_BE_VECTORIZABLE_TYPE(Type);
1133
1134  BLAZE_INTERNAL_ASSERT(size_ == (~rhs).size(), "Invalid vector sizes");
1135
      // ipos: size_ rounded down to a multiple of SIMDSIZE (see assertion).
1136  const size_t ipos(size_ & size_t(-SIMDSIZE));
1137  BLAZE_INTERNAL_ASSERT((size_ - (size_ % SIMDSIZE)) == ipos,
1138  "Invalid end calculation");
1139
      // i4way: size_ rounded down to a multiple of 4 * SIMDSIZE (see assertion).
1140  const size_t i4way(size_ & size_t(-SIMDSIZE * 4));
1141  BLAZE_INTERNAL_ASSERT((size_ - (size_ % (SIMDSIZE * 4UL))) == i4way,
1142  "Invalid end calculation");
1143  BLAZE_INTERNAL_ASSERT(i4way <= ipos, "Invalid end calculation");
1144
1145  size_t i(0UL);
1146  blaze_const_iterator_t<VT> it((~rhs).begin());
1147
      // Unrolled loop: four packs per iteration.
1148  for (; i < i4way; i += SIMDSIZE * 4UL) {
1149  store(i, load(i) + it.load());
1150  it += SIMDSIZE;
1151  store(i + SIMDSIZE, load(i + SIMDSIZE) + it.load());
1152  it += SIMDSIZE;
1153  store(i + SIMDSIZE * 2UL, load(i + SIMDSIZE * 2UL) + it.load());
1154  it += SIMDSIZE;
1155  store(i + SIMDSIZE * 3UL, load(i + SIMDSIZE * 3UL) + it.load());
1156  it += SIMDSIZE;
1157  }
      // Remaining whole packs, one per iteration.
1158  for (; i < ipos; i += SIMDSIZE, it += SIMDSIZE) {
1159  store(i, load(i) + it.load());
1160  }
      // Scalar tail.
1161  for (; i < size_; ++i, ++it) {
1162  // clang-tidy: do not use pointer arithmetic
1163  v_[i] += *it; // NOLINT
1164  }
1165 }
1166 
// Addition assignment from a sparse vector: walks the elements that `rhs`
// exposes through begin()/end() and adds each element's value to the entry of
// this vector at that element's index. Other entries are left untouched.
// NOTE(review): listing line 1169 (inline specifier, return type and qualified
// function name) is absent from this extract; presumably this is the sparse
// `addAssign` overload -- confirm against the original header.
1167 template <typename Type, bool AF, bool PF, bool TF, typename ExprResultType>
1168 template <typename VT>
1170  const blaze::SparseVector<VT, TF>& rhs) {
1171  BLAZE_INTERNAL_ASSERT(size_ == (~rhs).size(), "Invalid vector sizes");
1172
1173  for (blaze_const_iterator_t<VT> element = (~rhs).begin();
1174  element != (~rhs).end(); ++element) {
1175  v_[element->index()] += element->value();
1176  }
1177 }
1178 
// Element-wise subtraction assignment from a dense vector expression: performs
// `v_[i] -= (~rhs)[i]` for every index of the vector.
// NOTE(review): listing lines 1182 and 1185 (start of the enable_if condition
// and the qualified function name) are absent from this extract. The visible
// condition references `VectorizedSubAssign<VT>::value`; presumably this is
// the non-vectorized `subAssign` overload -- confirm against the original
// header.
1179 template <typename Type, bool AF, bool PF, bool TF, typename ExprResultType>
1180 template <typename VT>
1181 inline std::enable_if_t<
1183  Type, AF, PF, TF, ExprResultType>::BLAZE_TEMPLATE
1184  VectorizedSubAssign<VT>::value)>
1186  const blaze::DenseVector<VT, TF>& rhs) {
1187  BLAZE_INTERNAL_ASSERT(size_ == (~rhs).size(), "Invalid vector sizes");
1188
      // ipos is size_ with its lowest bit cleared, i.e. the largest even count
      // not exceeding size_ (the assertion below checks exactly that).
1189  const size_t ipos(size_ & size_t(-2));
1190  BLAZE_INTERNAL_ASSERT((size_ - (size_ % 2UL)) == ipos,
1191  "Invalid end calculation");
1192
      // Main loop handles two elements per iteration.
1193  for (size_t i = 0UL; i < ipos; i += 2UL) {
1194  // clang-tidy: do not use pointer arithmetic
1195  v_[i] -= (~rhs)[i]; // NOLINT
1196  v_[i + 1UL] -= (~rhs)[i + 1UL]; // NOLINT
1197  }
      // One trailing element remains when the size is odd.
1198  if (ipos < (~rhs).size()) {
1199  // clang-tidy: do not use pointer arithmetic
1200  v_[ipos] -= (~rhs)[ipos]; // NOLINT
1201  }
1202 }
1203 
// SIMD subtraction assignment from a dense vector expression: for each SIMD
// pack it loads the current contents, subtracts the matching pack of `rhs`,
// and stores the result back; a scalar loop handles the elements past the last
// whole pack.
// NOTE(review): listing lines 1206 and 1208 (start of the enable_if return
// type and the qualified function name) are absent from this extract. The
// visible condition references `VectorizedSubAssign<VT>::value`; presumably
// this is the vectorized `subAssign` overload -- confirm against the original
// header.
1204 template <typename Type, bool AF, bool PF, bool TF, typename ExprResultType>
1205 template <typename VT>
1207  BLAZE_TEMPLATE VectorizedSubAssign<VT>::value)>
1209  const blaze::DenseVector<VT, TF>& rhs) {
1210  BLAZE_CONSTRAINT_MUST_BE_VECTORIZABLE_TYPE(Type);
1211
1212  BLAZE_INTERNAL_ASSERT(size_ == (~rhs).size(), "Invalid vector sizes");
1213
      // ipos: size_ rounded down to a multiple of SIMDSIZE (see assertion).
1214  const size_t ipos(size_ & size_t(-SIMDSIZE));
1215  BLAZE_INTERNAL_ASSERT((size_ - (size_ % SIMDSIZE)) == ipos,
1216  "Invalid end calculation");
1217
      // i4way: size_ rounded down to a multiple of 4 * SIMDSIZE (see assertion).
1218  const size_t i4way(size_ & size_t(-SIMDSIZE * 4));
1219  BLAZE_INTERNAL_ASSERT((size_ - (size_ % (SIMDSIZE * 4UL))) == i4way,
1220  "Invalid end calculation");
1221  BLAZE_INTERNAL_ASSERT(i4way <= ipos, "Invalid end calculation");
1222
1223  size_t i(0UL);
1224  blaze_const_iterator_t<VT> it((~rhs).begin());
1225
      // Unrolled loop: four packs per iteration.
1226  for (; i < i4way; i += SIMDSIZE * 4UL) {
1227  store(i, load(i) - it.load());
1228  it += SIMDSIZE;
1229  store(i + SIMDSIZE, load(i + SIMDSIZE) - it.load());
1230  it += SIMDSIZE;
1231  store(i + SIMDSIZE * 2UL, load(i + SIMDSIZE * 2UL) - it.load());
1232  it += SIMDSIZE;
1233  store(i + SIMDSIZE * 3UL, load(i + SIMDSIZE * 3UL) - it.load());
1234  it += SIMDSIZE;
1235  }
      // Remaining whole packs, one per iteration.
1236  for (; i < ipos; i += SIMDSIZE, it += SIMDSIZE) {
1237  store(i, load(i) - it.load());
1238  }
      // Scalar tail.
1239  for (; i < size_; ++i, ++it) {
1240  // clang-tidy: do not use pointer arithmetic
1241  v_[i] -= *it; // NOLINT
1242  }
1243 }
1244 
// Subtraction assignment from a sparse vector: walks the elements that `rhs`
// exposes through begin()/end() and subtracts each element's value from the
// entry of this vector at that element's index. Other entries are untouched.
// NOTE(review): listing line 1247 (inline specifier, return type and qualified
// function name) is absent from this extract; presumably this is the sparse
// `subAssign` overload -- confirm against the original header.
1245 template <typename Type, bool AF, bool PF, bool TF, typename ExprResultType>
1246 template <typename VT>
1248  const blaze::SparseVector<VT, TF>& rhs) {
1249  BLAZE_INTERNAL_ASSERT(size_ == (~rhs).size(), "Invalid vector sizes");
1250
1251  for (blaze_const_iterator_t<VT> element = (~rhs).begin();
1252  element != (~rhs).end(); ++element) {
1253  v_[element->index()] -= element->value();
1254  }
1255 }
1256 
// Element-wise multiplication assignment from a dense vector expression:
// performs `v_[i] *= (~rhs)[i]` for every index of the vector.
// NOTE(review): listing lines 1260 and 1263 (start of the enable_if condition
// and the qualified function name) are absent from this extract. The visible
// condition references `VectorizedMultAssign<VT>::value`; presumably this is
// the non-vectorized `multAssign` overload -- confirm against the original
// header.
1257 template <typename Type, bool AF, bool PF, bool TF, typename ExprResultType>
1258 template <typename VT>
1259 inline std::enable_if_t<
1261  Type, AF, PF, TF, ExprResultType>::BLAZE_TEMPLATE
1262  VectorizedMultAssign<VT>::value)>
1264  const blaze::DenseVector<VT, TF>& rhs) {
1265  BLAZE_INTERNAL_ASSERT(size_ == (~rhs).size(), "Invalid vector sizes");
1266
      // ipos is size_ with its lowest bit cleared, i.e. the largest even count
      // not exceeding size_ (the assertion below checks exactly that).
1267  const size_t ipos(size_ & size_t(-2));
1268  BLAZE_INTERNAL_ASSERT((size_ - (size_ % 2UL)) == ipos,
1269  "Invalid end calculation");
1270
      // Main loop handles two elements per iteration.
1271  for (size_t i = 0UL; i < ipos; i += 2UL) {
1272  // clang-tidy: do not use pointer arithmetic
1273  v_[i] *= (~rhs)[i]; // NOLINT
1274  v_[i + 1UL] *= (~rhs)[i + 1UL]; // NOLINT
1275  }
      // One trailing element remains when the size is odd.
1276  if (ipos < (~rhs).size()) {
1277  // clang-tidy: do not use pointer arithmetic
1278  v_[ipos] *= (~rhs)[ipos]; // NOLINT
1279  }
1280 }
1281 
// SIMD multiplication assignment from a dense vector expression: for each SIMD
// pack it loads the current contents, multiplies by the matching pack of
// `rhs`, and stores the product back; a scalar loop handles the elements past
// the last whole pack.
// NOTE(review): listing lines 1284 and 1286 (start of the enable_if return
// type and the qualified function name) are absent from this extract. The
// visible condition references `VectorizedMultAssign<VT>::value`; presumably
// this is the vectorized `multAssign` overload -- confirm against the original
// header.
1282 template <typename Type, bool AF, bool PF, bool TF, typename ExprResultType>
1283 template <typename VT>
1285  BLAZE_TEMPLATE VectorizedMultAssign<VT>::value)>
1287  const blaze::DenseVector<VT, TF>& rhs) {
1288  BLAZE_CONSTRAINT_MUST_BE_VECTORIZABLE_TYPE(Type);
1289
1290  BLAZE_INTERNAL_ASSERT(size_ == (~rhs).size(), "Invalid vector sizes");
1291
      // ipos: size_ rounded down to a multiple of SIMDSIZE (see assertion).
1292  const size_t ipos(size_ & size_t(-SIMDSIZE));
1293  BLAZE_INTERNAL_ASSERT((size_ - (size_ % SIMDSIZE)) == ipos,
1294  "Invalid end calculation");
1295
      // i4way: size_ rounded down to a multiple of 4 * SIMDSIZE (see assertion).
1296  const size_t i4way(size_ & size_t(-SIMDSIZE * 4));
1297  BLAZE_INTERNAL_ASSERT((size_ - (size_ % (SIMDSIZE * 4UL))) == i4way,
1298  "Invalid end calculation");
1299  BLAZE_INTERNAL_ASSERT(i4way <= ipos, "Invalid end calculation");
1300
1301  size_t i(0UL);
1302  blaze_const_iterator_t<VT> it((~rhs).begin());
1303
      // Unrolled loop: four packs per iteration.
1304  for (; i < i4way; i += SIMDSIZE * 4UL) {
1305  store(i, load(i) * it.load());
1306  it += SIMDSIZE;
1307  store(i + SIMDSIZE, load(i + SIMDSIZE) * it.load());
1308  it += SIMDSIZE;
1309  store(i + SIMDSIZE * 2UL, load(i + SIMDSIZE * 2UL) * it.load());
1310  it += SIMDSIZE;
1311  store(i + SIMDSIZE * 3UL, load(i + SIMDSIZE * 3UL) * it.load());
1312  it += SIMDSIZE;
1313  }
      // Remaining whole packs, one per iteration (this line carries a coverage
      // exclusion marker).
1314  for (; i < ipos; i += SIMDSIZE, it += SIMDSIZE) {
1315  store(i, load(i) * it.load()); // LCOV_EXCL_LINE
1316  }
      // Scalar tail.
1317  for (; i < size_; ++i, ++it) {
1318  // clang-tidy: do not use pointer arithmetic
1319  v_[i] *= *it; // NOLINT
1320  }
1321 }
1322 
// Multiplication assignment from a sparse vector. The current contents are
// first copied into a temporary dense vector, this vector is then reset, and
// finally only the indices present in `rhs` are written with
// `old_value * rhs_value`.
// NOTE(review): presumably reset() zeroes every entry, so indices absent from
// `rhs` end up as zero -- confirm against the definition of reset().
// NOTE(review): listing line 1325 (inline specifier, return type and qualified
// function name) is absent from this extract; presumably this is the sparse
// `multAssign` overload -- confirm against the original header.
1323 template <typename Type, bool AF, bool PF, bool TF, typename ExprResultType>
1324 template <typename VT>
1326  const blaze::SparseVector<VT, TF>& rhs) {
1327  BLAZE_INTERNAL_ASSERT(size_ == (~rhs).size(), "Invalid vector sizes");
1328
      // Snapshot of the old values; needed because reset() runs before the
      // products are computed.
1329  const blaze::DynamicVector<Type, TF> tmp(serial(*this));
1330  reset();
1331  for (blaze_const_iterator_t<VT> element = (~rhs).begin();
1332  element != (~rhs).end(); ++element) {
1333  v_[element->index()] = tmp[element->index()] * element->value();
1334  }
1335 }
1336 
// Element-wise division assignment from a dense vector expression: performs
// `v_[i] /= (~rhs)[i]` for every index of the vector.
// NOTE(review): listing lines 1340 and 1343 (start of the enable_if condition
// and the qualified function name) are absent from this extract. The visible
// condition references `VectorizedDivAssign<VT>::value`; presumably this is
// the non-vectorized `divAssign` overload -- confirm against the original
// header.
1337 template <typename Type, bool AF, bool PF, bool TF, typename ExprResultType>
1338 template <typename VT>
1339 inline std::enable_if_t<
1341  Type, AF, PF, TF, ExprResultType>::BLAZE_TEMPLATE
1342  VectorizedDivAssign<VT>::value)>
1344  const blaze::DenseVector<VT, TF>& rhs) {
1345  BLAZE_INTERNAL_ASSERT(size_ == (~rhs).size(), "Invalid vector sizes");
1346
      // ipos is size_ with its lowest bit cleared, i.e. the largest even count
      // not exceeding size_ (the assertion below checks exactly that).
1347  const size_t ipos(size_ & size_t(-2));
1348  BLAZE_INTERNAL_ASSERT((size_ - (size_ % 2UL)) == ipos,
1349  "Invalid end calculation");
1350
      // Main loop handles two elements per iteration.
1351  for (size_t i = 0UL; i < ipos; i += 2UL) {
1352  // clang-tidy: do not use pointer arithmetic
1353  v_[i] /= (~rhs)[i]; // NOLINT
1354  v_[i + 1UL] /= (~rhs)[i + 1UL]; // NOLINT
1355  }
      // One trailing element remains when the size is odd.
1356  if (ipos < (~rhs).size()) {
1357  // clang-tidy: do not use pointer arithmetic
1358  v_[ipos] /= (~rhs)[ipos]; // NOLINT
1359  }
1360 }
1361 
// SIMD division assignment from a dense vector expression: for each SIMD pack
// it loads the current contents, divides by the matching pack of `rhs`, and
// stores the quotient back; a scalar loop handles the elements past the last
// whole pack.
// NOTE(review): listing lines 1364 and 1366 (start of the enable_if return
// type and the qualified function name) are absent from this extract. The
// visible condition references `VectorizedDivAssign<VT>::value`; presumably
// this is the vectorized `divAssign` overload -- confirm against the original
// header.
1362 template <typename Type, bool AF, bool PF, bool TF, typename ExprResultType>
1363 template <typename VT>
1365  BLAZE_TEMPLATE VectorizedDivAssign<VT>::value)>
1367  const blaze::DenseVector<VT, TF>& rhs) {
1368  BLAZE_CONSTRAINT_MUST_BE_VECTORIZABLE_TYPE(Type);
1369
1370  BLAZE_INTERNAL_ASSERT(size_ == (~rhs).size(), "Invalid vector sizes");
1371
      // ipos: size_ rounded down to a multiple of SIMDSIZE (see assertion).
1372  const size_t ipos(size_ & size_t(-SIMDSIZE));
1373  BLAZE_INTERNAL_ASSERT((size_ - (size_ % SIMDSIZE)) == ipos,
1374  "Invalid end calculation");
1375
      // i4way: size_ rounded down to a multiple of 4 * SIMDSIZE (see assertion).
1376  const size_t i4way(size_ & size_t(-SIMDSIZE * 4));
1377  BLAZE_INTERNAL_ASSERT((size_ - (size_ % (SIMDSIZE * 4UL))) == i4way,
1378  "Invalid end calculation");
1379  BLAZE_INTERNAL_ASSERT(i4way <= ipos, "Invalid end calculation");
1380
1381  size_t i(0UL);
1382  blaze_const_iterator_t<VT> it((~rhs).begin());
1383
      // Unrolled loop: four packs per iteration.
1384  for (; i < i4way; i += SIMDSIZE * 4UL) {
1385  store(i, load(i) / it.load());
1386  it += SIMDSIZE;
1387  store(i + SIMDSIZE, load(i + SIMDSIZE) / it.load());
1388  it += SIMDSIZE;
1389  store(i + SIMDSIZE * 2UL, load(i + SIMDSIZE * 2UL) / it.load());
1390  it += SIMDSIZE;
1391  store(i + SIMDSIZE * 3UL, load(i + SIMDSIZE * 3UL) / it.load());
1392  it += SIMDSIZE;
1393  }
      // Remaining whole packs, one per iteration (this line carries a coverage
      // exclusion marker).
1394  for (; i < ipos; i += SIMDSIZE, it += SIMDSIZE) {
1395  store(i, load(i) / it.load()); // LCOV_EXCL_LINE
1396  }
      // Scalar tail.
1397  for (; i < size_; ++i, ++it) {
1398  // clang-tidy: do not use pointer arithmetic
1399  v_[i] /= *it; // NOLINT
1400  }
1401 }
1402 /// \endcond
1403 
1404 // There is a bug either in Blaze or in vector intrinsics implementation in GCC
1405 // that results in _mm_set1_epi64 not being callable with an `unsigned long`.
1406 // The way to work around this is to use a forwarding reference (which is super
1407 // aggressive and matches everything), convert the exponent to a double, and
1408 // then call the double pow.
//
// The function below builds and returns a `blaze::DVecMapExpr` over the vector
// `t` with a `BlazePow<double>` functor; whatever type the exponent arrives as,
// it is converted with `static_cast<double>` before being stored in the functor.
// NOTE(review): listing lines 1412-1413 (remaining template parameters, return
// type and the start of the parameter list, including the declaration of `t`)
// are absent from this extract; presumably this is an overload of `pow` taking
// a `PointerVector` and an exponent -- confirm against the original header.
1409 template <
1410  typename Type, bool AF, bool PF, bool TF, typename ExprResultType,
1411  typename T,
1414  T&& exponent) noexcept {
1415  using ReturnType =
1416  const blaze::DVecMapExpr<PointerVector<Type, AF, PF, TF, ExprResultType>,
1417  BlazePow<double>, TF>;
1418  return ReturnType(t, BlazePow<double>{static_cast<double>(exponent)});
1419 }
1420 
/*!
 * \brief Defines the compound-assignment operator `OP` for the vector type
 * `TYPE`
 *
 * Expanding this macro inside the definition of `TYPE` adds three overloads of
 * `operator OP`: one taking another `TYPE`, one taking any
 * `blaze::DenseVector`, and one taking a single `ElementType` value. With
 * `OP` being `+=` and `TYPE` being `DataVector`, for instance, this provides
 * `+=` with a `DataVector`, a `blaze::DenseVector`, or an `ElementType`
 * (`double` for `DataVector`) on the right-hand side. Every overload applies
 * `OP` to `~*this` and returns `*this` as a `TYPE&`. The macro is meant for
 * vector types that inherit from `PointerVector` with a custom
 * `ExprResultType`.
 */
#define MAKE_MATH_ASSIGN_EXPRESSION_POINTERVECTOR(OP, TYPE)                \
  TYPE& operator OP(const TYPE& rhs) noexcept {                            \
    auto& lhs_expr = ~*this;                                               \
    const auto& rhs_expr = ~rhs;                                           \
    /* clang-tidy: parens around OP */                                     \
    lhs_expr OP rhs_expr; /* NOLINT */                                     \
    return *this;                                                          \
  }                                                                        \
  /* clang-tidy: parens around TYPE */                                     \
  template <typename VT, bool VF>                                          \
  TYPE& operator OP(const blaze::DenseVector<VT, VF>& rhs) /* NOLINT */    \
      noexcept {                                                           \
    auto& lhs_expr = ~*this;                                               \
    lhs_expr OP rhs;                                                       \
    return *this;                                                          \
  }                                                                        \
  /* clang-tidy: parens around TYPE */                                     \
  TYPE& operator OP(const ElementType& rhs) noexcept { /* NOLINT */        \
    auto& lhs_expr = ~*this;                                               \
    lhs_expr OP rhs;                                                       \
    return *this;                                                          \
  }
constexpr T step_function(const T &arg) noexcept
Defines the Heaviside step function for arithmetic types.
Definition: Math.hpp:62
Definition: PointerVector.hpp:393
Definition: PointerVector.hpp:418
Includes Blaze library with specific configs.
#define ASSERT(a, m)
Assert that an expression should be true.
Definition: Assert.hpp:49
Definition: PointerVector.hpp:349
Definition: ComplexDataVector.hpp:56
Definition: PointerVector.hpp:124
A raw pointer endowed with expression template support via the Blaze library.
Definition: PointerVector.hpp:496
Defines macro ASSERT.
Definition: PointerVector.hpp:317
Tensor< T, Symmetry<>, index_list<> > Scalar
Scalar type.
Definition: TypeAliases.hpp:21
decltype(auto) constexpr pow(const T &t) noexcept
Compute t^N where N is an integer (positive or negative)
Definition: ConstantExpressions.hpp:157
constexpr T & at(std::array< T, N > &arr, Size index)
Retrieve an entry from a container, with checks in Debug mode that the index being retrieved is valid...
Definition: Gsl.hpp:124