Line data Source code
1 1 : // Distributed under the MIT License.
2 : // See LICENSE.txt for details.
3 :
4 : /// \file
5 : /// Defines function transpose
6 :
7 : #pragma once
8 :
9 : #include <cstddef>
10 :
11 : #include "Utilities/ErrorHandling/Assert.hpp"
12 : #include "Utilities/Gsl.hpp"
13 : #include "Utilities/MakeWithValue.hpp"
14 :
15 : namespace detail {
16 : void transpose_impl(double* matrix_transpose, const double* matrix,
17 : int32_t number_of_rows, int32_t number_of_columns);
18 : } // namespace detail
19 :
20 :
21 : /// \ingroup NumericalAlgorithmsGroup
22 : /// \brief Function to compute transposed data.
23 : ///
24 : /// Transpose the data pointed to by `data`, writing the result to the
25 : /// location pointed to by `result`. See the ::transpose function for
26 : /// a safer interface and for the meaning of the other arguments.
27 : // This is not an overload of transpose because overload resolution
28 : // would make non-intuitive choices of the return-by-pointer version
29 : // below over this function.
30 : template <typename T>
31 1 : void raw_transpose(const gsl::not_null<T*> result, const T* const data,
32 : const size_t chunk_size, const size_t number_of_chunks) {
33 : if constexpr (std::is_same_v<double, T>) {
34 : detail::transpose_impl(result, data, static_cast<int32_t>(number_of_chunks),
35 : static_cast<int32_t>(chunk_size));
36 : } else {
37 : // The i outside loop order is faster, but that could be architecture
38 : // dependent and so may need updating in the future. Changing this made the
39 : // logical derivatives in 3D with 50 variables 20% faster.
40 : for (size_t i = 0; i < chunk_size; ++i) {
41 : for (size_t j = 0; j < number_of_chunks; ++j) {
42 : // clang-tidy: pointer arithmetic
43 : result.get()[j + number_of_chunks * i] = // NOLINT
44 : data[i + chunk_size * j]; // NOLINT
45 : }
46 : }
47 : }
48 : }
49 :
50 : /// @{
51 : /// \ingroup NumericalAlgorithmsGroup
52 : /// \brief Function to compute transposed data.
53 : ///
54 : /// The primary use of this function is to rearrange the memory layout so that
55 : /// another function can operate on contiguous chunks of memory.
56 : ///
57 : /// \requires `result.size()` to be the product of `number_of_chunks` and
58 : /// `chunk_size`, `u.size()` to be equal or greater than `result.size()`,
59 : /// and that both `result` and `u` have a `data()` member function.
60 : ///
61 : /// \details The container `u` holds a contiguous array of data,
62 : /// treated as a sequence of `number_of_chunks` contiguous sets of
63 : /// entries of size `chunk_size`. The output `result` has its data
64 : /// arranged such that the first `number_of_chunks` elements in
65 : /// `result` will be the first element of each chunk of `u`. The
66 : /// last `number_of_chunks` elements in `result` will be the last
67 : /// (i.e. `chunk_size`-th) element of each chunk of `u`. If
68 : /// `u.size()` is greater than `result.size()` the extra elements
69 : /// of `u` are ignored.
70 : ///
71 : /// \note This is equivalent to treating the first part of `u` as a matrix and
72 : /// filling `result` (or the returned object) with the transpose of that
73 : /// matrix.
74 : ///
75 : /// If `u` represents a block of data indexed by
76 : /// \f$(x, y, z, \ldots)\f$ with the first index varying fastest,
77 : /// transpose serves to rotate the indices. If the extents are
78 : /// \f$(X, Y, Z, \ldots)\f$, with product \f$N\f$, `transpose(u, X,
79 : /// N/X)` reorders the data to be indexed \f$(y, z, \ldots, x)\f$,
80 : /// `transpose(u, X*Y, N/X/Y)` reorders the data to be indexed
81 : /// \f$(z, \ldots, x, y)\f$, etc.
82 : ///
83 : /// \example
84 : /// \snippet Test_Transpose.cpp transpose_matrix
85 : /// \snippet Test_Transpose.cpp transpose_by_not_null_example
86 : /// \snippet Test_Transpose.cpp return_transpose_example
87 : /// \snippet Test_Transpose.cpp partial_transpose_example
88 : ///
89 : /// \tparam U the type of data to be transposed
90 : /// \tparam T the type of the transposed data
91 : template <typename U, typename T>
92 1 : void transpose(const gsl::not_null<T*> result, const U& u,
93 : const size_t chunk_size, const size_t number_of_chunks) {
94 : ASSERT(chunk_size * number_of_chunks == result->size(),
95 : "chunk_size = " << chunk_size << ", number_of_chunks = "
96 : << number_of_chunks << ", size = " << result->size());
97 : ASSERT(result->size() <= u.size(),
98 : "result.size = " << result->size() << ", u.size = " << u.size());
99 : raw_transpose(make_not_null(result->data()), u.data(), chunk_size,
100 : number_of_chunks);
101 : }
102 :
103 : template <typename U, typename T = U>
104 1 : T transpose(const U& u, const size_t chunk_size,
105 : const size_t number_of_chunks) {
106 : T t = make_with_value<T>(u, 0.0);
107 : transpose(make_not_null(&t), u, chunk_size, number_of_chunks);
108 : return t;
109 : }
110 : /// @}
|