Line data Source code
1 0 : // Distributed under the MIT License.
2 : // See LICENSE.txt for details.
3 :
4 : #pragma once
5 :
#include <cstddef>
#include <limits>
#include <optional>
#include <pup.h>
#include <string>
#include <type_traits>
#include <unordered_map>
#include <unordered_set>
#include <utility>
#include <vector>
15 :
16 : #include "DataStructures/DataBox/DataBox.hpp"
17 : #include "DataStructures/DataBox/TagName.hpp"
18 : #include "DataStructures/DataVector.hpp"
19 : #include "Domain/Structure/Element.hpp"
20 : #include "Domain/Structure/ElementId.hpp"
21 : #include "Domain/Tags.hpp"
22 : #include "IO/Observer/Helpers.hpp"
23 : #include "IO/Observer/ObservationId.hpp"
24 : #include "IO/Observer/ObserverComponent.hpp"
25 : #include "IO/Observer/ReductionActions.hpp"
26 : #include "IO/Observer/Tags.hpp"
27 : #include "IO/Observer/TypeOfObservation.hpp"
28 : #include "Options/Auto.hpp"
29 : #include "Options/String.hpp"
30 : #include "Parallel/GlobalCache.hpp"
31 : #include "Parallel/Info.hpp"
32 : #include "Parallel/Invoke.hpp"
33 : #include "Parallel/Local.hpp"
34 : #include "Parallel/MemoryMonitor/MemoryMonitor.hpp"
35 : #include "Parallel/MemoryMonitor/Tags.hpp"
36 : #include "Parallel/Reduction.hpp"
37 : #include "Parallel/TypeTraits.hpp"
38 : #include "ParallelAlgorithms/Actions/MemoryMonitor/ProcessArray.hpp"
39 : #include "ParallelAlgorithms/Actions/MemoryMonitor/ProcessGroups.hpp"
40 : #include "ParallelAlgorithms/Actions/MemoryMonitor/ProcessSingleton.hpp"
41 : #include "ParallelAlgorithms/EventsAndTriggers/Event.hpp"
42 : #include "Utilities/ErrorHandling/Error.hpp"
43 : #include "Utilities/Functional.hpp"
44 : #include "Utilities/Serialization/CharmPupable.hpp"
45 : #include "Utilities/Serialization/Serialize.hpp"
46 : #include "Utilities/TMPL.hpp"
47 :
48 : /// \cond
49 : namespace Parallel::Algorithms {
50 : struct Array;
51 : struct Group;
52 : struct Nodegroup;
53 : struct Singleton;
54 : } // namespace Parallel::Algorithms
55 : /// \endcond
56 :
57 : namespace Events {
58 : /*!
59 : * \brief Event run on the DgElementArray that will monitor the memory usage of
60 : * parallel components in megabytes.
61 : *
62 : * \details Given a list of parallel component names from Options, this will
63 : * calculate the memory usage of each component and write it to disk in the
64 : * reductions file under the `/MemoryMonitors/` group. The name of each file is
65 : * the `pretty_type::name` of each parallel component.
66 : *
67 : * The parallel components available to monitor are the ones defined in the
68 : * `component_list` type alias in the metavariables. In addition to these
69 : * components, you can also monitor the size of the GlobalCache. To see which
70 : * parallel components are available to monitor, request to monitor an invalid
71 : * parallel component ("Blah" for example) in the input file. An ERROR will
72 : * occur and a list of the available components to monitor will be printed.
73 : *
74 : * \note Currently, the only Parallel::Algorithms::Array parallel component that
75 : * can be monitored is the DgElementArray itself.
76 : */
77 :
78 : template <size_t Dim>
79 1 : class MonitorMemory : public Event {
80 : private:
81 : // Reduction data for arrays
82 0 : using ReductionData = Parallel::ReductionData<
83 : // Time
84 : Parallel::ReductionDatum<double, funcl::AssertEqual<>>,
85 : // Vector of total mem usage on each node
86 : Parallel::ReductionDatum<std::vector<double>,
87 : funcl::ElementWise<funcl::Plus<>>>>;
88 :
89 : public:
90 0 : explicit MonitorMemory(CkMigrateMessage* msg);
91 : using PUP::able::register_constructor;
92 0 : WRAPPED_PUPable_decl_template(MonitorMemory); // NOLINT
93 :
94 0 : struct ComponentsToMonitor {
95 0 : using type =
96 : Options::Auto<std::vector<std::string>, Options::AutoLabel::All>;
97 0 : static constexpr Options::String help = {
98 : "Names of parallel components to monitor the memory usage of. If you'd "
99 : "like to monitor all available parallel components, pass 'All' "
100 : "instead."};
101 : };
102 :
103 0 : using options = tmpl::list<ComponentsToMonitor>;
104 :
105 0 : static constexpr Options::String help =
106 : "Observe memory usage of parallel components.";
107 :
108 0 : MonitorMemory() = default;
109 :
110 : template <typename Metavariables>
111 0 : MonitorMemory(
112 : const std::optional<std::vector<std::string>>& components_to_monitor,
113 : const Options::Context& context, Metavariables /*meta*/);
114 :
115 0 : using observed_reduction_data_tags =
116 : observers::make_reduction_data_tags<tmpl::list<ReductionData>>;
117 :
118 0 : using compute_tags_for_observation_box = tmpl::list<>;
119 :
120 0 : using return_tags = tmpl::list<>;
121 0 : using argument_tags = tmpl::list<domain::Tags::Element<Dim>>;
122 :
123 : template <typename Metavariables, typename ArrayIndex,
124 : typename ParallelComponent>
125 0 : void operator()(const ::Element<Dim>& element,
126 : Parallel::GlobalCache<Metavariables>& cache,
127 : const ArrayIndex& array_index,
128 : const ParallelComponent* const /*meta*/,
129 : const ObservationValue& observation_value) const;
130 :
131 0 : using observation_registration_tags = tmpl::list<>;
132 :
133 : std::optional<
134 : std::pair<observers::TypeOfObservation, observers::ObservationKey>>
135 0 : get_observation_type_and_key_for_registration() const {
136 : return {};
137 : }
138 :
139 0 : using is_ready_argument_tags = tmpl::list<>;
140 :
141 : template <typename Metavariables, typename ArrayIndex, typename Component>
142 0 : bool is_ready(Parallel::GlobalCache<Metavariables>& /*cache*/,
143 : const ArrayIndex& /*array_index*/,
144 : const Component* const /*meta*/) const {
145 : return true;
146 : }
147 :
148 1 : bool needs_evolved_variables() const override { return false; }
149 :
150 : // NOLINTNEXTLINE(google-runtime-references)
151 0 : void pup(PUP::er& p) override;
152 :
153 : private:
154 0 : std::unordered_set<std::string> components_to_monitor_{};
155 : };
156 :
157 : /// \cond
158 : template <size_t Dim>
159 : MonitorMemory<Dim>::MonitorMemory(CkMigrateMessage* msg) : Event(msg) {}
160 :
161 : template <size_t Dim>
162 : template <typename Metavariables>
163 : MonitorMemory<Dim>::MonitorMemory(
164 : const std::optional<std::vector<std::string>>& components_to_monitor,
165 : const Options::Context& context, Metavariables /*meta*/) {
166 : using component_list = tmpl::push_back<typename Metavariables::component_list,
167 : Parallel::GlobalCache<Metavariables>>;
168 : std::unordered_map<std::string, std::string> existing_components{};
169 : std::string str_component_list{};
170 :
171 : tmpl::for_each<component_list>(
172 : [&existing_components, &str_component_list](auto component_v) {
173 : using component = tmpl::type_from<decltype(component_v)>;
174 : const std::string component_name = pretty_type::name<component>();
175 : const std::string chare_type_name =
176 : pretty_type::name<typename component::chare_type>();
177 : existing_components[component_name] = chare_type_name;
178 : // Only Array we can monitor is DgElementArray
179 : if (chare_type_name != "Array") {
180 : str_component_list += " - " + component_name + "\n";
181 : } else if (component_name == "DgElementArray") {
182 : str_component_list += " - " + component_name + "\n";
183 : }
184 : });
185 :
186 : // A list of names was specified
187 : if (components_to_monitor.has_value()) {
188 : for (const auto& component : *components_to_monitor) {
189 : // Do some checks:
190 : // 1. Make sure the components requested are viable components. This
191 : // protects against spelling errors.
192 : // 2. Currently the only charm Array you can monitor the memory of is
193 : // the DgElementArray so enforce this.
194 : if (existing_components.count(component) != 1) {
195 : PARSE_ERROR(
196 : context,
197 : "Cannot monitor memory usage of unknown parallel component '"
198 : << component
199 : << "'. Please choose from the existing parallel components:\n"
200 : << str_component_list);
201 : } else if (existing_components.at(component) == "Array" and
202 : component != "DgElementArray") {
203 : PARSE_ERROR(
204 : context,
205 : "Cannot monitor the '"
206 : << component
207 : << "' parallel component. Currently, the only Array parallel "
208 : "component allowed to be monitored is the "
209 : "DgElementArray.");
210 : }
211 :
212 : components_to_monitor_.insert(component);
213 : }
214 : } else {
215 : // 'All' was specified. Filter out Array components that are not the
216 : // DgElementArray
217 : for (const auto& [name, chare] : existing_components) {
218 : if (chare != "Array") {
219 : components_to_monitor_.insert(name);
220 : } else if (name == "DgElementArray") {
221 : components_to_monitor_.insert(name);
222 : }
223 : }
224 : }
225 : }
226 :
227 : template <size_t Dim>
228 : template <typename Metavariables, typename ArrayIndex,
229 : typename ParallelComponent>
230 : void MonitorMemory<Dim>::operator()(
231 : const ::Element<Dim>& element, Parallel::GlobalCache<Metavariables>& cache,
232 : const ArrayIndex& array_index, const ParallelComponent* const /*meta*/,
233 : const ObservationValue& observation_value) const {
234 : using component_list = tmpl::push_back<typename Metavariables::component_list,
235 : Parallel::GlobalCache<Metavariables>>;
236 :
237 : tmpl::for_each<component_list>([this, &observation_value, &element, &cache,
238 : &array_index](auto component_v) {
239 : using component = tmpl::type_from<decltype(component_v)>;
240 :
241 : // If we aren't monitoring this parallel component, then just exit now
242 : if (components_to_monitor_.count(pretty_type::name<component>()) != 1) {
243 : return;
244 : }
245 :
246 : // Certain components only need to be triggered once, so we have a special
247 : // element designated to be the one that triggers memory monitoring, the
248 : // 0th element.
249 : const auto& element_id = element.id();
250 : // Avoid GCC-7 compiler warning about unused variable (in the Array if
251 : // constexpr branch)
252 : [[maybe_unused]] const bool designated_element =
253 : is_zeroth_element(element_id);
254 :
255 : // If this is an array, this is run on every element. It has already
256 : // been asserted in the constructor that the only Array the MemoryMonitor
257 : // can monitor is the DgElementArray itself. If you want to monitor other
258 : // Arrays, the implementation will need to be generalized.
259 : if constexpr (Parallel::is_array_v<component>) {
260 : auto& memory_monitor_proxy = Parallel::get_parallel_component<
261 : mem_monitor::MemoryMonitor<Metavariables>>(cache);
262 : auto array_element_proxy =
263 : Parallel::get_parallel_component<component>(cache)[array_index];
264 : const double size_in_bytes = static_cast<double>(
265 : size_of_object_in_bytes(*Parallel::local(array_element_proxy)));
266 : const double size_in_megabytes = size_in_bytes / 1.0e6;
267 :
268 : // vector the size of the number of nodes we are running on. Set the
269 : // 'my_node'th element of the vector to the size of this Element. Then
270 : // when we reduce, we will have a vector with 'num_nodes' elements, each
271 : // of which represents the total memory usage of all Elements on that
272 : // node.
273 : const size_t num_nodes = Parallel::number_of_nodes<size_t>(
274 : *Parallel::local(array_element_proxy));
275 : const size_t my_node =
276 : Parallel::my_node<size_t>(*Parallel::local(array_element_proxy));
277 : std::vector<double> data(num_nodes, 0.0);
278 : data[my_node] = size_in_megabytes;
279 :
280 : Parallel::contribute_to_reduction<
281 : mem_monitor::ProcessArray<ParallelComponent>>(
282 : ReductionData{observation_value.value, data}, array_element_proxy,
283 : memory_monitor_proxy);
284 : } else if constexpr (Parallel::is_singleton_v<component>) {
285 : // If this is a singleton, we only run this once so use the designated
286 : // element. Nothing to reduce with singletons so just call the simple
287 : // action on the singleton
288 : if (designated_element) {
289 : auto& singleton_proxy =
290 : Parallel::get_parallel_component<component>(cache);
291 :
292 : Parallel::simple_action<mem_monitor::ProcessSingleton>(
293 : singleton_proxy, observation_value.value);
294 : }
295 : } else if constexpr (Parallel::is_nodegroup_v<component> or
296 : Parallel::is_group_v<component>) {
297 : // If this is a (node)group, call a simple action on each branch if on
298 : // the designated element
299 : if (designated_element) {
300 : // Can't run simple actions on the cache so broadcast a specific entry
301 : // method that will calculate the size and send it to the memory
302 : // monitor
303 : if constexpr (std::is_same_v<component,
304 : Parallel::GlobalCache<Metavariables>>) {
305 : auto cache_proxy = cache.get_this_proxy();
306 :
307 : // This will be called on all branches of the GlobalCache
308 : cache_proxy.compute_size_for_memory_monitor(observation_value.value);
309 : } else {
310 : // Groups and nodegroups share an action
311 : auto& group_proxy =
312 : Parallel::get_parallel_component<component>(cache);
313 :
314 : // This will be called on all branches of the (node)group
315 : Parallel::simple_action<mem_monitor::ProcessGroups>(
316 : group_proxy, observation_value.value);
317 : }
318 : }
319 : }
320 : });
321 : }
322 :
323 : template <size_t Dim>
324 : void MonitorMemory<Dim>::pup(PUP::er& p) {
325 : Event::pup(p);
326 : p | components_to_monitor_;
327 : }
328 :
329 : template <size_t Dim>
330 : PUP::able::PUP_ID MonitorMemory<Dim>::my_PUP_ID = 0; // NOLINT
331 : /// \endcond
332 :
333 : } // namespace Events
|