Line data Source code
1 0 : // Distributed under the MIT License.
2 : // See LICENSE.txt for details.
3 :
4 : #pragma once
5 :
6 : #include <algorithm>
7 : #include <cstddef>
8 : #include <ios>
9 : #include <optional>
10 : #include <pup.h>
11 : #include <set>
12 : #include <sstream>
13 : #include <string>
14 : #include <type_traits>
15 : #include <unordered_set>
16 : #include <utility>
17 :
18 : #include "DataStructures/DataBox/DataBox.hpp"
19 : #include "Options/Auto.hpp"
20 : #include "Options/Context.hpp"
21 : #include "Options/ParseError.hpp"
22 : #include "Options/String.hpp"
23 : #include "Parallel/Algorithms/AlgorithmSingletonDeclarations.hpp"
24 : #include "Parallel/Info.hpp"
25 : #include "Parallel/ParallelComponentHelpers.hpp"
26 : #include "Parallel/Printf/Printf.hpp"
27 : #include "Parallel/TypeTraits.hpp"
28 : #include "Utilities/ErrorHandling/Assert.hpp"
29 : #include "Utilities/ErrorHandling/Error.hpp"
30 : #include "Utilities/Numeric.hpp"
31 : #include "Utilities/PrettyType.hpp"
32 : #include "Utilities/Serialization/PupStlCpp17.hpp"
33 : #include "Utilities/StdHelpers.hpp"
34 : #include "Utilities/System/ParallelInfo.hpp"
35 : #include "Utilities/TMPL.hpp"
36 : #include "Utilities/TaggedTuple.hpp"
37 : #include "Utilities/TypeTraits/CreateHasTypeAlias.hpp"
38 :
39 : /// \cond
namespace Parallel::Tags {
// Forward declarations only: these tags are named here but no definition is
// provided in this part of the file.
struct AvoidGlobalProc0;
template <typename ParallelComponent>
struct SingletonInfo;
template <typename Metavars>
struct ResourceInfo;
}  // namespace Parallel::Tags
47 : /// \endcond
48 :
49 : namespace Parallel {
50 : /*!
51 : * \ingroup ParallelGroup
52 : * \brief Holds resource info for a single singleton component
53 : *
54 : * \details Holds what proc the singleton is to be placed on and whether that
55 : * proc should be exclusive, i.e. no array component elements or other
56 : * singletons placed on that proc. Instead of specifying a proc, the proc can be
57 : * chosen automatically by using the `Options::Auto` option.
58 : *
59 : * The template parameter `Component` is only used to identify which singleton
60 : * component this SingletonInfoHolder belongs to.
61 : */
62 : template <typename Component>
63 1 : struct SingletonInfoHolder {
64 0 : struct Proc {
65 0 : using type = Options::Auto<int>;
66 0 : static constexpr Options::String help = {
67 : "Proc to put singleton on. This can be determined automatically if "
68 : "desired by specifying 'Auto' (without quotes)."};
69 : };
70 :
71 0 : struct Exclusive {
72 0 : using type = bool;
73 0 : static constexpr Options::String help = {
74 : "Reserve this proc for this singleton. No array component elements or "
75 : "other singleton components will be placed on this proc."};
76 : };
77 :
78 0 : using options = tmpl::list<Proc, Exclusive>;
79 0 : static constexpr Options::String help = {
80 : "Resource options for a single singleton."};
81 :
82 0 : SingletonInfoHolder(std::optional<int> input_proc, const bool input_exclusive,
83 : const Options::Context& context = {})
84 : : exclusive_(input_exclusive) {
85 : // If there is no value, we don't need to error so use 0 as a comparator
86 : // in both cases
87 : if (input_proc.value_or(0) < 0) {
88 : PARSE_ERROR(
89 : context,
90 : "Proc must be a non-negative integer. Please choose another proc.");
91 : }
92 :
93 : proc_ = input_proc.has_value()
94 : ? std::optional<size_t>(static_cast<size_t>(input_proc.value()))
95 : : std::nullopt;
96 : }
97 :
98 0 : SingletonInfoHolder() = default;
99 0 : SingletonInfoHolder(const SingletonInfoHolder& /*rhs*/) = default;
100 0 : SingletonInfoHolder& operator=(const SingletonInfoHolder& /*rhs*/) = default;
101 0 : SingletonInfoHolder(SingletonInfoHolder&& /*rhs*/) = default;
102 0 : SingletonInfoHolder& operator=(SingletonInfoHolder&& /*rhs*/) = default;
103 0 : ~SingletonInfoHolder() = default;
104 :
105 : // NOLINTNEXTLINE(google-runtime-references)
106 0 : void pup(PUP::er& p) {
107 : p | proc_;
108 : p | exclusive_;
109 : };
110 :
111 : /// Proc that singleton is to be placed on. If the optional is a std::nullopt,
112 : /// then the proc should be chosen automatically.
113 1 : std::optional<size_t> proc() const { return proc_; }
114 :
115 : /// Whether or not the singleton wants to be exclusive on the proc.
116 1 : bool is_exclusive() const { return exclusive_; }
117 :
118 : private:
119 : template <typename ParallelComponent>
120 0 : friend bool operator==(const SingletonInfoHolder<ParallelComponent>& lhs,
121 : const SingletonInfoHolder<ParallelComponent>& rhs);
122 : // We use size_t here because we want a non-negative integer, but we use int
123 : // in the option because we want to protect against negative numbers. And a
124 : // negative size_t is actually a really large value (it wraps around)
125 0 : std::optional<size_t> proc_{std::nullopt};
126 0 : bool exclusive_{false};
127 : };
128 :
129 : template <typename ParallelComponent>
130 0 : bool operator==(const SingletonInfoHolder<ParallelComponent>& lhs,
131 : const SingletonInfoHolder<ParallelComponent>& rhs) {
132 : return lhs.proc_ == rhs.proc_ and lhs.exclusive_ == rhs.exclusive_;
133 : }
134 :
135 : template <typename ParallelComponent>
136 0 : bool operator!=(const SingletonInfoHolder<ParallelComponent>& lhs,
137 : const SingletonInfoHolder<ParallelComponent>& rhs) {
138 : return not(lhs == rhs);
139 : }
140 :
141 : template <typename ParallelComponents>
142 0 : struct SingletonPack;
143 :
144 : /*!
145 : * \ingroup ParallelGroup
146 : * \brief Holds options for a group of singleton components.
147 : *
148 : * \details The info for each singleton in the `ParallelComponents` template
149 : * pack is stored in an individual `Parallel::SingletonInfoHolder`.
150 : *
151 : * You can pass `Auto` as an option for each singleton in an input file and each
152 : * singleton will be constructed as a default `Parallel::SingletonInfoHolder`.
153 : */
154 : template <typename... ParallelComponents>
155 1 : struct SingletonPack<tmpl::list<ParallelComponents...>> {
156 : private:
157 : static_assert((Parallel::is_singleton_v<ParallelComponents> and ...),
158 : "At least one of the parallel components passed to "
159 : "SingletonPack is not a Singleton.");
160 0 : using component_list = tmpl::list<ParallelComponents...>;
161 :
162 : template <typename Component>
163 0 : struct LocalTag {
164 0 : using type = SingletonInfoHolder<Component>;
165 : };
166 0 : using local_tags =
167 : tmpl::transform<component_list, tmpl::bind<LocalTag, tmpl::_1>>;
168 :
169 : public:
170 : template <typename Component>
171 0 : struct SingletonOption {
172 0 : using type = Options::Auto<SingletonInfoHolder<Component>>;
173 0 : static std::string name() { return pretty_type::name<Component>(); }
174 0 : static constexpr Options::String help = {
175 : "Resource options for a specific singleton."};
176 : };
177 :
178 0 : using options =
179 : tmpl::transform<component_list, tmpl::bind<SingletonOption, tmpl::_1>>;
180 0 : static constexpr Options::String help = {
181 : "Resource options for all singletons."};
182 :
183 0 : SingletonPack(
184 : const std::optional<
185 : SingletonInfoHolder<ParallelComponents>>&... singleton_info_holders,
186 : const Options::Context& /*context*/ = {})
187 : : procs_(tuples::tagged_tuple_from_typelist<local_tags>(
188 : singleton_info_holders.value_or(
189 : SingletonInfoHolder<ParallelComponents>{})...)) {}
190 :
191 0 : SingletonPack() = default;
192 0 : SingletonPack(const SingletonPack& /*rhs*/) = default;
193 0 : SingletonPack& operator=(const SingletonPack& /*rhs*/) = default;
194 0 : SingletonPack(SingletonPack&& /*rhs*/) = default;
195 0 : SingletonPack& operator=(SingletonPack&& /*rhs*/) = default;
196 0 : ~SingletonPack() = default;
197 :
198 : // NOLINTNEXTLINE(google-runtime-references)
199 0 : void pup(PUP::er& p) { p | procs_; };
200 :
201 : /// Get a const reference to the SingletonInfoHolder for the `Component`
202 : /// singleton
203 : template <typename Component>
204 1 : const auto& get() const {
205 : return tuples::get<LocalTag<Component>>(procs_);
206 : }
207 :
208 : private:
209 : template <typename... Components>
210 0 : friend bool operator==(const SingletonPack<tmpl::list<Components...>>& lhs,
211 : const SingletonPack<tmpl::list<Components...>>& rhs);
212 :
213 0 : tuples::tagged_tuple_from_typelist<local_tags> procs_{};
214 : };
215 :
216 : template <typename... Components>
217 0 : bool operator==(const SingletonPack<tmpl::list<Components...>>& lhs,
218 : const SingletonPack<tmpl::list<Components...>>& rhs) {
219 : return lhs.procs_ == rhs.procs_;
220 : }
221 :
222 : template <typename... Components>
223 0 : bool operator!=(const SingletonPack<tmpl::list<Components...>>& lhs,
224 : const SingletonPack<tmpl::list<Components...>>& rhs) {
225 : return not(lhs == rhs);
226 : }
227 :
228 : namespace detail {
229 : template <typename Metavariables>
230 : using singleton_components =
231 : tmpl::filter<typename Metavariables::component_list,
232 : Parallel::is_singleton<tmpl::_1>>;
233 : } // namespace detail
234 :
235 : /*!
236 : * \ingroup ParallelGroup
237 : * \brief Holds resource info for all singletons and for avoiding placing array
238 : * elements/singletons on the global proc 0.
239 : *
240 : * \details This can be used for placing all singletons in an executable.
241 : *
242 : * If you have no singletons, you'll need the following block in the input file
243 : * (where you can set the value of AvoidGlobalProc0 to true or false):
244 : *
245 : * \code {.yaml}
246 : * ResourceInfo:
247 : * AvoidGlobalProc0: true
248 : * \endcode
249 : *
250 : * If you have singletons, but do not want to assign any of them to a specific
251 : * proc or be exclusive on a proc, you'll need the following block in the input
252 : * file (where you can set the value of AvoidGlobalProc0 to true or false):
253 : *
254 : * \code {.yaml}
255 : * ResourceInfo:
256 : * AvoidGlobalProc0: true
257 : * Singletons: Auto
258 : * \endcode
259 : *
260 : * Otherwise, you will need to specify a block in the input file as below,
261 : * where you will need to specify the options for each singleton:
262 : *
263 : * \code {.yaml}
264 : * ResourceInfo:
265 : * AvoidGlobalProc0: true
266 : * Singletons:
267 : * MySingleton1:
268 : * Proc: 2
269 : * Exclusive: true
270 : * MySingleton2: Auto
271 : * \endcode
272 : *
273 : * where `MySingleton1` is the `pretty_type::name` of the singleton component
274 : * and the options for each singleton are described in
275 : * `Parallel::SingletonInfoHolder` (You can use `Auto` for each singleton that
276 : * you want to have its proc determined automatically and be non-exclusive,
277 : * like `MySingleton2`).
278 : *
279 : * Several consistency checks are done during option parsing to avoid user
280 : * error. However, some checks can't be done during option parsing because the
281 : * number of nodes/procs is needed to determine if there is an inconsistency.
282 : * These checks are done during runtime, just before the map of singletons is
283 : * created.
284 : *
285 : * To automatically place singletons, we use a custom algorithm that will
286 : * distribute singletons evenly over the number of nodes, and evenly over the
287 : * procs on a node. This will help keep communication costs down by distributing
288 : * the workload over all of the communication cores (one communication core per
289 : * charm node), and ensure that our resources are being maximally utilized (i.e.
290 : * one core doesn't have all the singletons on it).
291 : *
292 : * Defining some terminology for singletons: `requested` means that a specific
293 : * processor was requested in the input file; `auto` means that the processor
294 : * should be chosen automatically; `exclusive` means that no other singletons or
295 : * array elements should be placed on this singleton's processor; `nonexclusive`
296 : * means that you *can* place other singletons or array elements on this
297 : * singleton's processor. The algorithm that distributes the singletons is as
298 : * follows:
299 : *
300 : * 1. Allocate all singletons that `requested` specific processors, both
301 : * `exclusive` and `nonexclusive`. This is done during option parsing.
302 : * 2. Allocate `auto exclusive` singletons, distributing the total number of
303 : * `exclusive` singletons (`auto` + `requested`) as evenly as possible over
304 : * the number of nodes. We say "as evenly as possible" because this depends
305 : * on the `requested exclusive` singletons. For example, if we have 4 nodes
306 : * and 5 cores per node, the number of `requested exclusive` singletons on
307 : * each node is (0, 1, 4, 1), and we have 3 `auto exclusive` singletons to
308 : * place, the best distribution of `exclusive` singletons we can achieve
309 : * given our constraints is (2, 2, 4, 1). Clearly this is not the *most*
310 : * evenly distributed the `exclusive` singletons could be. However, this *is*
311 : * the most evenly distributed they could be given the starting distribution
312 : * from the input file.
313 : * 3. Allocate `auto nonexclusive` singletons, distributing the total number of
314 : * `nonexclusive` singletons (`auto` + `requested`): First, as evenly as
315 : * possible over the number of nodes. Then, on each node, distributing the
316 : * singletons as evenly as possible over the number of processors on that
317 : * node. The same disclaimer about "as evenly as possible" from the previous
318 : * step applies here.
319 : *
320 : * The goal of this algorithm is to mimic, as best as possible, how a human
321 : * would distribute this workload. It isn't perfect, but is a significant
322 : * improvement over placing singletons on one proc after another starting from
323 : * global proc 0.
324 : */
325 : template <typename Metavariables>
326 1 : struct ResourceInfo {
327 : private:
328 0 : using singletons = detail::singleton_components<Metavariables>;
329 :
330 : template <typename Component>
331 0 : struct LocalTag {
332 : // exclusive, proc
333 0 : using type = std::pair<bool, std::optional<size_t>>;
334 : };
335 0 : using local_tags =
336 : tmpl::transform<singletons, tmpl::bind<LocalTag, tmpl::_1>>;
337 :
338 : public:
339 0 : struct Singletons {
340 0 : using type = Options::Auto<SingletonPack<singletons>>;
341 0 : static constexpr Options::String help = {
342 : "Resource options for all singletons."};
343 : };
344 :
345 0 : struct AvoidGlobalProc0 {
346 0 : using type = bool;
347 0 : static constexpr Options::String help = {
348 : "Whether to avoid placing Array elements or singletons on global proc "
349 : "0."};
350 : };
351 :
352 0 : using options = tmpl::push_front<
353 : tmpl::conditional_t<tmpl::size<singletons>::value != 0,
354 : tmpl::list<Singletons>, tmpl::list<>>,
355 : AvoidGlobalProc0>;
356 :
357 0 : static constexpr Options::String help = {
358 : "Resource options for a simulation. This information will be used when "
359 : "placing Array and Singleton parallel components on the requested "
360 : "resources."};
361 :
362 : /// The main constructor. All other constructors that take options will call
363 : /// this one. This constructor holds all checks able to be done during option
364 : /// parsing.
365 1 : ResourceInfo(const bool avoid_global_proc_0,
366 : const std::optional<SingletonPack<singletons>>& singleton_pack,
367 : const Options::Context& context = {});
368 :
369 : /// This constructor is used when only AvoidGlobalProc0 is specified, but no
370 : /// SingletonInfoHolders are specified. Calls the main constructor with an
371 : /// empty SingletonPack.
372 1 : ResourceInfo(const bool avoid_global_proc_0,
373 : const Options::Context& context = {});
374 :
375 0 : ResourceInfo() = default;
376 0 : ResourceInfo(const ResourceInfo& /*rhs*/) = default;
377 0 : ResourceInfo& operator=(const ResourceInfo& /*rhs*/) = default;
378 0 : ResourceInfo(ResourceInfo&& /*rhs*/) = default;
379 0 : ResourceInfo& operator=(ResourceInfo&& /*rhs*/) = default;
380 0 : ~ResourceInfo() = default;
381 :
382 : // NOLINTNEXTLINE(google-runtime-references)
383 0 : void pup(PUP::er& p);
384 :
385 : /// Returns whether we should avoid placing array elements and singletons on
386 : /// the global zeroth proc. Default `false`.
387 1 : bool avoid_global_proc_0() const { return avoid_global_proc_0_; }
388 :
389 : /// Return a SingletonInfoHolder corresponding to `Component`
390 : template <typename Component>
391 1 : auto get_singleton_info() const;
392 :
393 : /// Returns a `std::unordered_set<size_t>` of processors that array components
394 : /// should avoid placing elements on. This should be passed to the
395 : /// `allocate_array` function of the array component
396 1 : const std::unordered_set<size_t>& procs_to_ignore() const;
397 :
398 : /// Returns a `std::set<size_t>` that has all processors available to put
399 : /// elements on, meaning processors that aren't ignored.
400 1 : const std::set<size_t>& procs_available_for_elements() const;
401 :
402 : /// Returns the proc that the singleton `Component` should be placed on.
403 : template <typename Component>
404 1 : size_t proc_for() const;
405 :
406 : /// \brief Actually builds the singleton map and allocates all the singletons.
407 : ///
408 : /// \details This could be done in the constructor, however, since we need the
409 : /// number of nodes to do some sanity checks, it can't. If an executable is
410 : /// run with the --check-options flag, we will be running on 1 proc and 1 node
411 : /// so some of the checks done in this function would fail. Unfortunately,
412 : /// that means the checks that require knowing the number of nodes now occur
413 : /// at runtime instead of option parsing. This is why the
414 : /// `singleton_map_has_been_set_` bool is necessary and why we check if this
415 : /// function has been called in most other member functions.
416 : ///
417 : /// To avoid a cyclic dependency between the GlobalCache and ResourceInfo, we
418 : /// template this function rather than explicitly use the GlobalCache because
419 : /// the GlobalCache depends on ResourceInfo
420 : ///
421 : /// This function should only be called once.
422 : template <typename Cache>
423 1 : void build_singleton_map(const Cache& cache);
424 :
425 : private:
426 : template <typename Metavars>
427 0 : friend bool operator==(const ResourceInfo<Metavars>& lhs,
428 : const ResourceInfo<Metavars>& rhs);
429 :
430 0 : void singleton_map_not_built() const {
431 : ERROR(
432 : "The singleton map has not been built yet. You must call "
433 : "build_singleton_map() before you call this function.");
434 : }
435 0 : bool avoid_global_proc_0_{false};
436 0 : bool singleton_map_has_been_set_{false};
437 : // These are quantities that we will need for placing singletons which can be
438 : // determined just by option parsing
439 0 : size_t num_exclusive_singletons_{};
440 0 : size_t num_procs_to_ignore_{};
441 0 : size_t num_requested_exclusive_singletons_{};
442 0 : size_t num_requested_nonexclusive_singletons_{};
443 0 : std::unordered_multiset<size_t> requested_nonexclusive_procs_{};
444 : // Procs that are exclusive. These may or may not be specifically requested
445 0 : std::unordered_set<size_t> procs_to_ignore_{};
446 0 : std::set<size_t> procs_available_for_elements_{};
447 : // For each singleton (whether it has a SingletonInfo or not), maps whether
448 : // it's exclusive and what proc it is on.
449 0 : tuples::tagged_tuple_from_typelist<local_tags> singleton_map_{};
450 : };
451 :
452 : template <typename Metavariables>
453 : ResourceInfo<Metavariables>::ResourceInfo(
454 : const bool avoid_global_proc_0,
455 : const std::optional<SingletonPack<singletons>>& opt_singleton_pack,
456 : const Options::Context& context)
457 : : avoid_global_proc_0_(avoid_global_proc_0) {
458 : if (avoid_global_proc_0_) {
459 : procs_to_ignore_.insert(0);
460 : ++num_procs_to_ignore_;
461 : }
462 :
463 : if constexpr (tmpl::size<singletons>::value > 0) {
464 : const auto& singleton_pack =
465 : opt_singleton_pack.value_or(SingletonPack<singletons>{});
466 :
467 : // Procs that were specifically requested. These may or may not be exclusive
468 : std::unordered_multiset<int> requested_procs{};
469 :
470 : [[maybe_unused]] const auto parse_singletons = [this, &context,
471 : &singleton_pack,
472 : &requested_procs](
473 : const auto component_v) {
474 : using component = tmpl::type_from<decltype(component_v)>;
475 : auto& singleton_map = tuples::get<LocalTag<component>>(singleton_map_);
476 :
477 : // This singleton has a SingletonInfoHolder associated with it. Get all
478 : // the info necessary from it
479 : if constexpr (tmpl::list_contains_v<singletons, component>) {
480 : const auto& info_holder = singleton_pack.template get<component>();
481 : // Assign proc. If a specific proc is requested, add it to a map. We'll
482 : // check that exclusive singletons have unique procs once we've gone
483 : // through everything once
484 : const auto proc = info_holder.proc();
485 : singleton_map.second = proc;
486 :
487 : if (proc.has_value()) {
488 : requested_procs.insert(*proc);
489 : }
490 :
491 : if (info_holder.is_exclusive()) {
492 : // Check that no singleton has requested to be on proc 0 while
493 : // AvoidGlobalProc0 is simultaneously true.
494 : if (avoid_global_proc_0_ and proc.has_value() and *proc == 0) {
495 : PARSE_ERROR(
496 : context,
497 : "A singleton has requested to be exclusively on proc 0, "
498 : "but the AvoidGlobalProc0 option is also set to true.");
499 : }
500 :
501 : // This singleton is exclusive so set it.
502 : singleton_map.first = true;
503 : ++num_exclusive_singletons_;
504 : ++num_procs_to_ignore_;
505 : // If it requested a specific proc, ignore it when assigning the rest
506 : // of the singletons
507 : if (proc.has_value()) {
508 : procs_to_ignore_.insert(static_cast<size_t>(*proc));
509 : ++num_requested_exclusive_singletons_;
510 : }
511 : } else {
512 : // This singleton is not exclusive.
513 : singleton_map.first = false;
514 : if (proc.has_value()) {
515 : ++num_requested_nonexclusive_singletons_;
516 : requested_nonexclusive_procs_.insert(static_cast<size_t>(*proc));
517 : }
518 : }
519 : } else {
520 : // This singleton doesn't have a SingletonInfoHolder so it automatically
521 : // isn't exclusive and gets set assigned an automatic proc.
522 : singleton_map.first = false;
523 : // nullopt is a sentinel for auto
524 : singleton_map.second = std::nullopt;
525 : }
526 : };
527 :
528 : // Create a map between each singleton, whether it is exclusive, and which
529 : // proc it wants to be on. Use nullopt as a sentinel for choosing the proc
530 : // automatically.
531 : tmpl::for_each<singletons>(parse_singletons);
532 : [[maybe_unused]] const auto sanity_checks = [this, &context,
533 : &requested_procs](
534 : const auto component_v) {
535 : using component = tmpl::type_from<decltype(component_v)>;
536 : auto& singleton_map = tuples::get<LocalTag<component>>(singleton_map_);
537 :
538 : const bool exclusive = singleton_map.first;
539 : const auto proc = singleton_map.second;
540 :
541 : // Check exclusive singletons that requested to be on a specific proc
542 : // if any other singletons requested to be on the same proc (exclusive
543 : // or not)
544 : if (exclusive and proc.has_value() and requested_procs.count(*proc) > 1) {
545 : PARSE_ERROR(context,
546 : "Two singletons have requested to be on proc "
547 : << proc.value()
548 : << ", but at least one of them has requested to be "
549 : "exclusively on this proc.");
550 : }
551 : };
552 :
553 : // Do some inter-singleton sanity checks to avoid inconsistencies
554 : tmpl::for_each<singletons>(sanity_checks);
555 : }
556 : }
557 :
558 : template <typename Metavariables>
559 : ResourceInfo<Metavariables>::ResourceInfo(const bool avoid_global_proc_0,
560 : const Options::Context& context)
561 : : ResourceInfo(avoid_global_proc_0, std::nullopt, context) {}
562 :
563 : template <typename Metavariables>
564 : void ResourceInfo<Metavariables>::pup(PUP::er& p) {
565 : p | avoid_global_proc_0_;
566 : p | singleton_map_has_been_set_;
567 : p | num_exclusive_singletons_;
568 : p | num_procs_to_ignore_;
569 : p | num_requested_exclusive_singletons_;
570 : p | num_requested_nonexclusive_singletons_;
571 : p | requested_nonexclusive_procs_;
572 : p | procs_to_ignore_;
573 : p | procs_available_for_elements_;
574 : p | singleton_map_;
575 : }
576 :
577 : template <typename Metavariables>
578 : template <typename Component>
579 : auto ResourceInfo<Metavariables>::get_singleton_info() const {
580 : if (not singleton_map_has_been_set_) {
581 : singleton_map_not_built();
582 : }
583 :
584 : const auto& singleton_map = tuples::get<LocalTag<Component>>(singleton_map_);
585 : return SingletonInfoHolder<Component>{
586 : {static_cast<int>(*singleton_map.second)}, singleton_map.first};
587 : }
588 :
589 : template <typename Metavariables>
590 : const std::unordered_set<size_t>& ResourceInfo<Metavariables>::procs_to_ignore()
591 : const {
592 : if (not singleton_map_has_been_set_) {
593 : singleton_map_not_built();
594 : }
595 : return procs_to_ignore_;
596 : }
597 :
598 : template <typename Metavariables>
599 : const std::set<size_t>&
600 : ResourceInfo<Metavariables>::procs_available_for_elements() const {
601 : if (not singleton_map_has_been_set_) {
602 : singleton_map_not_built();
603 : }
604 : return procs_available_for_elements_;
605 : }
606 :
607 : template <typename Metavariables>
608 : template <typename Component>
609 : size_t ResourceInfo<Metavariables>::proc_for() const {
610 : if (not singleton_map_has_been_set_) {
611 : singleton_map_not_built();
612 : }
613 : return *tuples::get<LocalTag<Component>>(singleton_map_).second;
614 : }
615 :
616 : template <typename Metavars>
617 0 : bool operator==(const ResourceInfo<Metavars>& lhs,
618 : const ResourceInfo<Metavars>& rhs) {
619 : return lhs.avoid_global_proc_0_ == rhs.avoid_global_proc_0_ and
620 : lhs.singleton_map_has_been_set_ == rhs.singleton_map_has_been_set_ and
621 : lhs.num_exclusive_singletons_ == rhs.num_exclusive_singletons_ and
622 : lhs.num_procs_to_ignore_ == rhs.num_procs_to_ignore_ and
623 : lhs.num_requested_exclusive_singletons_ ==
624 : rhs.num_requested_exclusive_singletons_ and
625 : lhs.num_requested_nonexclusive_singletons_ ==
626 : rhs.num_requested_nonexclusive_singletons_ and
627 : lhs.requested_nonexclusive_procs_ ==
628 : rhs.requested_nonexclusive_procs_ and
629 : lhs.procs_to_ignore_ == rhs.procs_to_ignore_ and
630 : lhs.procs_available_for_elements_ ==
631 : rhs.procs_available_for_elements_ and
632 : lhs.singleton_map_ == rhs.singleton_map_;
633 : }
634 :
635 : template <typename Metavars>
636 0 : bool operator!=(const ResourceInfo<Metavars>& lhs,
637 : const ResourceInfo<Metavars>& rhs) {
638 : return not(lhs == rhs);
639 : }
640 :
641 : template <typename Metavariables>
642 : template <typename Cache>
643 : void ResourceInfo<Metavariables>::build_singleton_map(const Cache& cache) {
644 : const size_t num_procs = Parallel::number_of_procs<size_t>(cache);
645 : const size_t num_nodes = Parallel::number_of_nodes<size_t>(cache);
646 :
647 : // We don't do procs_to_ignore_.size() here because the auto singletons who
648 : // requested to be exclusive haven't been assigned yet so their procs haven't
649 : // been added to procs_to_ignore_
650 : if (num_procs_to_ignore_ >= num_procs) {
651 : ERROR(
652 : "The total number of cores requested is less than or equal to the "
653 : "number of cores that requested to be exclusive, i.e. without "
654 : "array elements or multiple singletons. The array elements have "
655 : "nowhere to be placed. Number of cores requested: "
656 : << num_procs << ". Number of cores that requested to be exclusive: "
657 : << num_procs_to_ignore_ << ".");
658 : }
659 :
660 : // Check if any singletons that requested to be on specific proc requested to
661 : // be on a proc beyond the last proc.
662 : tmpl::for_each<singletons>([this, &num_procs](const auto component_v) {
663 : using component = tmpl::type_from<decltype(component_v)>;
664 : auto& singleton_map = tuples::get<LocalTag<component>>(singleton_map_);
665 : const auto proc = singleton_map.second;
666 :
667 : if (proc.has_value() and *proc > num_procs - 1) {
668 : ERROR("Singleton " << pretty_type::name<component>()
669 : << " requested to be placed on proc " << *proc
670 : << ", but that proc is beyond the last proc "
671 : << num_procs - 1 << ".");
672 : }
673 : });
674 :
675 : // At this point, all requested singletons have been allocated on their
676 : // desired procs. This leaves just the auto singletons left, both exclusive
677 : // and non-exclusive.
678 :
679 : // First allocate auto exclusive singletons
680 : // This first vector will keep track of the total number of singletons on each
681 : // node so we can spread them out evenly
682 : std::vector<size_t> singletons_on_each_node(num_nodes, 0_st);
683 : // This second vector keeps track of only the auto exclusive singletons on
684 : // each node
685 : std::vector<size_t> auto_exclusive_singletons_on_each_node(num_nodes, 0_st);
686 : // Populate requested exclusive singletons on each node with input options. We
687 : // couldn't have done this in the constructor because we didn't know how many
688 : // nodes there were or how many procs were on each node. We'll do the
689 : // non-exclusive ones later.
690 : tmpl::for_each<singletons>(
691 : [this, &cache, &singletons_on_each_node](const auto component_v) {
692 : using component = tmpl::type_from<decltype(component_v)>;
693 : auto& singleton_map = tuples::get<LocalTag<component>>(singleton_map_);
694 : const bool exclusive = singleton_map.first;
695 : const auto proc = singleton_map.second;
696 :
697 : if (exclusive and proc.has_value()) {
698 : ++singletons_on_each_node[Parallel::node_of<size_t>(*proc, cache)];
699 : }
700 : });
701 :
702 : size_t remaining_auto_exclusive_singletons =
703 : num_exclusive_singletons_ - num_requested_exclusive_singletons_;
704 : // Start with the min number of singletons on a node as our baseline. Then,
705 : // while we still have auto exclusive singletons to place, we loop over all
706 : // nodes and place singletons on nodes with this minimum number. Once all
707 : // nodes have at least this minimum number, we increment the minimum number
708 : // and loop over the nodes again
709 : size_t min_num_singletons_on_a_node = *std::min_element(
710 : singletons_on_each_node.begin(), singletons_on_each_node.end());
711 : while (remaining_auto_exclusive_singletons > 0) {
712 : for (size_t i = 0; i < num_nodes; i++) {
713 : // If this node has more than the minimum number of singletons on it, skip
714 : // it for now
715 : if (singletons_on_each_node[i] > min_num_singletons_on_a_node) {
716 : continue;
717 : }
718 : // Since nodes can have different number of procs, we check that we
719 : // haven't exhausted the number of procs on this node. This check is ok
720 : // right now because we haven't included any nonexclusive singletons in
721 : // singletons_on_each_node yet.
722 : if (not(singletons_on_each_node[i] <
723 : Parallel::procs_on_node<size_t>(i, cache))) {
724 : continue;
725 : }
726 :
727 : ++singletons_on_each_node[i];
728 : ++auto_exclusive_singletons_on_each_node[i];
729 : --remaining_auto_exclusive_singletons;
730 :
731 : // We need to break out of both loops here. Use a goto.
732 : if (remaining_auto_exclusive_singletons == 0) {
733 : goto break_auto_exclusive_loops;
734 : }
735 : } // for (size_t i = 0; i < num_nodes; i++)
736 :
737 : ++min_num_singletons_on_a_node;
738 : } // while (remaining_auto_exclusive_singletons > 0)
739 : break_auto_exclusive_loops:
740 :
741 : ASSERT(remaining_auto_exclusive_singletons == 0,
742 : "Not all exclusive singletons have been allocated. The remaining "
743 : "number of singletons to be allocated is "
744 : << remaining_auto_exclusive_singletons << ".");
745 :
746 : // Actually allocate the auto exclusive singletons
747 : size_t current_node = 0;
748 : tmpl::for_each<singletons>([this, &cache, &current_node,
749 : &auto_exclusive_singletons_on_each_node](
750 : const auto component_v) {
751 : using component = tmpl::type_from<decltype(component_v)>;
752 : auto& singleton_map = tuples::get<LocalTag<component>>(singleton_map_);
753 : const bool exclusive = singleton_map.first;
754 : const auto int_proc = singleton_map.second;
755 :
756 : // Only allocating auto exclusive at the moment
757 : if (exclusive and not int_proc.has_value()) {
758 : while (auto_exclusive_singletons_on_each_node[current_node] == 0) {
759 : ++current_node;
760 : }
761 :
762 : size_t proc = Parallel::first_proc_on_node<size_t>(current_node, cache);
763 : // Don't place two exclusive singletons on the same proc, but also if
764 : // a singleton requested a specific proc, whether or not it is
765 : // exclusive, we can't place an exclusive singleton on that proc. That
766 : // defeats the whole purpose of requesting the specific proc...
767 : while (procs_to_ignore_.find(proc) != procs_to_ignore_.end() or
768 : requested_nonexclusive_procs_.count(proc) > 0) {
769 : ++proc;
770 : }
771 :
772 : singleton_map.second = proc;
773 : procs_to_ignore_.insert(proc);
774 :
775 : --auto_exclusive_singletons_on_each_node[current_node];
776 : }
777 : });
778 :
779 : ASSERT(alg::accumulate(auto_exclusive_singletons_on_each_node, 0_st) == 0,
780 : "Not all auto exclusive singletons have been allocated. The remaining "
781 : "number of auto exclusive singletons to be allocated is "
782 : << alg::accumulate(auto_exclusive_singletons_on_each_node, 0_st));
783 :
784 : // procs_to_ignore_ is now complete. Now construct
785 : // procs_available_for_elements_
786 : for (size_t i = 0; i < num_procs; i++) {
787 : if (procs_to_ignore_.find(i) == procs_to_ignore_.end()) {
788 : procs_available_for_elements_.insert(i);
789 : }
790 : }
791 :
792 : // At this point, all auto exclusive singletons have been allocated. Now the
793 : // only singletons left are auto non-exclusive. We use vectors of
794 : // std::optional<size_t> here as sentinels for procs which should be avoided.
795 : // A nullopt means that the proc shouldn't have auto nonexclusive singletons
796 : // on it. When we have lots of cores to run on (hundreds of thousands or even
797 : // millions), these vectors will take up a non-negligible amount of memory.
798 : // However, we only need to do this once per executable at the very beginning
799 : // so it shouldn't really matter
800 : std::vector<std::optional<size_t>> nonexclusive_singletons_on_each_proc(
801 : num_procs, std::optional<size_t>(0_st));
802 : // This vector has the default be nullopt rather than 0, because this will
803 : // only be used when we actually place singletons. We only care which procs
804 : // have singletons, which will usually be a small subset of the total procs.
805 : std::vector<std::optional<size_t>> auto_nonexclusive_singletons_on_each_proc(
806 : num_procs, std::nullopt);
807 : for (const size_t proc : procs_to_ignore_) {
808 : nonexclusive_singletons_on_each_proc[proc] = std::nullopt;
809 : }
810 : // Now we add in the requested nonexclusive to the total number of singletons
811 : // per node
812 : for (const auto& proc : requested_nonexclusive_procs_) {
813 : ++*nonexclusive_singletons_on_each_proc[proc];
814 : ++singletons_on_each_node[Parallel::node_of<size_t>(proc, cache)];
815 : }
816 :
817 : size_t remaining_auto_nonexclusive_singletons =
818 : tmpl::size<singletons>::value - num_exclusive_singletons_ -
819 : num_requested_nonexclusive_singletons_;
820 :
821 : // This serves the same purpose as before
822 : min_num_singletons_on_a_node = *std::min_element(
823 : singletons_on_each_node.begin(), singletons_on_each_node.end());
824 : while (remaining_auto_nonexclusive_singletons > 0) {
825 : for (size_t i = 0; i < num_nodes; i++) {
826 : const int first_proc = Parallel::first_proc_on_node<int>(i, cache);
827 : const int procs_on_node = Parallel::procs_on_node<int>(i, cache);
828 : const int first_proc_next_node = first_proc + procs_on_node;
829 :
830 : auto first_proc_iter =
831 : std::next(nonexclusive_singletons_on_each_proc.begin(), first_proc);
832 : auto first_proc_next_node_iter = std::next(
833 : nonexclusive_singletons_on_each_proc.begin(), first_proc_next_node);
834 :
835 : // Get the proc on this node with the minimum number of singletons. This
836 : // serves the same purpose as min_num_singletons_on_a_node except now for
837 : // procs on a specific node
838 : auto& min_num_singletons_on_a_proc_opt =
839 : *std::min_element(first_proc_iter, first_proc_next_node_iter,
840 : [](const auto& a, const auto& b) {
841 : if (a.has_value() and b.has_value()) {
842 : return a.value() < b.value();
843 : } else {
844 : return a.has_value();
845 : }
846 : });
847 :
848 : // Check if this node can accommodate more singletons. Do two checks:
849 : // 1. This node doesn't have more than the minimum number of singletons
850 : // 2. That this node isn't filled up with exclusive singletons (nullopt =
851 : // all procs on this node are taken)
852 : if (singletons_on_each_node[i] > min_num_singletons_on_a_node or
853 : not min_num_singletons_on_a_proc_opt.has_value()) {
854 : continue;
855 : }
856 :
857 : // At this point, we have guaranteed that this node should have an auto
858 : // nonexclusive singleton on it somewhere. Now determine where
859 : size_t min_num_singletons_on_a_proc =
860 : min_num_singletons_on_a_proc_opt.value();
861 :
862 : // Find the first available proc on this node. Check that
863 : // 1. This proc is available (i.e. no exclusive singletons on it)
864 : // 2. This proc has the minimum number of singletons on it for this node
865 : // so we distribute the singletons evenly over all the procs on this
866 : // node
867 : auto proc_iter =
868 : std::find_if(first_proc_iter, first_proc_next_node_iter,
869 : [&min_num_singletons_on_a_proc](const auto& proc_opt) {
870 : return proc_opt.has_value() and
871 : *proc_opt == min_num_singletons_on_a_proc;
872 : });
873 :
874 : // Get the index of the overall vector. We need this because we're going
875 : // to be indexing two separate vectors, otherwise we could have just used
876 : // the value of the iterator
877 : const size_t proc = static_cast<size_t>(std::distance(
878 : nonexclusive_singletons_on_each_proc.begin(), proc_iter));
879 :
880 : // Increment things
881 : ++*nonexclusive_singletons_on_each_proc[proc];
882 : ++singletons_on_each_node[i];
883 : if (auto_nonexclusive_singletons_on_each_proc[proc].has_value()) {
884 : ++*auto_nonexclusive_singletons_on_each_proc[proc];
885 : } else {
886 : auto_nonexclusive_singletons_on_each_proc[proc] = 1;
887 : }
888 : --remaining_auto_nonexclusive_singletons;
889 :
890 : // We need to break out of both loops here. Use a goto.
891 : if (remaining_auto_nonexclusive_singletons == 0) {
892 : goto break_auto_nonexclusive_loops;
893 : }
894 : } // for (size_t i = 0; i < num_nodes; i++)
895 :
896 : ++min_num_singletons_on_a_node;
897 : } // while (remaining_auto_nonexclusive_singletons > 0)
898 : break_auto_nonexclusive_loops:
899 :
900 : ASSERT(remaining_auto_nonexclusive_singletons == 0,
901 : "Not all nonexclusive singletons have been allocated. The remaining "
902 : "number of singletons to be allocated is "
903 : << remaining_auto_nonexclusive_singletons << ".");
904 :
905 : // Actually allocate the auto nonexclusive singletons
906 : std::stringstream ss;
907 : ss << "\nAllocating Singletons:\n";
908 : size_t current_proc = 0;
909 : tmpl::for_each<singletons>([this, &current_proc, &cache, &ss,
910 : &auto_nonexclusive_singletons_on_each_proc](
911 : const auto component_v) {
912 : using component = tmpl::type_from<decltype(component_v)>;
913 : auto& singleton_map = tuples::get<LocalTag<component>>(singleton_map_);
914 : const auto proc_opt = singleton_map.second;
915 :
916 : // At this point, the only singletons without a proc are auto nonexclusive
917 : if (not proc_opt.has_value()) {
918 : while (not auto_nonexclusive_singletons_on_each_proc[current_proc]
919 : .has_value()) {
920 : ++current_proc;
921 : }
922 :
923 : singleton_map.second = current_proc;
924 :
925 : --*auto_nonexclusive_singletons_on_each_proc[current_proc];
926 : // Indicate that there are no more singletons to be placed on this proc
927 : if (*auto_nonexclusive_singletons_on_each_proc[current_proc] == 0) {
928 : auto_nonexclusive_singletons_on_each_proc[current_proc] = std::nullopt;
929 : }
930 : }
931 :
932 : // Print some diagnostic info to stdout for each singleton. This can aid in
933 : // debugging.
934 : ss << pretty_type::name<component>();
935 : ss << " on node " << Parallel::node_of<int>(*singleton_map.second, cache);
936 : ss << ", global proc " << *singleton_map.second;
937 : ss << ", exclusive = " << std::boolalpha << singleton_map.first << "\n";
938 : });
939 :
940 : ss << "\n";
941 : Parallel::printf("%s", ss.str());
942 :
943 : // Now that everything has been set, signal that we don't have to do
944 : // this again.
945 : singleton_map_has_been_set_ = true;
946 : }
947 : } // namespace Parallel
|