SpECTRE Documentation Coverage Report
Current view: top level - Parallel - ResourceInfo.hpp Hit Total Coverage
Commit: 1f2210958b4f38fdc0400907ee7c6d5af5111418 Lines: 14 92 15.2 %
Date: 2025-12-05 05:03:31
Legend: Lines: hit not hit

          Line data    Source code
       1           0 : // Distributed under the MIT License.
       2             : // See LICENSE.txt for details.
       3             : 
       4             : #pragma once
       5             : 
       6             : #include <algorithm>
       7             : #include <cstddef>
       8             : #include <ios>
       9             : #include <optional>
      10             : #include <pup.h>
      11             : #include <set>
      12             : #include <sstream>
      13             : #include <string>
      14             : #include <type_traits>
      15             : #include <unordered_set>
      16             : #include <utility>
      17             : 
      18             : #include "DataStructures/DataBox/DataBox.hpp"
      19             : #include "Options/Auto.hpp"
      20             : #include "Options/Context.hpp"
      21             : #include "Options/ParseError.hpp"
      22             : #include "Options/String.hpp"
      23             : #include "Parallel/Algorithms/AlgorithmSingletonDeclarations.hpp"
      24             : #include "Parallel/Info.hpp"
      25             : #include "Parallel/ParallelComponentHelpers.hpp"
      26             : #include "Parallel/Printf/Printf.hpp"
      27             : #include "Parallel/TypeTraits.hpp"
      28             : #include "Utilities/ErrorHandling/Assert.hpp"
      29             : #include "Utilities/ErrorHandling/Error.hpp"
      30             : #include "Utilities/Numeric.hpp"
      31             : #include "Utilities/PrettyType.hpp"
      32             : #include "Utilities/Serialization/PupStlCpp17.hpp"
      33             : #include "Utilities/StdHelpers.hpp"
      34             : #include "Utilities/System/ParallelInfo.hpp"
      35             : #include "Utilities/TMPL.hpp"
      36             : #include "Utilities/TaggedTuple.hpp"
      37             : #include "Utilities/TypeTraits/CreateHasTypeAlias.hpp"
      38             : 
      39             : /// \cond
      40             : namespace Parallel::Tags {
      41             : template <typename Component>
      42             : struct SingletonInfo;
      43             : struct AvoidGlobalProc0;
      44             : template <typename Metavariables>
      45             : struct ResourceInfo;
      46             : }  // namespace Parallel::Tags
      47             : /// \endcond
      48             : 
      49             : namespace Parallel {
      50             : /*!
      51             :  * \ingroup ParallelGroup
      52             :  * \brief Holds resource info for a single singleton component
      53             :  *
      54             :  * \details Holds what proc the singleton is to be placed on and whether that
      55             :  * proc should be exclusive, i.e. no array component elements or other
      56             :  * singletons placed on that proc. Instead of specifying a proc, the proc can be
      57             :  * chosen automatically by using the `Options::Auto` option.
      58             :  *
      59             :  * The template parameter `Component` is only used to identify which singleton
      60             :  * component this SingletonInfoHolder belongs to.
      61             :  */
      62             : template <typename Component>
      63           1 : struct SingletonInfoHolder {
      64           0 :   struct Proc {
      65           0 :     using type = Options::Auto<int>;
      66           0 :     static constexpr Options::String help = {
      67             :         "Proc to put singleton on. This can be determined automatically if "
      68             :         "desired by specifying 'Auto' (without quotes)."};
      69             :   };
      70             : 
      71           0 :   struct Exclusive {
      72           0 :     using type = bool;
      73           0 :     static constexpr Options::String help = {
      74             :         "Reserve this proc for this singleton. No array component elements or "
      75             :         "other singleton components will be placed on this proc."};
      76             :   };
      77             : 
      78           0 :   using options = tmpl::list<Proc, Exclusive>;
      79           0 :   static constexpr Options::String help = {
      80             :       "Resource options for a single singleton."};
      81             : 
      82           0 :   SingletonInfoHolder(std::optional<int> input_proc, const bool input_exclusive,
      83             :                       const Options::Context& context = {})
      84             :       : exclusive_(input_exclusive) {
      85             :     // If there is no value, we don't need to error so use 0 as a comparator
      86             :     // in both cases
      87             :     if (input_proc.value_or(0) < 0) {
      88             :       PARSE_ERROR(
      89             :           context,
      90             :           "Proc must be a non-negative integer. Please choose another proc.");
      91             :     }
      92             : 
      93             :     proc_ = input_proc.has_value()
      94             :                 ? std::optional<size_t>(static_cast<size_t>(input_proc.value()))
      95             :                 : std::nullopt;
      96             :   }
      97             : 
      98           0 :   SingletonInfoHolder() = default;
      99           0 :   SingletonInfoHolder(const SingletonInfoHolder& /*rhs*/) = default;
     100           0 :   SingletonInfoHolder& operator=(const SingletonInfoHolder& /*rhs*/) = default;
     101           0 :   SingletonInfoHolder(SingletonInfoHolder&& /*rhs*/) = default;
     102           0 :   SingletonInfoHolder& operator=(SingletonInfoHolder&& /*rhs*/) = default;
     103           0 :   ~SingletonInfoHolder() = default;
     104             : 
     105             :   // NOLINTNEXTLINE(google-runtime-references)
     106           0 :   void pup(PUP::er& p) {
     107             :     p | proc_;
     108             :     p | exclusive_;
     109             :   };
     110             : 
     111             :   /// Proc that singleton is to be placed on. If the optional is a std::nullopt,
     112             :   /// then the proc should be chosen automatically.
     113           1 :   std::optional<size_t> proc() const { return proc_; }
     114             : 
     115             :   /// Whether or not the singleton wants to be exclusive on the proc.
     116           1 :   bool is_exclusive() const { return exclusive_; }
     117             : 
     118             :  private:
     119             :   template <typename ParallelComponent>
     120           0 :   friend bool operator==(const SingletonInfoHolder<ParallelComponent>& lhs,
     121             :                          const SingletonInfoHolder<ParallelComponent>& rhs);
     122             :   // We use size_t here because we want a non-negative integer, but we use int
     123             :   // in the option because we want to protect against negative numbers. And a
     124             :   // negative size_t is actually a really large value (it wraps around)
     125           0 :   std::optional<size_t> proc_{std::nullopt};
     126           0 :   bool exclusive_{false};
     127             : };
     128             : 
     129             : template <typename ParallelComponent>
     130           0 : bool operator==(const SingletonInfoHolder<ParallelComponent>& lhs,
     131             :                 const SingletonInfoHolder<ParallelComponent>& rhs) {
     132             :   return lhs.proc_ == rhs.proc_ and lhs.exclusive_ == rhs.exclusive_;
     133             : }
     134             : 
     135             : template <typename ParallelComponent>
     136           0 : bool operator!=(const SingletonInfoHolder<ParallelComponent>& lhs,
     137             :                 const SingletonInfoHolder<ParallelComponent>& rhs) {
     138             :   return not(lhs == rhs);
     139             : }
     140             : 
     141             : template <typename ParallelComponents>
     142           0 : struct SingletonPack;
     143             : 
     144             : /*!
     145             :  * \ingroup ParallelGroup
     146             :  * \brief Holds options for a group of singleton components.
     147             :  *
     148             :  * \details The info for each singleton in the `ParallelComponents` template
     149             :  *  pack is stored in an individual `Parallel::SingletonInfoHolder`.
     150             :  *
     151             :  * You can pass `Auto` as an option for each singleton in an input file and each
     152             :  * singleton will be constructed as a default `Parallel::SingletonInfoHolder`.
     153             :  */
     154             : template <typename... ParallelComponents>
     155           1 : struct SingletonPack<tmpl::list<ParallelComponents...>> {
     156             :  private:
     157             :   static_assert((Parallel::is_singleton_v<ParallelComponents> and ...),
     158             :                 "At least one of the parallel components passed to "
     159             :                 "SingletonPack is not a Singleton.");
     160           0 :   using component_list = tmpl::list<ParallelComponents...>;
     161             : 
     162             :   template <typename Component>
     163           0 :   struct LocalTag {
     164           0 :     using type = SingletonInfoHolder<Component>;
     165             :   };
     166           0 :   using local_tags =
     167             :       tmpl::transform<component_list, tmpl::bind<LocalTag, tmpl::_1>>;
     168             : 
     169             :  public:
     170             :   template <typename Component>
     171           0 :   struct SingletonOption {
     172           0 :     using type = Options::Auto<SingletonInfoHolder<Component>>;
     173           0 :     static std::string name() { return pretty_type::name<Component>(); }
     174           0 :     static constexpr Options::String help = {
     175             :         "Resource options for a specific singleton."};
     176             :   };
     177             : 
     178           0 :   using options =
     179             :       tmpl::transform<component_list, tmpl::bind<SingletonOption, tmpl::_1>>;
     180           0 :   static constexpr Options::String help = {
     181             :       "Resource options for all singletons."};
     182             : 
     183           0 :   SingletonPack(
     184             :       const std::optional<
     185             :           SingletonInfoHolder<ParallelComponents>>&... singleton_info_holders,
     186             :       const Options::Context& /*context*/ = {})
     187             :       : procs_(tuples::tagged_tuple_from_typelist<local_tags>(
     188             :             singleton_info_holders.value_or(
     189             :                 SingletonInfoHolder<ParallelComponents>{})...)) {}
     190             : 
     191           0 :   SingletonPack() = default;
     192           0 :   SingletonPack(const SingletonPack& /*rhs*/) = default;
     193           0 :   SingletonPack& operator=(const SingletonPack& /*rhs*/) = default;
     194           0 :   SingletonPack(SingletonPack&& /*rhs*/) = default;
     195           0 :   SingletonPack& operator=(SingletonPack&& /*rhs*/) = default;
     196           0 :   ~SingletonPack() = default;
     197             : 
     198             :   // NOLINTNEXTLINE(google-runtime-references)
     199           0 :   void pup(PUP::er& p) { p | procs_; };
     200             : 
     201             :   /// Get a const reference to the SingletonInfoHolder for the `Component`
     202             :   /// singleton
     203             :   template <typename Component>
     204           1 :   const auto& get() const {
     205             :     return tuples::get<LocalTag<Component>>(procs_);
     206             :   }
     207             : 
     208             :  private:
     209             :   template <typename... Components>
     210           0 :   friend bool operator==(const SingletonPack<tmpl::list<Components...>>& lhs,
     211             :                          const SingletonPack<tmpl::list<Components...>>& rhs);
     212             : 
     213           0 :   tuples::tagged_tuple_from_typelist<local_tags> procs_{};
     214             : };
     215             : 
     216             : template <typename... Components>
     217           0 : bool operator==(const SingletonPack<tmpl::list<Components...>>& lhs,
     218             :                 const SingletonPack<tmpl::list<Components...>>& rhs) {
     219             :   return lhs.procs_ == rhs.procs_;
     220             : }
     221             : 
     222             : template <typename... Components>
     223           0 : bool operator!=(const SingletonPack<tmpl::list<Components...>>& lhs,
     224             :                 const SingletonPack<tmpl::list<Components...>>& rhs) {
     225             :   return not(lhs == rhs);
     226             : }
     227             : 
     228             : namespace detail {
     229             : template <typename Metavariables>
     230             : using singleton_components =
     231             :     tmpl::filter<typename Metavariables::component_list,
     232             :                  Parallel::is_singleton<tmpl::_1>>;
     233             : }  // namespace detail
     234             : 
     235             : /*!
     236             :  * \ingroup ParallelGroup
     237             :  * \brief Holds resource info for all singletons and for avoiding placing array
     238             :  * elements/singletons on the global proc 0.
     239             :  *
     240             :  * \details This can be used for placing all singletons in an executable.
     241             :  *
     242             :  * If you have no singletons, you'll need the following block in the input file
     243             :  * (where you can set the value of AvoidGlobalProc0 to true or false):
     244             :  *
     245             :  * \code {.yaml}
     246             :  * ResourceInfo:
     247             :  *   AvoidGlobalProc0: true
     248             :  * \endcode
     249             :  *
     250             :  * If you have singletons, but do not want to assign any of them to a specific
     251             :  * proc or be exclusive on a proc, you'll need the following block in the input
     252             :  * file (where you can set the value of AvoidGlobalProc0 to true or false):
     253             :  *
     254             :  * \code {.yaml}
     255             :  * ResourceInfo:
     256             :  *   AvoidGlobalProc0: true
     257             :  *   Singletons: Auto
     258             :  * \endcode
     259             :  *
     260             :  * Otherwise, you will need to specify a block in the input file as below,
     261             :  * where you will need to specify the options for each singleton:
     262             :  *
     263             :  * \code {.yaml}
     264             :  * ResourceInfo:
     265             :  *   AvoidGlobalProc0: true
     266             :  *   Singletons:
     267             :  *     MySingleton1:
     268             :  *       Proc: 2
     269             :  *       Exclusive: true
     270             :  *     MySingleton2: Auto
     271             :  * \endcode
     272             :  *
     273             :  * where `MySingleton1` is the `pretty_type::name` of the singleton component
     274             :  * and the options for each singleton are described in
     275             :  * `Parallel::SingletonInfoHolder` (You can use `Auto` for each singleton that
     276             :  * you want to have its proc determined automatically and be non-exclusive,
     277             :  * like `MySingleton2`).
     278             :  *
     279             :  * Several consistency checks are done during option parsing to avoid user
     280             :  * error. However, some checks can't be done during option parsing because the
     281             :  * number of nodes/procs is needed to determine if there is an inconsistency.
     282             :  * These checks are done during runtime, just before the map of singletons is
     283             :  * created.
     284             :  *
     285             :  * To automatically place singletons, we use a custom algorithm that will
     286             :  * distribute singletons evenly over the number of nodes, and evenly over the
     287             :  * procs on a node. This will help keep communication costs down by distributing
     288             :  * the workload over all of the communication cores (one communication core per
     289             :  * charm node), and ensure that our resources are being maximally utilized (i.e.
     290             :  * one core doesn't have all the singletons on it).
     291             :  *
     292             :  * Defining some terminology for singletons: `requested` means that a specific
     293             :  * processor was requested in the input file; `auto` means that the processor
     294             :  * should be chosen automatically; `exclusive` means that no other singletons or
     295             :  * array elements should be placed on this singleton's processor; `nonexclusive`
     296             :  * means that you *can* place other singletons or array elements on this
     297             :  * singleton's processor. The algorithm that distributes the singletons is as
     298             :  * follows:
     299             :  *
     300             :  * 1. Allocate all singletons that `requested` specific processors, both
     301             :  *    `exclusive` and `nonexclusive`. This is done during option parsing.
     302             :  * 2. Allocate `auto exclusive` singletons, distributing the total number of
     303             :  *    `exclusive` singletons (`auto` + `requested`) as evenly as possible over
     304             :  *    the number of nodes. We say "as evenly as possible" because this depends
     305             :  *    on the `requested exclusive` singletons. For example, if we have 4 nodes
     306             :  *    and 5 cores per node, the number of `requested exclusive` singletons on
     307             :  *    each node is (0, 1, 4, 1), and we have 3 `auto exclusive` singletons to
     308             :  *    place, the best distribution of `exclusive` singletons we can achieve
     309             :  *    given our constraints is (2, 2, 4, 1). Clearly this is not the *most*
     310             :  *    evenly distributed the `exclusive` singletons could be. However, this *is*
     311             :  *    the most evenly distributed they could be given the starting distribution
     312             :  *    from the input file.
     313             :  * 3. Allocate `auto nonexclusive` singletons, distributing the total number of
     314             :  *    `nonexclusive` singletons (`auto` + `requested`): First, as evenly as
     315             :  *    possible over the number of nodes. Then, on each node, distributing the
     316             :  *    singletons as evenly as possible over the number of processors on that
     317             :  *    node. The same disclaimer about "as evenly as possible" from the previous
     318             :  *    step applies here.
     319             :  *
     320             :  * The goal of this algorithm is to mimic, as best as possible, how a human
     321             :  * would distribute this workload. It isn't perfect, but is a significant
     322             :  * improvement over placing singletons on one proc after another starting from
     323             :  * global proc 0.
     324             :  */
     325             : template <typename Metavariables>
     326           1 : struct ResourceInfo {
     327             :  private:
     328           0 :   using singletons = detail::singleton_components<Metavariables>;
     329             : 
     330             :   template <typename Component>
     331           0 :   struct LocalTag {
     332             :     // exclusive, proc
     333           0 :     using type = std::pair<bool, std::optional<size_t>>;
     334             :   };
     335           0 :   using local_tags =
     336             :       tmpl::transform<singletons, tmpl::bind<LocalTag, tmpl::_1>>;
     337             : 
     338             :  public:
     339           0 :   struct Singletons {
     340           0 :     using type = Options::Auto<SingletonPack<singletons>>;
     341           0 :     static constexpr Options::String help = {
     342             :         "Resource options for all singletons."};
     343             :   };
     344             : 
     345           0 :   struct AvoidGlobalProc0 {
     346           0 :     using type = bool;
     347           0 :     static constexpr Options::String help = {
     348             :         "Whether to avoid placing Array elements or singletons on global proc "
     349             :         "0."};
     350             :   };
     351             : 
     352           0 :   using options = tmpl::push_front<
     353             :       tmpl::conditional_t<tmpl::size<singletons>::value != 0,
     354             :                           tmpl::list<Singletons>, tmpl::list<>>,
     355             :       AvoidGlobalProc0>;
     356             : 
     357           0 :   static constexpr Options::String help = {
     358             :       "Resource options for a simulation. This information will be used when "
     359             :       "placing Array and Singleton parallel components on the requested "
     360             :       "resources."};
     361             : 
     362             :   /// The main constructor. All other constructors that take options will call
     363             :   /// this one. This constructor holds all checks able to be done during option
     364             :   /// parsing.
     365           1 :   ResourceInfo(const bool avoid_global_proc_0,
     366             :                const std::optional<SingletonPack<singletons>>& singleton_pack,
     367             :                const Options::Context& context = {});
     368             : 
     369             :   /// This constructor is used when only AvoidGlobalProc0 is specified, but no
     370             :   /// SingletonInfoHolders are specified. Calls the main constructor with an
     371             :   /// empty SingletonPack.
     372           1 :   ResourceInfo(const bool avoid_global_proc_0,
     373             :                const Options::Context& context = {});
     374             : 
     375           0 :   ResourceInfo() = default;
     376           0 :   ResourceInfo(const ResourceInfo& /*rhs*/) = default;
     377           0 :   ResourceInfo& operator=(const ResourceInfo& /*rhs*/) = default;
     378           0 :   ResourceInfo(ResourceInfo&& /*rhs*/) = default;
     379           0 :   ResourceInfo& operator=(ResourceInfo&& /*rhs*/) = default;
     380           0 :   ~ResourceInfo() = default;
     381             : 
     382             :   // NOLINTNEXTLINE(google-runtime-references)
     383           0 :   void pup(PUP::er& p);
     384             : 
     385             :   /// Returns whether we should avoid placing array elements and singletons on
     386             :   /// the global zeroth proc. Default `false`.
     387           1 :   bool avoid_global_proc_0() const { return avoid_global_proc_0_; }
     388             : 
     389             :   /// Return a SingletonInfoHolder corresponding to `Component`
     390             :   template <typename Component>
     391           1 :   auto get_singleton_info() const;
     392             : 
     393             :   /// Returns a `std::unordered_set<size_t>` of processors that array components
     394             :   /// should avoid placing elements on. This should be passed to the
     395             :   /// `allocate_array` function of the array component
     396           1 :   const std::unordered_set<size_t>& procs_to_ignore() const;
     397             : 
     398             :   /// Returns a `std::set<size_t>` that has all processors available to put
     399             :   /// elements on, meaning processors that aren't ignored.
     400           1 :   const std::set<size_t>& procs_available_for_elements() const;
     401             : 
     402             :   /// Returns the proc that the singleton `Component` should be placed on.
     403             :   template <typename Component>
     404           1 :   size_t proc_for() const;
     405             : 
     406             :   /// \brief Actually builds the singleton map and allocates all the singletons.
     407             :   ///
     408             :   /// \details This could be done in the constructor, however, since we need the
     409             :   /// number of nodes to do some sanity checks, it can't. If an executable is
     410             :   /// run with the --check-options flag, we will be running on 1 proc and 1 node
     411             :   /// so some of the checks done in this function would fail. Unfortunately,
     412             :   /// that means the checks that require knowing the number of nodes now occur
     413             :   /// at runtime instead of option parsing. This is why the
     414             :   /// `singleton_map_has_been_set_` bool is necessary and why we check if this
     415             :   /// function has been called in most other member functions.
     416             :   ///
     417             :   /// To avoid a cyclic dependency between the GlobalCache and ResourceInfo, we
     418             :   /// template this function rather than explicitly use the GlobalCache because
     419             :   /// the GlobalCache depends on ResourceInfo
     420             :   ///
     421             :   /// This function should only be called once.
     422             :   template <typename Cache>
     423           1 :   void build_singleton_map(const Cache& cache);
     424             : 
     425             :  private:
     426             :   template <typename Metavars>
     427           0 :   friend bool operator==(const ResourceInfo<Metavars>& lhs,
     428             :                          const ResourceInfo<Metavars>& rhs);
     429             : 
     430           0 :   void singleton_map_not_built() const {
     431             :     ERROR(
     432             :         "The singleton map has not been built yet. You must call "
     433             :         "build_singleton_map() before you call this function.");
     434             :   }
     435           0 :   bool avoid_global_proc_0_{false};
     436           0 :   bool singleton_map_has_been_set_{false};
     437             :   // These are quantities that we will need for placing singletons which can be
     438             :   // determined just by option parsing
     439           0 :   size_t num_exclusive_singletons_{};
     440           0 :   size_t num_procs_to_ignore_{};
     441           0 :   size_t num_requested_exclusive_singletons_{};
     442           0 :   size_t num_requested_nonexclusive_singletons_{};
     443           0 :   std::unordered_multiset<size_t> requested_nonexclusive_procs_{};
     444             :   // Procs that are exclusive. These may or may not be specifically requested
     445           0 :   std::unordered_set<size_t> procs_to_ignore_{};
     446           0 :   std::set<size_t> procs_available_for_elements_{};
     447             :   // For each singleton (whether it has a SingletonInfo or not), maps whether
     448             :   // it's exclusive and what proc it is on.
     449           0 :   tuples::tagged_tuple_from_typelist<local_tags> singleton_map_{};
     450             : };
     451             : 
     452             : template <typename Metavariables>
     453             : ResourceInfo<Metavariables>::ResourceInfo(
     454             :     const bool avoid_global_proc_0,
     455             :     const std::optional<SingletonPack<singletons>>& opt_singleton_pack,
     456             :     const Options::Context& context)
     457             :     : avoid_global_proc_0_(avoid_global_proc_0) {
     458             :   if (avoid_global_proc_0_) {
     459             :     procs_to_ignore_.insert(0);
     460             :     ++num_procs_to_ignore_;
     461             :   }
     462             : 
     463             :   if constexpr (tmpl::size<singletons>::value > 0) {
     464             :     const auto& singleton_pack =
     465             :         opt_singleton_pack.value_or(SingletonPack<singletons>{});
     466             : 
     467             :     // Procs that were specifically requested. These may or may not be exclusive
     468             :     std::unordered_multiset<int> requested_procs{};
     469             : 
     470             :     [[maybe_unused]] const auto parse_singletons = [this, &context,
     471             :                                                     &singleton_pack,
     472             :                                                     &requested_procs](
     473             :                                                        const auto component_v) {
     474             :       using component = tmpl::type_from<decltype(component_v)>;
     475             :       auto& singleton_map = tuples::get<LocalTag<component>>(singleton_map_);
     476             : 
     477             :       // This singleton has a SingletonInfoHolder associated with it. Get all
     478             :       // the info necessary from it
     479             :       if constexpr (tmpl::list_contains_v<singletons, component>) {
     480             :         const auto& info_holder = singleton_pack.template get<component>();
     481             :         // Assign proc. If a specific proc is requested, add it to a map. We'll
     482             :         // check that exclusive singletons have unique procs once we've gone
     483             :         // through everything once
     484             :         const auto proc = info_holder.proc();
     485             :         singleton_map.second = proc;
     486             : 
     487             :         if (proc.has_value()) {
     488             :           requested_procs.insert(*proc);
     489             :         }
     490             : 
     491             :         if (info_holder.is_exclusive()) {
     492             :           // Check that no singleton has requested to be on proc 0 while
     493             :           // AvoidGlobalProc0 is simultaneously true.
     494             :           if (avoid_global_proc_0_ and proc.has_value() and *proc == 0) {
     495             :             PARSE_ERROR(
     496             :                 context,
     497             :                 "A singleton has requested to be exclusively on proc 0, "
     498             :                 "but the AvoidGlobalProc0 option is also set to true.");
     499             :           }
     500             : 
     501             :           // This singleton is exclusive so set it.
     502             :           singleton_map.first = true;
     503             :           ++num_exclusive_singletons_;
     504             :           ++num_procs_to_ignore_;
     505             :           // If it requested a specific proc, ignore it when assigning the rest
     506             :           // of the singletons
     507             :           if (proc.has_value()) {
     508             :             procs_to_ignore_.insert(static_cast<size_t>(*proc));
     509             :             ++num_requested_exclusive_singletons_;
     510             :           }
     511             :         } else {
     512             :           // This singleton is not exclusive.
     513             :           singleton_map.first = false;
     514             :           if (proc.has_value()) {
     515             :             ++num_requested_nonexclusive_singletons_;
     516             :             requested_nonexclusive_procs_.insert(static_cast<size_t>(*proc));
     517             :           }
     518             :         }
     519             :       } else {
     520             :         // This singleton doesn't have a SingletonInfoHolder so it automatically
     521             :         // isn't exclusive and gets assigned an automatic proc.
     522             :         singleton_map.first = false;
     523             :         // nullopt is a sentinel for auto
     524             :         singleton_map.second = std::nullopt;
     525             :       }
     526             :     };
     527             : 
     528             :     // Create a map between each singleton, whether it is exclusive, and which
     529             :     // proc it wants to be on. Use nullopt as a sentinel for choosing the proc
     530             :     // automatically.
     531             :     tmpl::for_each<singletons>(parse_singletons);
     532             :     [[maybe_unused]] const auto sanity_checks = [this, &context,
     533             :                                                  &requested_procs](
     534             :                                                     const auto component_v) {
     535             :       using component = tmpl::type_from<decltype(component_v)>;
     536             :       auto& singleton_map = tuples::get<LocalTag<component>>(singleton_map_);
     537             : 
     538             :       const bool exclusive = singleton_map.first;
     539             :       const auto proc = singleton_map.second;
     540             : 
     541             :       // For each exclusive singleton that requested a specific proc, check
     542             :       // whether any other singleton (exclusive or not) requested that same
     543             :       // proc.
     544             :       if (exclusive and proc.has_value() and requested_procs.count(*proc) > 1) {
     545             :         PARSE_ERROR(context,
     546             :                     "Two singletons have requested to be on proc "
     547             :                         << proc.value()
     548             :                         << ", but at least one of them has requested to be "
     549             :                            "exclusively on this proc.");
     550             :       }
     551             :     };
     552             : 
     553             :     // Do some inter-singleton sanity checks to avoid inconsistencies
     554             :     tmpl::for_each<singletons>(sanity_checks);
     555             :   }
     556             : }
     557             : 
     558             : template <typename Metavariables>
                       // Two-argument constructor. It does no work of its own: it forwards
                       // `avoid_global_proc_0` and `context` to the three-argument constructor
                       // and passes std::nullopt as the middle argument (presumably meaning "no
                       // per-singleton options were supplied" -- confirm against that overload).
     559             : ResourceInfo<Metavariables>::ResourceInfo(
     560             :     const bool avoid_global_proc_0, const Options::Context& context)
     561             :     : ResourceInfo(avoid_global_proc_0, std::nullopt, context) {}
     562             : 
     563             : template <typename Metavariables>
                       // Serialization hook: applies `p | member` to every data member listed
                       // below, one after another.
     564             : void ResourceInfo<Metavariables>::pup(PUP::er& p) {
     565             :   // A comma fold evaluates its operands strictly left to right, so the
     566             :   // members are pupped in exactly the order they are listed in the call.
     567             :   // Keep that order stable.
     568             :   const auto pup_all = [&p](auto&... members) { ((p | members), ...); };
     569             :   pup_all(avoid_global_proc_0_, singleton_map_has_been_set_,
     570             :           num_exclusive_singletons_, num_procs_to_ignore_,
     571             :           num_requested_exclusive_singletons_,
     572             :           num_requested_nonexclusive_singletons_,
     573             :           requested_nonexclusive_procs_, procs_to_ignore_,
     574             :           procs_available_for_elements_, singleton_map_);
     575             : }
     576             : 
     577             : template <typename Metavariables>
     578             : template <typename Component>
                       // Returns a SingletonInfoHolder<Component> built from the entry stored
                       // for `Component` in singleton_map_: the proc (dereferenced from the
                       // stored optional and cast to int) and the exclusivity flag.
                       //
                       // If singleton_map_has_been_set_ is false, singleton_map_not_built() is
                       // called first (presumably it reports an error -- confirm).
     579             : auto ResourceInfo<Metavariables>::get_singleton_info() const {
     580             :   const bool map_is_built = singleton_map_has_been_set_;
     581             :   if (not map_is_built) { singleton_map_not_built(); }
     582             : 
     583             :   const auto& entry = tuples::get<LocalTag<Component>>(singleton_map_);
     584             :   const bool is_exclusive = entry.first;
     585             :   return SingletonInfoHolder<Component>{
     586             :       {static_cast<int>(*entry.second)}, is_exclusive};
     587             : }
     588             : 
     589             : template <typename Metavariables>
                       // Read-only access to procs_to_ignore_. If singleton_map_has_been_set_
                       // is false, singleton_map_not_built() is called before returning
                       // (presumably it reports an error -- confirm).
     590             : const std::unordered_set<size_t>& ResourceInfo<Metavariables>::procs_to_ignore()
     591             :     const {
     592             :   const bool map_is_built = singleton_map_has_been_set_;
     593             :   if (not map_is_built) { singleton_map_not_built(); }
     594             : 
     595             :   return procs_to_ignore_;
     596             : }
     597             : 
     598             : template <typename Metavariables>
                       // Read-only access to procs_available_for_elements_. If
                       // singleton_map_has_been_set_ is false, singleton_map_not_built() is
                       // called before returning (presumably it reports an error -- confirm).
     599             : const std::set<size_t>&
     600             : ResourceInfo<Metavariables>::procs_available_for_elements() const {
     601             :   const bool map_is_built = singleton_map_has_been_set_;
     602             :   if (not map_is_built) { singleton_map_not_built(); }
     603             : 
     604             :   return procs_available_for_elements_;
     605             : }
     606             : 
     607             : template <typename Metavariables>
     608             : template <typename Component>
                       // Returns the proc stored for `Component` in singleton_map_. The stored
                       // optional is dereferenced without a has_value() check. If
                       // singleton_map_has_been_set_ is false, singleton_map_not_built() is
                       // called first (presumably it reports an error -- confirm).
     609             : size_t ResourceInfo<Metavariables>::proc_for() const {
     610             :   const bool map_is_built = singleton_map_has_been_set_;
     611             :   if (not map_is_built) { singleton_map_not_built(); }
     612             :   const auto& proc = tuples::get<LocalTag<Component>>(singleton_map_).second;
     613             :   return *proc;
     614             : }
     615             : 
     616             : template <typename Metavars>
                       // Member-wise equality of two ResourceInfo objects. The flag and counter
                       // members are compared first; the proc sets and singleton_map_ are only
                       // compared when all of those already match.
     617           0 : bool operator==(const ResourceInfo<Metavars>& lhs,
     618             :                 const ResourceInfo<Metavars>& rhs) {
     619             :   const bool flags_and_counts_match =
     620             :       lhs.avoid_global_proc_0_ == rhs.avoid_global_proc_0_ and
     621             :       lhs.singleton_map_has_been_set_ == rhs.singleton_map_has_been_set_ and
     622             :       lhs.num_exclusive_singletons_ == rhs.num_exclusive_singletons_ and
     623             :       lhs.num_procs_to_ignore_ == rhs.num_procs_to_ignore_ and
     624             :       lhs.num_requested_exclusive_singletons_ ==
     625             :           rhs.num_requested_exclusive_singletons_ and
     626             :       lhs.num_requested_nonexclusive_singletons_ ==
     627             :           rhs.num_requested_nonexclusive_singletons_;
     628             :   return flags_and_counts_match and
     629             :          lhs.requested_nonexclusive_procs_ ==
     630             :              rhs.requested_nonexclusive_procs_ and
     631             :          lhs.procs_to_ignore_ == rhs.procs_to_ignore_ and
     632             :          lhs.procs_available_for_elements_ ==
                                    rhs.procs_available_for_elements_ and
                                lhs.singleton_map_ == rhs.singleton_map_;
     633             : }
     634             : 
     635             : template <typename Metavars>
                       // Inequality, defined as the logical negation of operator== for the
                       // same two operands.
     636           0 : bool operator!=(const ResourceInfo<Metavars>& lhs,
     637             :                 const ResourceInfo<Metavars>& rhs) {
     638             :   const bool are_equal = (lhs == rhs);
                         return not are_equal;
     639             : }
     640             : 
     641             : template <typename Metavariables>
     642             : template <typename Cache>
                       // Assigns every singleton in `singletons` to a global proc. Fills in
                       // singleton_map_, procs_to_ignore_ and procs_available_for_elements_, and
                       // sets singleton_map_has_been_set_ to true as its last step.
                       //
                       // Steps, in order:
                       //  1. ERROR if num_procs_to_ignore_ >= num_procs, or if any singleton
                       //     requested a proc larger than num_procs - 1.
                       //  2. Spread the auto exclusive singletons over the nodes (nodes with the
                       //     fewest singletons first), then give each one the first proc on its
                       //     node that is neither in procs_to_ignore_ nor in
                       //     requested_nonexclusive_procs_.
                       //  3. Record every proc not in procs_to_ignore_ as available for elements.
                       //  4. Spread the auto nonexclusive singletons over the nodes and, within a
                       //     node, over the non-ignored procs with the fewest nonexclusive
                       //     singletons.
                       //  5. Print the final placement of every singleton via Parallel::printf.
                       //
                       // `cache` is only handed to the Parallel:: query functions used below
                       // (number_of_procs, number_of_nodes, node_of, procs_on_node,
                       // first_proc_on_node).
     643             : void ResourceInfo<Metavariables>::build_singleton_map(const Cache& cache) {
     644             :   const size_t num_procs = Parallel::number_of_procs<size_t>(cache);
     645             :   const size_t num_nodes = Parallel::number_of_nodes<size_t>(cache);
     646             : 
     647             :   // We don't do procs_to_ignore_.size() here because the auto singletons who
     648             :   // requested to be exclusive haven't been assigned yet so their procs haven't
     649             :   // been added to procs_to_ignore_
     650             :   if (num_procs_to_ignore_ >= num_procs) {
     651             :     ERROR(
     652             :         "The total number of cores requested is less than or equal to the "
     653             :         "number of cores that requested to be exclusive, i.e. without "
     654             :         "array elements or multiple singletons. The array elements have "
     655             :         "nowhere to be placed. Number of cores requested: "
     656             :         << num_procs << ". Number of cores that requested to be exclusive: "
     657             :         << num_procs_to_ignore_ << ".");
     658             :   }
     659             : 
     660             :   // Check if any singletons that requested to be on specific proc requested to
     661             :   // be on a proc beyond the last proc.
     662             :   tmpl::for_each<singletons>([this, &num_procs](const auto component_v) {
     663             :     using component = tmpl::type_from<decltype(component_v)>;
     664             :     auto& singleton_map = tuples::get<LocalTag<component>>(singleton_map_);
     665             :     const auto proc = singleton_map.second;
     666             : 
                           // num_procs - 1 cannot wrap around here provided the ERROR above does
                           // not return: that check already fires when num_procs is 0.
     667             :     if (proc.has_value() and *proc > num_procs - 1) {
     668             :       ERROR("Singleton " << pretty_type::name<component>()
     669             :                          << " requested to be placed on proc " << *proc
     670             :                          << ", but that proc is beyond the last proc "
     671             :                          << num_procs - 1 << ".");
     672             :     }
     673             :   });
     674             : 
     675             :   // At this point, all requested singletons have been allocated on their
     676             :   // desired procs. This leaves just the auto singletons left, both exclusive
     677             :   // and non-exclusive.
     678             : 
     679             :   // First allocate auto exclusive singletons
     680             :   // This first vector will keep track of the total number of singletons on each
     681             :   // node so we can spread them out evenly
     682             :   std::vector<size_t> singletons_on_each_node(num_nodes, 0_st);
     683             :   // This second vector keeps track of only the auto exclusive singletons on
     684             :   // each node
     685             :   std::vector<size_t> auto_exclusive_singletons_on_each_node(num_nodes, 0_st);
     686             :   // Populate requested exclusive singletons on each node with input options. We
     687             :   // couldn't have done this in the constructor because we didn't know how many
     688             :   // nodes there were or how many procs were on each node. We'll do the
     689             :   // non-exclusive ones later.
     690             :   tmpl::for_each<singletons>(
     691             :       [this, &cache, &singletons_on_each_node](const auto component_v) {
     692             :         using component = tmpl::type_from<decltype(component_v)>;
     693             :         auto& singleton_map = tuples::get<LocalTag<component>>(singleton_map_);
     694             :         const bool exclusive = singleton_map.first;
     695             :         const auto proc = singleton_map.second;
     696             : 
     697             :         if (exclusive and proc.has_value()) {
     698             :           ++singletons_on_each_node[Parallel::node_of<size_t>(*proc, cache)];
     699             :         }
     700             :       });
     701             : 
     702             :   size_t remaining_auto_exclusive_singletons =
     703             :       num_exclusive_singletons_ - num_requested_exclusive_singletons_;
     704             :   // Start with the min number of singletons on a node as our baseline. Then,
     705             :   // while we still have auto exclusive singletons to place, we loop over all
     706             :   // nodes and place singletons on nodes with this minimum number. Once all
     707             :   // nodes have at least this minimum number, we increment the minimum number
     708             :   // and loop over the nodes again
     709             :   size_t min_num_singletons_on_a_node = *std::min_element(
     710             :       singletons_on_each_node.begin(), singletons_on_each_node.end());
     711             :   while (remaining_auto_exclusive_singletons > 0) {
     712             :     for (size_t i = 0; i < num_nodes; i++) {
     713             :       // If this node has more than the minimum number of singletons on it, skip
     714             :       // it for now
     715             :       if (singletons_on_each_node[i] > min_num_singletons_on_a_node) {
     716             :         continue;
     717             :       }
     718             :       // Since nodes can have different number of procs, we check that we
     719             :       // haven't exhausted the number of procs on this node. This check is ok
     720             :       // right now because we haven't included any nonexclusive singletons in
     721             :       // singletons_on_each_node yet.
     722             :       if (not(singletons_on_each_node[i] <
     723             :               Parallel::procs_on_node<size_t>(i, cache))) {
     724             :         continue;
     725             :       }
     726             : 
     727             :       ++singletons_on_each_node[i];
     728             :       ++auto_exclusive_singletons_on_each_node[i];
     729             :       --remaining_auto_exclusive_singletons;
     730             : 
     731             :       // We need to break out of both loops here. Use a goto.
     732             :       if (remaining_auto_exclusive_singletons == 0) {
     733             :         goto break_auto_exclusive_loops;
     734             :       }
     735             :     }  // for (size_t i = 0; i < num_nodes; i++)
     736             : 
     737             :     ++min_num_singletons_on_a_node;
     738             :   }  // while (remaining_auto_exclusive_singletons > 0)
     739             : break_auto_exclusive_loops:
     740             : 
     741             :   ASSERT(remaining_auto_exclusive_singletons == 0,
     742             :          "Not all exclusive singletons have been allocated. The remaining "
     743             :          "number of singletons to be allocated is "
     744             :              << remaining_auto_exclusive_singletons << ".");
     745             : 
     746             :   // Actually allocate the auto exclusive singletons
     747             :   size_t current_node = 0;
     748             :   tmpl::for_each<singletons>([this, &cache, &current_node,
     749             :                               &auto_exclusive_singletons_on_each_node](
     750             :                                  const auto component_v) {
     751             :     using component = tmpl::type_from<decltype(component_v)>;
     752             :     auto& singleton_map = tuples::get<LocalTag<component>>(singleton_map_);
     753             :     const bool exclusive = singleton_map.first;
     754             :     const auto int_proc = singleton_map.second;
     755             : 
     756             :     // Only allocating auto exclusive at the moment
     757             :     if (exclusive and not int_proc.has_value()) {
                             // NOTE(review): current_node is advanced without a bounds check. This
                             // appears to rely on the per-node counts computed above covering
                             // every auto exclusive singleton -- confirm.
     758             :       while (auto_exclusive_singletons_on_each_node[current_node] == 0) {
     759             :         ++current_node;
     760             :       }
     761             : 
     762             :       size_t proc = Parallel::first_proc_on_node<size_t>(current_node, cache);
     763             :       // Don't place two exclusive singletons on the same proc, but also if
     764             :       // a singleton requested a specific proc, whether or not it is
     765             :       // exclusive, we can't place an exclusive singleton on that proc. That
     766             :       // defeats the whole purpose of requesting the specific proc...
                             // NOTE(review): proc is incremented without checking that it stays on
                             // current_node or below the total proc count. The capacity check in
                             // the placement loop above counts only exclusive singletons, so
                             // requested nonexclusive procs on a nearly full node could push this
                             // search onto the next node -- confirm.
     767             :       while (procs_to_ignore_.find(proc) != procs_to_ignore_.end() or
     768             :              requested_nonexclusive_procs_.count(proc) > 0) {
     769             :         ++proc;
     770             :       }
     771             : 
     772             :       singleton_map.second = proc;
     773             :       procs_to_ignore_.insert(proc);
     774             : 
     775             :       --auto_exclusive_singletons_on_each_node[current_node];
     776             :     }
     777             :   });
     778             : 
     779             :   ASSERT(alg::accumulate(auto_exclusive_singletons_on_each_node, 0_st) == 0,
     780             :          "Not all auto exclusive singletons have been allocated. The remaining "
     781             :          "number of auto exclusive singletons to be allocated is "
     782             :              << alg::accumulate(auto_exclusive_singletons_on_each_node, 0_st));
     783             : 
     784             :   // procs_to_ignore_ is now complete. Now construct
     785             :   // procs_available_for_elements_
     786             :   for (size_t i = 0; i < num_procs; i++) {
     787             :     if (procs_to_ignore_.find(i) == procs_to_ignore_.end()) {
     788             :       procs_available_for_elements_.insert(i);
     789             :     }
     790             :   }
     791             : 
     792             :   // At this point, all auto exclusive singletons have been allocated. Now the
     793             :   // only singletons left are auto non-exclusive. We use vectors of
     794             :   // std::optional<size_t> here as sentinels for procs which should be avoided.
     795             :   // A nullopt means that the proc shouldn't have auto nonexclusive singletons
     796             :   // on it. When we have lots of cores to run on (hundreds of thousands or even
     797             :   // millions), these vectors will take up a non-negligible amount of memory.
     798             :   // However, we only need to do this once per executable at the very beginning
     799             :   // so it shouldn't really matter
     800             :   std::vector<std::optional<size_t>> nonexclusive_singletons_on_each_proc(
     801             :       num_procs, std::optional<size_t>(0_st));
     802             :   // This vector has the default be nullopt rather than 0, because this will
     803             :   // only be used when we actually place singletons. We only care which procs
     804             :   // have singletons, which will usually be a small subset of the total procs.
     805             :   std::vector<std::optional<size_t>> auto_nonexclusive_singletons_on_each_proc(
     806             :       num_procs, std::nullopt);
     807             :   for (const size_t proc : procs_to_ignore_) {
     808             :     nonexclusive_singletons_on_each_proc[proc] = std::nullopt;
     809             :   }
     810             :   // Now we add in the requested nonexclusive to the total number of singletons
     811             :   // per node
     812             :   for (const auto& proc : requested_nonexclusive_procs_) {
     813             :     ++*nonexclusive_singletons_on_each_proc[proc];
     814             :     ++singletons_on_each_node[Parallel::node_of<size_t>(proc, cache)];
     815             :   }
     816             : 
     817             :   size_t remaining_auto_nonexclusive_singletons =
     818             :       tmpl::size<singletons>::value - num_exclusive_singletons_ -
     819             :       num_requested_nonexclusive_singletons_;
     820             : 
     821             :   // This serves the same purpose as before
     822             :   min_num_singletons_on_a_node = *std::min_element(
     823             :       singletons_on_each_node.begin(), singletons_on_each_node.end());
     824             :   while (remaining_auto_nonexclusive_singletons > 0) {
     825             :     for (size_t i = 0; i < num_nodes; i++) {
     826             :       const int first_proc = Parallel::first_proc_on_node<int>(i, cache);
     827             :       const int procs_on_node = Parallel::procs_on_node<int>(i, cache);
     828             :       const int first_proc_next_node = first_proc + procs_on_node;
     829             : 
     830             :       auto first_proc_iter =
     831             :           std::next(nonexclusive_singletons_on_each_proc.begin(), first_proc);
     832             :       auto first_proc_next_node_iter = std::next(
     833             :           nonexclusive_singletons_on_each_proc.begin(), first_proc_next_node);
     834             : 
     835             :       // Get the proc on this node with the minimum number of singletons. This
     836             :       // serves the same purpose as min_num_singletons_on_a_node except now for
     837             :       // procs on a specific node
                             // The comparator orders engaged optionals by value and ranks any
                             // engaged optional below nullopt, so the result is nullopt only when
                             // every proc on this node is nullopt.
     838             :       auto& min_num_singletons_on_a_proc_opt =
     839             :           *std::min_element(first_proc_iter, first_proc_next_node_iter,
     840             :                             [](const auto& a, const auto& b) {
     841             :                               if (a.has_value() and b.has_value()) {
     842             :                                 return a.value() < b.value();
     843             :                               } else {
     844             :                                 return a.has_value();
     845             :                               }
     846             :                             });
     847             : 
     848             :       // Check if this node can accommodate more singletons. Do two checks:
     849             :       // 1. This node doesn't have more than the minimum number of singletons
     850             :       // 2. That this node isn't filled up with exclusive singletons (nullopt =
     851             :       //    all procs on this node are taken)
     852             :       if (singletons_on_each_node[i] > min_num_singletons_on_a_node or
     853             :           not min_num_singletons_on_a_proc_opt.has_value()) {
     854             :         continue;
     855             :       }
     856             : 
     857             :       // At this point, we have guaranteed that this node should have an auto
     858             :       // nonexclusive singleton on it somewhere. Now determine where
     859             :       size_t min_num_singletons_on_a_proc =
     860             :           min_num_singletons_on_a_proc_opt.value();
     861             : 
     862             :       // Find the first available proc on this node. Check that
     863             :       // 1. This proc is available (i.e. no exclusive singletons on it)
     864             :       // 2. This proc has the minimum number of singletons on it for this node
     865             :       //    so we distribute the singletons evenly over all the procs on this
     866             :       //    node
     867             :       auto proc_iter =
     868             :           std::find_if(first_proc_iter, first_proc_next_node_iter,
     869             :                        [&min_num_singletons_on_a_proc](const auto& proc_opt) {
     870             :                          return proc_opt.has_value() and
     871             :                                 *proc_opt == min_num_singletons_on_a_proc;
     872             :                        });
     873             : 
     874             :       // Get the index of the overall vector. We need this because we're going
     875             :       // to be indexing two separate vectors, otherwise we could have just used
     876             :       // the value of the iterator
     877             :       const size_t proc = static_cast<size_t>(std::distance(
     878             :           nonexclusive_singletons_on_each_proc.begin(), proc_iter));
     879             : 
     880             :       // Increment things
     881             :       ++*nonexclusive_singletons_on_each_proc[proc];
     882             :       ++singletons_on_each_node[i];
     883             :       if (auto_nonexclusive_singletons_on_each_proc[proc].has_value()) {
     884             :         ++*auto_nonexclusive_singletons_on_each_proc[proc];
     885             :       } else {
     886             :         auto_nonexclusive_singletons_on_each_proc[proc] = 1;
     887             :       }
     888             :       --remaining_auto_nonexclusive_singletons;
     889             : 
     890             :       // We need to break out of both loops here. Use a goto.
     891             :       if (remaining_auto_nonexclusive_singletons == 0) {
     892             :         goto break_auto_nonexclusive_loops;
     893             :       }
     894             :     }  // for (size_t i = 0; i < num_nodes; i++)
     895             : 
     896             :     ++min_num_singletons_on_a_node;
     897             :   }  // while (remaining_auto_nonexclusive_singletons > 0)
     898             : break_auto_nonexclusive_loops:
     899             : 
     900             :   ASSERT(remaining_auto_nonexclusive_singletons == 0,
     901             :          "Not all nonexclusive singletons have been allocated. The remaining "
     902             :          "number of singletons to be allocated is "
     903             :              << remaining_auto_nonexclusive_singletons << ".");
     904             : 
     905             :   // Actually allocate the auto nonexclusive singletons
     906             :   std::stringstream ss;
     907             :   ss << "\nAllocating Singletons:\n";
     908             :   size_t current_proc = 0;
     909             :   tmpl::for_each<singletons>([this, &current_proc, &cache, &ss,
     910             :                               &auto_nonexclusive_singletons_on_each_proc](
     911             :                                  const auto component_v) {
     912             :     using component = tmpl::type_from<decltype(component_v)>;
     913             :     auto& singleton_map = tuples::get<LocalTag<component>>(singleton_map_);
     914             :     const auto proc_opt = singleton_map.second;
     915             : 
     916             :     // At this point, the only singletons that have this are nonexclusive
     917             :     if (not proc_opt.has_value()) {
                             // current_proc is shared across all components and only moves
                             // forward: it skips procs whose remaining count is nullopt.
                             // NOTE(review): there is no bounds check on current_proc -- confirm
                             // the counts above always cover every remaining singleton.
     918             :       while (not auto_nonexclusive_singletons_on_each_proc[current_proc]
     919             :                      .has_value()) {
     920             :         ++current_proc;
     921             :       }
     922             : 
     923             :       singleton_map.second = current_proc;
     924             : 
     925             :       --*auto_nonexclusive_singletons_on_each_proc[current_proc];
     926             :       // Indicate that there are no more singletons to be placed on this proc
     927             :       if (*auto_nonexclusive_singletons_on_each_proc[current_proc] == 0) {
     928             :         auto_nonexclusive_singletons_on_each_proc[current_proc] = std::nullopt;
     929             :       }
     930             :     }
     931             : 
     932             :     // Print some diagnostic info to stdout for each singleton. This can aid in
     933             :     // debugging.
     934             :     ss << pretty_type::name<component>();
     935             :     ss << " on node " << Parallel::node_of<int>(*singleton_map.second, cache);
     936             :     ss << ", global proc " << *singleton_map.second;
     937             :     ss << ", exclusive = " << std::boolalpha << singleton_map.first << "\n";
     938             :   });
     939             : 
     940             :   ss << "\n";
     941             :   Parallel::printf("%s", ss.str());
     942             : 
     943             :   // Now that everything has been set, signal that we don't have to do
     944             :   // this again.
     945             :   singleton_map_has_been_set_ = true;
     946             : }
     947             : }  // namespace Parallel

Generated by: LCOV version 1.14