SpECTRE Documentation Coverage Report
Current view: top level - Domain - ElementDistribution.hpp Hit Total Coverage
Commit: 361cb8d8406bb752684a5f31c27320ec444a50e3 Lines: 5 12 41.7 %
Date: 2025-11-09 02:02:04
Legend: Lines: hit not hit

          Line data    Source code
       1           0 : // Distributed under the MIT License.
       2             : // See LICENSE.txt for details.
       3             : 
       4             : #pragma once
       5             : 
       6             : #include <array>
       7             : #include <cstddef>
       8             : #include <optional>
       9             : #include <string>
      10             : #include <unordered_map>
      11             : #include <unordered_set>
      12             : #include <utility>
      13             : #include <vector>
      14             : 
      15             : #include "Options/Options.hpp"
      16             : #include "Options/ParseError.hpp"
      17             : #include "Utilities/TypeTraits/CreateGetStaticMemberVariableOrDefault.hpp"
      18             : 
      19             : /// \cond
      20             : template <size_t Dim>
      21             : class Block;
      22             : 
      23             : template <size_t Dim>
      24             : class ElementId;
      25             : 
      26             : namespace Spectral {
      27             : enum class Basis : uint8_t;
      28             : enum class Quadrature : uint8_t;
      29             : }  // namespace Spectral
      30             : /// \endcond
      31             : 
      32             : namespace domain {
      33             : /// The weighting scheme for assigning computational costs to `Element`s for
      34             : /// distributing balanced compuational costs per processor (see
      35             : /// `BlockZCurveProcDistribution`)
      36           1 : enum class ElementWeight {
      37             :   /// A weighting scheme where each `Element` is assigned the same computational
      38             :   /// cost
      39             :   Uniform,
      40             :   /// A weighting scheme where each `Element`'s computational cost is equal to
      41             :   /// the number of grid points in that `Element`
      42             :   NumGridPoints,
      43             :   /// A weighting scheme where each `Element`'s computational cost is weighted
      44             :   /// by both the number of grid points and minimum spacing between grid points
      45             :   /// in that `Element` (see `get_num_points_and_grid_spacing_cost()` for
      46             :   /// details)
      47             :   NumGridPointsAndGridSpacing
      48             : };
      49             : 
      50           0 : std::ostream& operator<<(std::ostream& os, ElementWeight weight);
      51             : 
      52             : /// \brief Get the cost of each `Element` in a list of `Block`s where
      53             : /// `element_weight` specifies which weight distribution scheme to use
      54             : ///
      55             : /// \details It is only necessary to pass in a value for `i1_basis` and
      56             : /// `i1_quadrature` if the value for `element_weight` is
      57             : /// `ElementWeight::NumGridPointsAndGridSpacing`. Otherwise, the argument isn't
      58             : /// needed and will have no effect if it does have a value.
      59             : template <size_t Dim>
      60           1 : std::unordered_map<ElementId<Dim>, double> get_element_costs(
      61             :     const std::vector<Block<Dim>>& blocks,
      62             :     const std::vector<std::array<size_t, Dim>>& initial_refinement_levels,
      63             :     const std::vector<std::array<size_t, Dim>>& initial_extents,
      64             :     ElementWeight element_weight,
      65             :     const std::optional<Spectral::Basis>& i1_basis,
      66             :     const std::optional<Spectral::Quadrature>& i1_quadrature);
      67             : 
      68             : /*!
      69             :  * \brief Distribution strategy for assigning elements to CPUs using a
      70             :  * Morton ('Z-order') space-filling curve to determine placement within each
      71             :  * block, where `Element`s are distributed across CPUs
      72             :  *
      73             :  * \details The element distribution attempts to assign a balanced total
      74             :  * computational cost to each processor that is allowed to have `Element`s.
      75             :  * First, each `Block`'s `Element`s are ordered by their Z-curve index (see more
      76             :  * below). `Element`s are traversed in this order and assigned to CPUs in order,
      77             :  * moving onto the next CPU once the target cost per CPU is met. The target cost
      78             :  * per CPU is defined as the remaining cost to distribute divided by the
      79             :  * remaining number of CPUs to distribute to. This is an important distinction
      80             :  * from simply having one constant target cost per CPU defined as the total cost
      81             :  * divided by the total number of CPUs with elements. Since the total cost of
      82             :  * `Element`s on a processor will nearly never add up to be exactly the average
      83             :  * cost per CPU, this means that we would either have to decide to overshoot or
      84             :  * undershoot the average as we iterate over the CPUs and assign `Element`s. If
      85             :  * we overshoot the average on each processor, the final processor could have a
      86             :  * much lower cost than the rest of the processors and we run the risk of
      87             :  * overshooting so much that one or more of the requested processors don't get
      88             :  * assigned any `Element`s at all. If we undershoot the average on each
      89             :  * processor, the final processor could have a much higher cost than the others
      90             :  * due to remainder cost piling up. This algorithm avoids these risks by instead
      91             :  * adjusting the target cost per CPU as we finish assigning cost to previous
      92             :  * CPUs.
      93             :  *
      94             :  * Morton curves are a simple and easily-computed space-filling curve that
      95             :  * (unlike Hilbert curves) permit diagonal traversal. See, for instance,
      96             :  * \cite Borrell2018 for a discussion of mesh partitioning using space-filling
      97             :  * curves.
      98             :  * A concrete example of the use of a Morton curve in 2d is given below.
      99             :  *
     100             :  * A sketch of a 2D block with 4x2 elements, with each element labeled according
     101             :  * to the order on the Morton curve:
     102             :  * ```
     103             :  *          x-->
     104             :  *          0   1   2   3
     105             :  *        ----------------
     106             :  *  y  0 |  0   2   4   6
     107             :  *  |    |  | / | / | / |
     108             :  *  v  1 |  1   3   5   7
     109             :  * ```
     110             :  * (forming a zig-zag path, that under some rotation/reflection has a 'Z'
     111             :  * shape).
     112             :  *
     113             :  * The Morton curve method is a quick way of getting acceptable spatial locality
     114             :  * -- usually, for approximately even distributions, it will ensure that
     115             :  * elements are assigned in large volume chunks, and the structure of the Morton
     116             :  * curve ensures that for a given processor and block, the elements will be
     117             :  * assigned in no more than two orthogonally connected clusters. In principle, a
     118             :  * Hilbert curve could potentially improve upon the gains obtained by this class
     119             :  * by guaranteeing that all elements within each block form a single
     120             :  * orthogonally connected cluster.
     121             :  *
     122             :  * The assignment of portions of blocks to processors may use partial blocks,
     123             :  * and/or multiple blocks to ensure an even distribution of elements to
     124             :  * processors.
     125             :  * We currently make no distinction between dividing elements between processors
     126             :  * within a node and dividing elements between processors across nodes. The
     127             :  * current technique aims to have a simple method of reducing communication
     128             :  * globally, though it would likely be more efficient to prioritize minimization
     129             :  * of inter-node communication, because communication across interconnects is
     130             :  * the primary cost of communication in charm++ runs.
     131             :  *
     132             :  * \warning The use of the Morton curve to generate a well-clustered element
     133             :  * distribution currently assumes that the refinement is uniform over each
     134             :  * block, with no internal structure that would be generated by, for instance
     135             :  * AMR.
     136             :  * This distribution method will need alteration to perform well for blocks with
     137             :  * internal structure from h-refinement. Morton curves can be defined
     138             :  * recursively, so a generalization of the present method is possible for blocks
     139             :  * with internal refinement
     140             :  *
     141             :  * \tparam Dim the number of spatial dimensions of the `Block`s
     142             :  */
     143             : template <size_t Dim>
     144           1 : struct BlockZCurveProcDistribution {
     145           0 :   BlockZCurveProcDistribution() = default;
     146             : 
     147             :   /// The `number_of_procs_with_elements` argument represents how many procs
     148             :   /// will have elements. This is not necessarily equal to the total number of
     149             :   /// procs because some global procs may be ignored by the sixth argument
     150             :   /// `global_procs_to_ignore`.
     151           1 :   BlockZCurveProcDistribution(
     152             :       const std::unordered_map<ElementId<Dim>, double>& element_costs,
     153             :       size_t number_of_procs_with_elements,
     154             :       const std::vector<Block<Dim>>& blocks,
     155             :       const std::vector<std::array<size_t, Dim>>& initial_refinement_levels,
     156             :       const std::vector<std::array<size_t, Dim>>& initial_extents,
     157             :       const std::unordered_set<size_t>& global_procs_to_ignore = {});
     158             : 
     159             :   /// Gets the suggested processor number for a particular `ElementId`,
     160             :   /// determined by the Morton curve weighted element assignment described in
     161             :   /// detail in the parent class documentation.
     162           1 :   size_t get_proc_for_element(const ElementId<Dim>& element_id) const;
     163             : 
     164             :   const std::vector<std::vector<std::pair<size_t, size_t>>>&
     165           0 :   block_element_distribution() const {
     166             :     return block_element_distribution_;
     167             :   }
     168             : 
     169             :  private:
     170             :   // in this nested data structure:
     171             :   // - The block id is the first index
     172             :   // - There is an arbitrary number of CPUs per block, each with an element
     173             :   //   allowance
     174             :   // - Each element allowance is represented by a pair of proc number, number of
     175             :   //   elements in the allowance
     176             :   std::vector<std::vector<std::pair<size_t, size_t>>>
     177           0 :       block_element_distribution_;
     178             : };
     179             : }  // namespace domain
     180             : 
     181             : namespace element_weight_detail {
     182             : CREATE_GET_STATIC_MEMBER_VARIABLE_OR_DEFAULT(local_time_stepping)
     183             : }  // namespace element_weight_detail
     184             : 
     185             : template <>
     186           0 : struct Options::create_from_yaml<domain::ElementWeight> {
     187             :   template <typename Metavariables>
     188           0 :   static domain::ElementWeight create(const Options::Option& options) {
     189             :     const auto ordering = options.parse_as<std::string>();
     190             :     if (ordering == "Uniform") {
     191             :       return domain::ElementWeight::Uniform;
     192             :     } else if (ordering == "NumGridPoints") {
     193             :       return domain::ElementWeight::NumGridPoints;
     194             :     } else if (ordering == "NumGridPointsAndGridSpacing") {
     195             :       if constexpr (not element_weight_detail::
     196             :                         get_local_time_stepping_or_default_v<Metavariables,
     197             :                                                              false>) {
     198             :         PARSE_ERROR(
     199             :             options.context(),
     200             :             "When not using local time stepping, you cannot use "
     201             :             "NumGridPointsAndGridSpacing for the element distribution. Please "
     202             :             "choose another element distribution.");
     203             :       }
     204             :       return domain::ElementWeight::NumGridPointsAndGridSpacing;
     205             :     }
     206             :     PARSE_ERROR(options.context(),
     207             :                 "ElementWeight must be 'Uniform', 'NumGridPoints', or, "
     208             :                 "'NumGridPointsAndGridSpacing'");
     209             :   }
     210             : };

Generated by: LCOV version 1.14