Line data Source code
1 0 : // Distributed under the MIT License. 2 : // See LICENSE.txt for details. 3 : 4 : #pragma once 5 : 6 : #include <cstddef> 7 : #include <functional> 8 : #include <unordered_map> 9 : #include <unordered_set> 10 : #include <vector> 11 : 12 : #include "Domain/Block.hpp" 13 : #include "Domain/Creators/DomainCreator.hpp" 14 : #include "Domain/Creators/OptionTags.hpp" 15 : #include "Domain/Creators/Tags/Domain.hpp" 16 : #include "Domain/Creators/Tags/InitialExtents.hpp" 17 : #include "Domain/Creators/Tags/InitialRefinementLevels.hpp" 18 : #include "Domain/Domain.hpp" 19 : #include "Domain/ElementDistribution.hpp" 20 : #include "Domain/Structure/ElementId.hpp" 21 : #include "Domain/Structure/InitialElementIds.hpp" 22 : #include "Domain/Tags/ElementDistribution.hpp" 23 : #include "Evolution/DiscontinuousGalerkin/Initialization/QuadratureTag.hpp" 24 : #include "Parallel/Algorithms/AlgorithmArray.hpp" 25 : #include "Parallel/DomainDiagnosticInfo.hpp" 26 : #include "Parallel/GlobalCache.hpp" 27 : #include "Parallel/Info.hpp" 28 : #include "Parallel/Local.hpp" 29 : #include "Parallel/ParallelComponentHelpers.hpp" 30 : #include "Parallel/Phase.hpp" 31 : #include "Parallel/Printf/Printf.hpp" 32 : #include "Parallel/Tags/Parallelization.hpp" 33 : #include "Utilities/Literals.hpp" 34 : #include "Utilities/Numeric.hpp" 35 : #include "Utilities/System/ParallelInfo.hpp" 36 : #include "Utilities/TMPL.hpp" 37 : #include "Utilities/TypeTraits/CreateHasStaticMemberVariable.hpp" 38 : 39 : namespace detail { 40 : CREATE_HAS_STATIC_MEMBER_VARIABLE(use_z_order_distribution) 41 : CREATE_HAS_STATIC_MEMBER_VARIABLE_V(use_z_order_distribution) 42 : CREATE_HAS_STATIC_MEMBER_VARIABLE(local_time_stepping) 43 : CREATE_HAS_STATIC_MEMBER_VARIABLE_V(local_time_stepping) 44 : } // namespace detail 45 : 46 : /*! 47 : * \brief The parallel component responsible for managing the DG elements that 48 : * compose the computational domain 49 : * 50 : * This parallel component will perform the actions specified by the 51 : * `PhaseDepActionList`. 52 : * 53 : * The element assignment to processors is performed by 54 : * `domain::BlockZCurveProcDistribution` (using a Morton space-filling curve), 55 : * unless `static constexpr bool use_z_order_distribution = false;` is specified 56 : * in the `Metavariables`, in which case elements are assigned to processors via 57 : * round-robin assignment. In both cases, an unordered set of `size_t`s can be 58 : * passed to the `allocate_array` function which represents physical processors 59 : * to avoid placing elements on. If the space-filling curve is used, then if 60 : * `static constexpr bool local_time_stepping = true;` is specified 61 : * in the `Metavariables`, `Element`s will be distributed according to their 62 : * computational costs determined by the number of grid points and minimum grid 63 : * spacing of that `Element` (see 64 : * `domain::get_num_points_and_grid_spacing_cost()`), else the computational 65 : * cost is determined only by the number of grid points in the `Element`. 66 : */ 67 : template <class Metavariables, class PhaseDepActionList> 68 1 : struct DgElementArray { 69 0 : static constexpr size_t volume_dim = Metavariables::volume_dim; 70 : 71 0 : using chare_type = Parallel::Algorithms::Array; 72 0 : using metavariables = Metavariables; 73 0 : using phase_dependent_action_list = PhaseDepActionList; 74 0 : using array_index = ElementId<volume_dim>; 75 : 76 0 : using const_global_cache_tags = tmpl::list<domain::Tags::Domain<volume_dim>, 77 : domain::Tags::ElementDistribution>; 78 : 79 0 : using simple_tags_from_options = Parallel::get_simple_tags_from_options< 80 : Parallel::get_initialization_actions_list<phase_dependent_action_list>>; 81 : 82 0 : using array_allocation_tags = tmpl::list<>; 83 : 84 0 : static void allocate_array( 85 : Parallel::CProxy_GlobalCache<Metavariables>& global_cache, 86 : const tuples::tagged_tuple_from_typelist<simple_tags_from_options>& 87 : initialization_items, 88 : const tuples::tagged_tuple_from_typelist<array_allocation_tags>& 89 : array_allocation_items = {}, 90 : const std::unordered_set<size_t>& procs_to_ignore = {}); 91 : 92 0 : static void execute_next_phase( 93 : const Parallel::Phase next_phase, 94 : Parallel::CProxy_GlobalCache<Metavariables>& global_cache) { 95 : auto& local_cache = *Parallel::local_branch(global_cache); 96 : Parallel::get_parallel_component<DgElementArray>(local_cache) 97 : .start_phase(next_phase); 98 : } 99 : }; 100 : 101 : template <class Metavariables, class PhaseDepActionList> 102 : void DgElementArray<Metavariables, PhaseDepActionList>::allocate_array( 103 : Parallel::CProxy_GlobalCache<Metavariables>& global_cache, 104 : const tuples::tagged_tuple_from_typelist<simple_tags_from_options>& 105 : initialization_items, 106 : const tuples::tagged_tuple_from_typelist<array_allocation_tags>& 107 : /*array_allocation_items*/, 108 : const std::unordered_set<size_t>& procs_to_ignore) { 109 : auto& local_cache = *Parallel::local_branch(global_cache); 110 : auto& dg_element_array = 111 : Parallel::get_parallel_component<DgElementArray>(local_cache); 112 : 113 : const auto& domain = 114 : Parallel::get<domain::Tags::Domain<volume_dim>>(local_cache); 115 : const auto& initial_refinement_levels = 116 : get<domain::Tags::InitialRefinementLevels<volume_dim>>( 117 : initialization_items); 118 : const auto& initial_extents = 119 : get<domain::Tags::InitialExtents<volume_dim>>(initialization_items); 120 : const auto& quadrature = 121 : get<evolution::dg::Tags::Quadrature>(initialization_items); 122 : const std::optional<domain::ElementWeight>& element_weight = 123 : Parallel::get<domain::Tags::ElementDistribution>(local_cache); 124 : 125 : const size_t number_of_procs = Parallel::number_of_procs<size_t>(local_cache); 126 : const size_t number_of_nodes = Parallel::number_of_nodes<size_t>(local_cache); 127 : const size_t num_of_procs_to_use = number_of_procs - procs_to_ignore.size(); 128 : 129 : const auto& blocks = domain.blocks(); 130 : 131 : // Only need the element distribution if the element weight has a value 132 : // because then we have to use the space filling curve and not just use round 133 : // robin. 134 : domain::BlockZCurveProcDistribution<volume_dim> element_distribution{}; 135 : if (element_weight.has_value()) { 136 : const std::unordered_map<ElementId<volume_dim>, double> element_costs = 137 : domain::get_element_costs(blocks, initial_refinement_levels, 138 : initial_extents, element_weight.value(), 139 : quadrature); 140 : element_distribution = domain::BlockZCurveProcDistribution<volume_dim>{ 141 : element_costs, num_of_procs_to_use, blocks, initial_refinement_levels, 142 : initial_extents, procs_to_ignore}; 143 : } 144 : 145 : // Will be used to print domain diagnostic info 146 : std::vector<size_t> elements_per_core(number_of_procs, 0_st); 147 : std::vector<size_t> elements_per_node(number_of_nodes, 0_st); 148 : std::vector<size_t> grid_points_per_core(number_of_procs, 0_st); 149 : std::vector<size_t> grid_points_per_node(number_of_nodes, 0_st); 150 : 151 : size_t which_proc = 0; 152 : for (const auto& block : blocks) { 153 : const auto& initial_ref_levs = initial_refinement_levels[block.id()]; 154 : const size_t grid_points_per_element = alg::accumulate( 155 : initial_extents[block.id()], 1_st, std::multiplies<size_t>()); 156 : 157 : const std::vector<ElementId<volume_dim>> element_ids = 158 : initial_element_ids(block.id(), initial_ref_levs); 159 : 160 : // Value means ZCurve. nullopt means round robin 161 : if (element_weight.has_value()) { 162 : for (const auto& element_id : element_ids) { 163 : const size_t target_proc = 164 : element_distribution.get_proc_for_element(element_id); 165 : dg_element_array(element_id) 166 : .insert(global_cache, initialization_items, target_proc); 167 : 168 : const size_t target_node = 169 : Parallel::node_of<size_t>(target_proc, local_cache); 170 : ++elements_per_core[target_proc]; 171 : ++elements_per_node[target_node]; 172 : grid_points_per_core[target_proc] += grid_points_per_element; 173 : grid_points_per_node[target_node] += grid_points_per_element; 174 : } 175 : } else { 176 : for (size_t i = 0; i < element_ids.size(); ++i) { 177 : while (procs_to_ignore.find(which_proc) != procs_to_ignore.end()) { 178 : which_proc = which_proc + 1 == number_of_procs ? 0 : which_proc + 1; 179 : } 180 : 181 : dg_element_array(ElementId<volume_dim>(element_ids[i])) 182 : .insert(global_cache, initialization_items, which_proc); 183 : 184 : const size_t target_node = 185 : Parallel::node_of<size_t>(which_proc, local_cache); 186 : ++elements_per_core[which_proc]; 187 : ++elements_per_node[target_node]; 188 : grid_points_per_core[which_proc] += grid_points_per_element; 189 : grid_points_per_node[target_node] += grid_points_per_element; 190 : 191 : which_proc = which_proc + 1 == number_of_procs ? 0 : which_proc + 1; 192 : } 193 : } 194 : } 195 : 196 : dg_element_array.doneInserting(); 197 : 198 : Parallel::printf("\n%s\n", domain::diagnostic_info( 199 : domain.blocks().size(), local_cache, 200 : elements_per_core, elements_per_node, 201 : grid_points_per_core, grid_points_per_node)); 202 : }