// Distributed under the MIT License.
// See LICENSE.txt for details.

#pragma once

#include <charm++.h>
#include <cstddef>
#include <functional>
#include <optional>
#include <unordered_map>
#include <unordered_set>
#include <vector>

#include "Domain/Block.hpp"
#include "Domain/Creators/DomainCreator.hpp"
#include "Domain/Creators/OptionTags.hpp"
#include "Domain/Creators/Tags/Domain.hpp"
#include "Domain/Creators/Tags/InitialExtents.hpp"
#include "Domain/Creators/Tags/InitialRefinementLevels.hpp"
#include "Domain/Domain.hpp"
#include "Domain/ElementDistribution.hpp"
#include "Domain/Structure/ElementId.hpp"
#include "Domain/Structure/InitialElementIds.hpp"
#include "Domain/Tags.hpp"
#include "Domain/Tags/ElementDistribution.hpp"
#include "Elliptic/DiscontinuousGalerkin/Tags.hpp"
#include "NumericalAlgorithms/Spectral/Basis.hpp"
#include "NumericalAlgorithms/Spectral/Quadrature.hpp"
#include "Parallel/Algorithms/AlgorithmArray.hpp"
#include "Parallel/DomainDiagnosticInfo.hpp"
#include "Parallel/GlobalCache.hpp"
#include "Parallel/Info.hpp"
#include "Parallel/Local.hpp"
#include "Parallel/ParallelComponentHelpers.hpp"
#include "Parallel/Phase.hpp"
#include "Parallel/Printf/Printf.hpp"
#include "Parallel/Protocols/ArrayElementsAllocator.hpp"
#include "Parallel/Tags/Parallelization.hpp"
#include "Utilities/Literals.hpp"
#include "Utilities/Numeric.hpp"
#include "Utilities/ProtocolHelpers.hpp"
#include "Utilities/System/ParallelInfo.hpp"
#include "Utilities/TMPL.hpp"
#include "Utilities/TaggedTuple.hpp"

namespace elliptic {

/*!
 * \brief A `Parallel::protocols::ArrayElementsAllocator` that creates array
 * elements to cover the initial computational domain
 *
 * An element is created for every element ID in every block, determined by the
 * `initial_element_ids` function and the option-created `domain::Tags::Domain`
 * and `domain::Tags::InitialRefinementLevels`. The elements are distributed
 * across processors either with a weighted
 * `domain::BlockZCurveProcDistribution`, where each element's computational
 * cost is determined by its number of grid points, or round-robin otherwise.
 * In both cases, an unordered set of `size_t`s that represents physical
 * processors to avoid placing elements on can be passed to the
 * `allocate_array` function.
 */
template <size_t Dim>
struct DefaultElementsAllocator
    : tt::ConformsTo<Parallel::protocols::ArrayElementsAllocator> {
  template <typename ParallelComponent>
  using array_allocation_tags = tmpl::list<>;

  template <typename ParallelComponent, typename Metavariables,
            typename... InitializationTags>
  static void apply(
      Parallel::CProxy_GlobalCache<Metavariables>& global_cache,
      const tuples::TaggedTuple<InitializationTags...>& initialization_items,
      const tuples::tagged_tuple_from_typelist<
          typename ParallelComponent::array_allocation_tags>&
      /*array_allocation_items*/
      = {},
      const std::unordered_set<size_t>& procs_to_ignore = {}) {
    auto& local_cache = *Parallel::local_branch(global_cache);
    auto& element_array =
        Parallel::get_parallel_component<ParallelComponent>(local_cache);
    const auto& initial_extents =
        get<domain::Tags::InitialExtents<Dim>>(initialization_items);
    const auto basis = Spectral::Basis::Legendre;
    const auto& quadrature =
        Parallel::get<elliptic::dg::Tags::Quadrature>(local_cache);

    const auto& domain = Parallel::get<domain::Tags::Domain<Dim>>(local_cache);
    const auto& initial_refinement_levels =
        get<domain::Tags::InitialRefinementLevels<Dim>>(initialization_items);

    const size_t number_of_procs =
        Parallel::number_of_procs<size_t>(local_cache);
    const size_t number_of_nodes =
        Parallel::number_of_nodes<size_t>(local_cache);
    const size_t num_of_procs_to_use = number_of_procs - procs_to_ignore.size();

    const auto& blocks = domain.blocks();

    const std::optional<domain::ElementWeight>& element_weight =
        get<domain::Tags::ElementDistribution>(local_cache);

    domain::BlockZCurveProcDistribution<Dim> element_distribution{};
    if (element_weight.has_value()) {
      const std::unordered_map<ElementId<Dim>, double> element_costs =
          domain::get_element_costs(blocks, initial_refinement_levels,
                                    initial_extents, element_weight.value(),
                                    basis, quadrature);
      element_distribution = domain::BlockZCurveProcDistribution<Dim>{
          element_costs,   num_of_procs_to_use,
          blocks,          initial_refinement_levels,
          initial_extents, procs_to_ignore};
    }

    // Will be used to print domain diagnostic info
    std::vector<size_t> elements_per_core(number_of_procs, 0_st);
    std::vector<size_t> elements_per_node(number_of_nodes, 0_st);
    std::vector<size_t> grid_points_per_core(number_of_procs, 0_st);
    std::vector<size_t> grid_points_per_node(number_of_nodes, 0_st);

    size_t which_proc = 0;
    for (const auto& block : blocks) {
      const size_t grid_points_per_element = alg::accumulate(
          initial_extents[block.id()], 1_st, std::multiplies<size_t>());

      const std::vector<ElementId<Dim>> element_ids = initial_element_ids(
          block.id(), initial_refinement_levels[block.id()]);

      // Distributed with weighted space-filling curve
      if (element_weight.has_value()) {
        for (const auto& element_id : element_ids) {
          const size_t target_proc =
              element_distribution.get_proc_for_element(element_id);
          element_array(element_id)
              .insert(global_cache, initialization_items, target_proc);

          const size_t target_node =
              Parallel::node_of<size_t>(target_proc, local_cache);
          ++elements_per_core[target_proc];
          ++elements_per_node[target_node];
          grid_points_per_core[target_proc] += grid_points_per_element;
          grid_points_per_node[target_node] += grid_points_per_element;
        }
      } else {
        // Distributed with round-robin
        for (size_t i = 0; i < element_ids.size(); ++i) {
          while (procs_to_ignore.find(which_proc) != procs_to_ignore.end()) {
            which_proc = which_proc + 1 == number_of_procs ? 0 : which_proc + 1;
          }

          element_array(ElementId<Dim>(element_ids[i]))
              .insert(global_cache, initialization_items, which_proc);

          const size_t target_node =
              Parallel::node_of<size_t>(which_proc, local_cache);
          ++elements_per_core[which_proc];
          ++elements_per_node[target_node];
          grid_points_per_core[which_proc] += grid_points_per_element;
          grid_points_per_node[target_node] += grid_points_per_element;

          which_proc = which_proc + 1 == number_of_procs ? 0 : which_proc + 1;
        }
      }
    }
    element_array.doneInserting();

    Parallel::printf("\n%s\n", domain::diagnostic_info(
                                   domain.blocks().size(), local_cache,
                                   elements_per_core, elements_per_node,
                                   grid_points_per_core, grid_points_per_node));
  }
};

/*!
 * \brief The parallel component responsible for managing the DG elements that
 * compose the computational domain
 *
 * This parallel component will perform the actions specified by the
 * `PhaseDepActionList`.
 *
 * \note This parallel component is nearly identical to
 * `Evolution/DiscontinuousGalerkin/DgElementArray.hpp` right now, but will
 * likely diverge in the future, for instance to support a multigrid domain.
 */
template <typename Metavariables, typename PhaseDepActionList,
          typename ElementsAllocator =
              DefaultElementsAllocator<Metavariables::volume_dim>>
struct DgElementArray {
  static constexpr size_t volume_dim = Metavariables::volume_dim;
  static_assert(
      tt::assert_conforms_to_v<ElementsAllocator,
                               Parallel::protocols::ArrayElementsAllocator>);

  using chare_type = Parallel::Algorithms::Array;
  static constexpr bool checkpoint_data = true;
  using metavariables = Metavariables;
  using phase_dependent_action_list = PhaseDepActionList;
  using array_index = ElementId<volume_dim>;

  using const_global_cache_tags =
      tmpl::list<domain::Tags::Domain<volume_dim>,
                 domain::Tags::ElementDistribution>;

  using array_allocation_tags =
      typename ElementsAllocator::template array_allocation_tags<
          DgElementArray>;

  using simple_tags_from_options =
      tmpl::append<Parallel::get_simple_tags_from_options<
                       Parallel::get_initialization_actions_list<
                           phase_dependent_action_list>>,
                   array_allocation_tags>;

  static void allocate_array(
      Parallel::CProxy_GlobalCache<Metavariables>& global_cache,
      const tuples::tagged_tuple_from_typelist<simple_tags_from_options>&
          initialization_items,
      const tuples::tagged_tuple_from_typelist<array_allocation_tags>&
          array_allocation_items = {},
      const std::unordered_set<size_t>& procs_to_ignore = {}) {
    ElementsAllocator::template apply<DgElementArray>(
        global_cache, initialization_items, array_allocation_items,
        procs_to_ignore);
  }

  static void execute_next_phase(
      const Parallel::Phase next_phase,
      Parallel::CProxy_GlobalCache<Metavariables>& global_cache) {
    auto& local_cache = *Parallel::local_branch(global_cache);
    Parallel::get_parallel_component<DgElementArray>(local_cache)
        .start_phase(next_phase);
  }
};

}  // namespace elliptic
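
// Usage sketch (an illustration, not part of the original header): how an
// executable's metavariables might select this parallel component. The names
// `Metavars` and `phase_dep_action_list` are hypothetical placeholders; a
// real executable assembles the phase-dependent action list and the rest of
// the metavariables elsewhere.
//
// struct Metavars {
//   static constexpr size_t volume_dim = 3;
//   using element_array =
//       elliptic::DgElementArray<Metavars, phase_dep_action_list>;
//   using component_list = tmpl::list<element_array>;
// };
//
// The third template parameter defaults to
// `elliptic::DefaultElementsAllocator<volume_dim>`; supply a custom type that
// conforms to `Parallel::protocols::ArrayElementsAllocator` to change how
// elements are created and distributed.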