// Distributed under the MIT License.
// See LICENSE.txt for details.

#pragma once

#include <charm++.h>
#include <cstddef>
#include <functional>
#include <optional>
#include <unordered_map>
#include <unordered_set>
#include <vector>

#include "Domain/Block.hpp"
#include "Domain/Creators/DomainCreator.hpp"
#include "Domain/Creators/OptionTags.hpp"
#include "Domain/Creators/Tags/Domain.hpp"
#include "Domain/Creators/Tags/InitialExtents.hpp"
#include "Domain/Creators/Tags/InitialRefinementLevels.hpp"
#include "Domain/Domain.hpp"
#include "Domain/ElementDistribution.hpp"
#include "Domain/Structure/ElementId.hpp"
#include "Domain/Structure/InitialElementIds.hpp"
#include "Domain/Tags.hpp"
#include "Domain/Tags/ElementDistribution.hpp"
#include "Elliptic/DiscontinuousGalerkin/Tags.hpp"
#include "Parallel/Algorithms/AlgorithmArray.hpp"
#include "Parallel/DomainDiagnosticInfo.hpp"
#include "Parallel/GlobalCache.hpp"
#include "Parallel/Info.hpp"
#include "Parallel/Local.hpp"
#include "Parallel/ParallelComponentHelpers.hpp"
#include "Parallel/Phase.hpp"
#include "Parallel/Printf/Printf.hpp"
#include "Parallel/Protocols/ArrayElementsAllocator.hpp"
#include "Parallel/Tags/Parallelization.hpp"
#include "Utilities/Literals.hpp"
#include "Utilities/Numeric.hpp"
#include "Utilities/ProtocolHelpers.hpp"
#include "Utilities/System/ParallelInfo.hpp"
#include "Utilities/TMPL.hpp"
#include "Utilities/TaggedTuple.hpp"

namespace elliptic {

/*!
 * \brief A `Parallel::protocols::ArrayElementsAllocator` that creates array
 * elements to cover the initial computational domain
 *
 * An element is created for every element ID in every block, as determined by
 * the `initial_element_ids` function and the option-created
 * `domain::Tags::Domain` and `domain::Tags::InitialRefinementLevels`. If
 * `domain::Tags::ElementDistribution` holds a weighting scheme, the elements
 * are distributed over the processors by a
 * `domain::BlockZCurveProcDistribution` according to their computational
 * cost, which is determined by their number of grid points; otherwise the
 * elements are distributed round-robin. In both cases, an unordered set of
 * `size_t`s can be passed to the `allocate_array` function to specify
 * physical processors on which no elements should be placed.
 *
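 * \par Example
 * A minimal sketch of selecting this allocator for an element array by
 * passing it as the third template parameter of `elliptic::DgElementArray`
 * (the `Metavariables` and `phase_dependent_action_list` names here are
 * illustrative placeholders, not definitions from this file):
 *
 * \code
 * using element_array = elliptic::DgElementArray<
 *     Metavariables, phase_dependent_action_list,
 *     elliptic::DefaultElementsAllocator<Metavariables::volume_dim>>;
 * \endcode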
 */
template <size_t Dim>
struct DefaultElementsAllocator
    : tt::ConformsTo<Parallel::protocols::ArrayElementsAllocator> {
  template <typename ParallelComponent>
  using array_allocation_tags = tmpl::list<>;

  template <typename ParallelComponent, typename Metavariables,
            typename... InitializationTags>
  static void apply(
      Parallel::CProxy_GlobalCache<Metavariables>& global_cache,
      const tuples::TaggedTuple<InitializationTags...>& initialization_items,
      const tuples::tagged_tuple_from_typelist<
          typename ParallelComponent::array_allocation_tags>&
      /*array_allocation_items*/
      = {},
      const std::unordered_set<size_t>& procs_to_ignore = {}) {
    auto& local_cache = *Parallel::local_branch(global_cache);
    auto& element_array =
        Parallel::get_parallel_component<ParallelComponent>(local_cache);
    const auto& initial_extents =
        get<domain::Tags::InitialExtents<Dim>>(initialization_items);
    const auto& quadrature =
        Parallel::get<elliptic::dg::Tags::Quadrature>(local_cache);

    const auto& domain = Parallel::get<domain::Tags::Domain<Dim>>(local_cache);
    const auto& initial_refinement_levels =
        get<domain::Tags::InitialRefinementLevels<Dim>>(initialization_items);

    const size_t number_of_procs =
        Parallel::number_of_procs<size_t>(local_cache);
    const size_t number_of_nodes =
        Parallel::number_of_nodes<size_t>(local_cache);
    const size_t num_of_procs_to_use = number_of_procs - procs_to_ignore.size();

    const auto& blocks = domain.blocks();

    const std::optional<domain::ElementWeight>& element_weight =
        get<domain::Tags::ElementDistribution>(local_cache);

    domain::BlockZCurveProcDistribution<Dim> element_distribution{};
    if (element_weight.has_value()) {
      const std::unordered_map<ElementId<Dim>, double> element_costs =
          domain::get_element_costs(blocks, initial_refinement_levels,
                                    initial_extents, element_weight.value(),
                                    quadrature);
      element_distribution = domain::BlockZCurveProcDistribution<Dim>{
          element_costs,   num_of_procs_to_use,
          blocks,          initial_refinement_levels,
          initial_extents, procs_to_ignore};
    }

    // Will be used to print domain diagnostic info
    std::vector<size_t> elements_per_core(number_of_procs, 0_st);
    std::vector<size_t> elements_per_node(number_of_nodes, 0_st);
    std::vector<size_t> grid_points_per_core(number_of_procs, 0_st);
    std::vector<size_t> grid_points_per_node(number_of_nodes, 0_st);

    size_t which_proc = 0;
    for (const auto& block : blocks) {
      const size_t grid_points_per_element = alg::accumulate(
          initial_extents[block.id()], 1_st, std::multiplies<size_t>());

      const std::vector<ElementId<Dim>> element_ids = initial_element_ids(
          block.id(), initial_refinement_levels[block.id()]);

      // Distribute using the weighted space-filling curve
      if (element_weight.has_value()) {
        for (const auto& element_id : element_ids) {
          const size_t target_proc =
              element_distribution.get_proc_for_element(element_id);
          element_array(element_id)
              .insert(global_cache, initialization_items, target_proc);

          const size_t target_node =
              Parallel::node_of<size_t>(target_proc, local_cache);
          ++elements_per_core[target_proc];
          ++elements_per_node[target_node];
          grid_points_per_core[target_proc] += grid_points_per_element;
          grid_points_per_node[target_node] += grid_points_per_element;
        }
      } else {
        // Distribute round-robin
        for (const auto& element_id : element_ids) {
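          // Skip any processors that should be ignored, wrapping around to
          // processor 0 after the highest processor number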
          while (procs_to_ignore.find(which_proc) != procs_to_ignore.end()) {
            which_proc = which_proc + 1 == number_of_procs ? 0 : which_proc + 1;
          }

          element_array(element_id)
              .insert(global_cache, initialization_items, which_proc);

          const size_t target_node =
              Parallel::node_of<size_t>(which_proc, local_cache);
          ++elements_per_core[which_proc];
          ++elements_per_node[target_node];
          grid_points_per_core[which_proc] += grid_points_per_element;
          grid_points_per_node[target_node] += grid_points_per_element;

          which_proc = which_proc + 1 == number_of_procs ? 0 : which_proc + 1;
        }
      }
    }
    element_array.doneInserting();

    Parallel::printf("\n%s\n", domain::diagnostic_info(
                                   domain.blocks().size(), local_cache,
                                   elements_per_core, elements_per_node,
                                   grid_points_per_core, grid_points_per_node));
  }
};

/*!
 * \brief The parallel component responsible for managing the DG elements that
 * compose the computational domain
 *
 * This parallel component will perform the actions specified by the
 * `PhaseDepActionList`.
 *
 * \note This parallel component is nearly identical to
 * `Evolution/DiscontinuousGalerkin/DgElementArray.hpp` right now, but will
 * likely diverge in the future, for instance to support a multigrid domain.
 */
template <typename Metavariables, typename PhaseDepActionList,
          typename ElementsAllocator =
              DefaultElementsAllocator<Metavariables::volume_dim>>
struct DgElementArray {
  static constexpr size_t volume_dim = Metavariables::volume_dim;
  static_assert(
      tt::assert_conforms_to_v<ElementsAllocator,
                               Parallel::protocols::ArrayElementsAllocator>);

  using chare_type = Parallel::Algorithms::Array;
  using metavariables = Metavariables;
  using phase_dependent_action_list = PhaseDepActionList;
  using array_index = ElementId<volume_dim>;

  using const_global_cache_tags =
      tmpl::list<domain::Tags::Domain<volume_dim>,
                 domain::Tags::ElementDistribution>;

  using array_allocation_tags =
      typename ElementsAllocator::template array_allocation_tags<
          DgElementArray>;

  using simple_tags_from_options =
      tmpl::append<Parallel::get_simple_tags_from_options<
                       Parallel::get_initialization_actions_list<
                           phase_dependent_action_list>>,
                   array_allocation_tags>;

  static void allocate_array(
      Parallel::CProxy_GlobalCache<Metavariables>& global_cache,
      const tuples::tagged_tuple_from_typelist<simple_tags_from_options>&
          initialization_items,
      const tuples::tagged_tuple_from_typelist<array_allocation_tags>&
          array_allocation_items = {},
      const std::unordered_set<size_t>& procs_to_ignore = {}) {
    ElementsAllocator::template apply<DgElementArray>(
        global_cache, initialization_items, array_allocation_items,
        procs_to_ignore);
  }

  static void execute_next_phase(
      const Parallel::Phase next_phase,
      Parallel::CProxy_GlobalCache<Metavariables>& global_cache) {
    auto& local_cache = *Parallel::local_branch(global_cache);
    Parallel::get_parallel_component<DgElementArray>(local_cache)
        .start_phase(next_phase);
  }
};

}  // namespace elliptic