SpECTRE Documentation Coverage Report
Current view: top level - ParallelAlgorithms/Events - MonitorMemory.hpp Hit Total Coverage
Commit: d0fc80462417e83e5cddfa1b9901bb4a9b6af4d6 Lines: 2 24 8.3 %
Date: 2024-03-29 00:33:31
Legend: Lines: hit not hit

          Line data    Source code
       1           0 : // Distributed under the MIT License.
       2             : // See LICENSE.txt for details.
       3             : 
       4             : #pragma once
       5             : 
       6             : #include <cstddef>
       7             : #include <limits>
       8             : #include <optional>
       9             : #include <pup.h>
      10             : #include <string>
      11             : #include <type_traits>
      12             : #include <unordered_map>
                     : #include <unordered_set>
      13             : #include <utility>
      14             : #include <vector>
      15             : 
      16             : #include "DataStructures/DataBox/DataBox.hpp"
      17             : #include "DataStructures/DataBox/TagName.hpp"
      18             : #include "DataStructures/DataVector.hpp"
      19             : #include "Domain/Structure/Element.hpp"
      20             : #include "Domain/Structure/ElementId.hpp"
      21             : #include "Domain/Tags.hpp"
      22             : #include "IO/Observer/Helpers.hpp"
      23             : #include "IO/Observer/ObservationId.hpp"
      24             : #include "IO/Observer/ObserverComponent.hpp"
      25             : #include "IO/Observer/ReductionActions.hpp"
      26             : #include "IO/Observer/Tags.hpp"
      27             : #include "IO/Observer/TypeOfObservation.hpp"
      28             : #include "Options/Auto.hpp"
      29             : #include "Options/String.hpp"
      30             : #include "Parallel/GlobalCache.hpp"
      31             : #include "Parallel/Info.hpp"
      32             : #include "Parallel/Invoke.hpp"
      33             : #include "Parallel/Local.hpp"
      34             : #include "Parallel/MemoryMonitor/MemoryMonitor.hpp"
      35             : #include "Parallel/MemoryMonitor/Tags.hpp"
      36             : #include "Parallel/Reduction.hpp"
      37             : #include "Parallel/TypeTraits.hpp"
      38             : #include "ParallelAlgorithms/Actions/MemoryMonitor/ProcessArray.hpp"
      39             : #include "ParallelAlgorithms/Actions/MemoryMonitor/ProcessGroups.hpp"
      40             : #include "ParallelAlgorithms/Actions/MemoryMonitor/ProcessSingleton.hpp"
      41             : #include "ParallelAlgorithms/EventsAndTriggers/Event.hpp"
      42             : #include "Utilities/ErrorHandling/Error.hpp"
      43             : #include "Utilities/Functional.hpp"
      44             : #include "Utilities/Serialization/CharmPupable.hpp"
      45             : #include "Utilities/Serialization/Serialize.hpp"
      46             : #include "Utilities/TMPL.hpp"
      47             : 
      48             : /// \cond
      49             : namespace Parallel::Algorithms {  // Forward declarations only; nothing is defined here.
      50             : struct Array;
      51             : struct Group;
      52             : struct Nodegroup;
      53             : struct Singleton;
      54             : }  // namespace Parallel::Algorithms
      55             : /// \endcond
      56             : 
      57             : namespace Events {
      58             : /*!
      59             :  * \brief Event run on the DgElementArray that will monitor the memory usage of
      60             :  * parallel components in megabytes.
      61             :  *
      62             :  * \details Given a list of parallel component names from Options, this will
      63             :  * calculate the memory usage of each component and write it to disk in the
      64             :  * reductions file under the `/MemoryMonitors/` group. The name of each file is
      65             :  * the `pretty_type::name` of each parallel component.
      66             :  *
      67             :  * The parallel components available to monitor are the ones defined in the
      68             :  * `component_list` type alias in the metavariables. In addition to these
      69             :  * components, you can also monitor the size of the GlobalCache. To see which
      70             :  * parallel components are available to monitor, request to monitor an invalid
      71             :  * parallel component ("Blah" for example) in the input file. An ERROR will
      72             :  * occur and a list of the available components to monitor will be printed.
                     :  *
                     :  * Passing `All` for the `ComponentsToMonitor` option selects every component
                     :  * that is not an Array, plus the DgElementArray.
                     :  *
                     :  * When the event runs, each monitored component is handled by kind:
                     :  * - Array: every element contributes its own size (bytes / 1.0e6) to a
                     :  *   reduction that sums the sizes per node.
                     :  * - Singleton, group, nodegroup, GlobalCache: only the element for which
                     :  *   `is_zeroth_element` is true triggers the measurement.
      73             :  *
      74             :  * \note Currently, the only Parallel::Algorithms::Array parallel component that
      75             :  * can be monitored is the DgElementArray itself.
                     :  *
                     :  * \tparam Dim dimension of the `::Element<Dim>` this event is invoked with.
      76             :  */
      77             : 
      78             : template <size_t Dim>
      79           1 : class MonitorMemory : public Event {
      80             :  private:
      81             :   // Reduction data contributed by each array element in operator()
      82           0 :   using ReductionData = Parallel::ReductionData<
      83             :       // Observation value (time); must be equal across all contributions
      84             :       Parallel::ReductionDatum<double, funcl::AssertEqual<>>,
      85             :       // Per-node memory usage in MB, summed element-wise across contributions
      86             :       Parallel::ReductionDatum<std::vector<double>,
      87             :                                funcl::ElementWise<funcl::Plus<>>>>;
      88             : 
      89             :  public:
      90           0 :   explicit MonitorMemory(CkMigrateMessage* msg);
      91             :   using PUP::able::register_constructor;
      92           0 :   WRAPPED_PUPable_decl_template(MonitorMemory);  // NOLINT
      93             : 
      94           0 :   struct ComponentsToMonitor {
      95           0 :     using type =
      96             :         Options::Auto<std::vector<std::string>, Options::AutoLabel::All>;
      97           0 :     static constexpr Options::String help = {
      98             :         "Names of parallel components to monitor the memory usage of. If you'd "
      99             :         "like to monitor all available parallel components, pass 'All' "
     100             :         "instead."};
     101             :   };
     102             : 
     103           0 :   using options = tmpl::list<ComponentsToMonitor>;
     104             : 
     105           0 :   static constexpr Options::String help =
     106             :       "Observe memory usage of parallel components.";
     107             : 
     108           0 :   MonitorMemory() = default;
     109             : 
     110             :   template <typename Metavariables>
     111           0 :   MonitorMemory(
     112             :       const std::optional<std::vector<std::string>>& components_to_monitor,
     113             :       const Options::Context& context, Metavariables /*meta*/);
     114             : 
     115           0 :   using observed_reduction_data_tags =
     116             :       observers::make_reduction_data_tags<tmpl::list<ReductionData>>;
     117             : 
     118           0 :   using compute_tags_for_observation_box = tmpl::list<>;
     119             : 
     120           0 :   using return_tags = tmpl::list<>;
     121           0 :   using argument_tags = tmpl::list<domain::Tags::Element<Dim>>;
     122             : 
     123             :   template <typename Metavariables, typename ArrayIndex,
     124             :             typename ParallelComponent>
     125           0 :   void operator()(const ::Element<Dim>& element,
     126             :                   Parallel::GlobalCache<Metavariables>& cache,
     127             :                   const ArrayIndex& array_index,
     128             :                   const ParallelComponent* const /*meta*/,
     129             :                   const ObservationValue& observation_value) const;
     130             : 
     131           0 :   using observation_registration_tags = tmpl::list<>;
     132             : 
     133             :   std::optional<
     134             :       std::pair<observers::TypeOfObservation, observers::ObservationKey>>
     135           0 :   get_observation_type_and_key_for_registration() const {
     136             :     return {};  // always an empty optional
     137             :   }
     138             : 
     139           0 :   using is_ready_argument_tags = tmpl::list<>;
     140             : 
     141             :   template <typename Metavariables, typename ArrayIndex, typename Component>
     142           0 :   bool is_ready(Parallel::GlobalCache<Metavariables>& /*cache*/,
     143             :                 const ArrayIndex& /*array_index*/,
     144             :                 const Component* const /*meta*/) const {
     145             :     return true;  // unconditionally ready; all arguments are ignored
     146             :   }
     147             : 
     148           1 :   bool needs_evolved_variables() const override { return false; }
     149             : 
     150             :   // NOLINTNEXTLINE(google-runtime-references)
     151           0 :   void pup(PUP::er& p) override;
     152             : 
     153             :  private:
     154           0 :   std::unordered_set<std::string> components_to_monitor_{};  // names of the components to monitor
     155             : };
     156             : 
     157             : /// \cond
     158             : template <size_t Dim>  // Migration constructor: forwards `msg` to the Event base, nothing else.
     159             : MonitorMemory<Dim>::MonitorMemory(CkMigrateMessage* msg) : Event(msg) {}
     160             : 
     161             : template <size_t Dim>  // Option-parsing constructor: validates the names and fills components_to_monitor_.
     162             : template <typename Metavariables>
     163             : MonitorMemory<Dim>::MonitorMemory(
     164             :     const std::optional<std::vector<std::string>>& components_to_monitor,
     165             :     const Options::Context& context, Metavariables /*meta*/) {
     166             :   using component_list = tmpl::push_back<typename Metavariables::component_list,
     167             :                                          Parallel::GlobalCache<Metavariables>>;
     168             :   std::unordered_map<std::string, std::string> existing_components{};  // component name -> chare type name
     169             :   std::string str_component_list{};  // " - name\n" list of monitorable components for the error message
     170             : 
     171             :   tmpl::for_each<component_list>(
     172             :       [&existing_components, &str_component_list](auto component_v) {
     173             :         using component = tmpl::type_from<decltype(component_v)>;
     174             :         const std::string component_name = pretty_type::name<component>();
     175             :         const std::string chare_type_name =
     176             :             pretty_type::name<typename component::chare_type>();
     177             :         existing_components[component_name] = chare_type_name;
     178             :         // List every non-Array component; the only Array listed is DgElementArray
     179             :         if (chare_type_name != "Array") {
     180             :           str_component_list += " - " + component_name + "\n";
     181             :         } else if (component_name == "DgElementArray") {
     182             :           str_component_list += " - " + component_name + "\n";
     183             :         }
     184             :       });
     185             : 
     186             :   // A list of names was specified (the optional holds a value)
     187             :   if (components_to_monitor.has_value()) {
     188             :     for (const auto& component : *components_to_monitor) {
     189             :       // Do some checks:
     190             :       //  1. Make sure the components requested are viable components. This
     191             :       //     protects against spelling errors.
     192             :       //  2. Currently the only charm Array you can monitor the memory of is
     193             :       //     the DgElementArray so enforce this.
     194             :       if (existing_components.count(component) != 1) {
     195             :         PARSE_ERROR(
     196             :             context,
     197             :             "Cannot monitor memory usage of unknown parallel component '"
     198             :                 << component
     199             :                 << "'. Please choose from the existing parallel components:\n"
     200             :                 << str_component_list);
     201             :       } else if (existing_components.at(component) == "Array" and
     202             :                  component != "DgElementArray") {
     203             :         PARSE_ERROR(
     204             :             context,
     205             :             "Cannot monitor the '"
     206             :                 << component
     207             :                 << "' parallel component. Currently, the only Array parallel "
     208             :                    "component allowed to be monitored is the "
     209             :                    "DgElementArray.");
     210             :       }
     211             : 
     212             :       components_to_monitor_.insert(component);
     213             :     }
     214             :   } else {
     215             :     // 'All' was specified (empty optional). Keep every non-Array component and,
     216             :     // of the Arrays, only the DgElementArray
     217             :     for (const auto& [name, chare] : existing_components) {
     218             :       if (chare != "Array") {
     219             :         components_to_monitor_.insert(name);
     220             :       } else if (name == "DgElementArray") {
     221             :         components_to_monitor_.insert(name);
     222             :       }
     223             :     }
     224             :   }
     225             : }
     226             : 
     227             : template <size_t Dim>  // Runs the memory measurement for every monitored component in component_list.
     228             : template <typename Metavariables, typename ArrayIndex,
     229             :           typename ParallelComponent>
     230             : void MonitorMemory<Dim>::operator()(
     231             :     const ::Element<Dim>& element, Parallel::GlobalCache<Metavariables>& cache,
     232             :     const ArrayIndex& array_index, const ParallelComponent* const /*meta*/,
     233             :     const ObservationValue& observation_value) const {
     234             :   using component_list = tmpl::push_back<typename Metavariables::component_list,
     235             :                                          Parallel::GlobalCache<Metavariables>>;
     236             : 
     237             :   tmpl::for_each<component_list>([this, &observation_value, &element, &cache,
     238             :                                   &array_index](auto component_v) {
     239             :     using component = tmpl::type_from<decltype(component_v)>;
     240             : 
     241             :     // If we aren't monitoring this parallel component, skip it (this return
     242             :     // only leaves the lambda, i.e. moves on to the next component)
     243             :     if (components_to_monitor_.count(pretty_type::name<component>()) != 1) {
     244             :       return;
     245             :     }
     246             : 
     247             :     // Certain components only need to be triggered once, so we have a special
     248             :     // element designated to be the one that triggers memory monitoring: the
     249             :     // element for which is_zeroth_element(element_id) returns true.
     250             :     const auto& element_id = element.id();
     251             :     // [[maybe_unused]] because the Array `if constexpr` branch never reads this
     252             :     // variable, which made GCC-7 warn about an unused variable.
     253             :     [[maybe_unused]] const bool designated_element =
     254             :         is_zeroth_element(element_id);
     255             : 
     256             :     // If this is an array, this is run on every element. The option-parsing
     257             :     // constructor already guarantees that the only Array in
     258             :     // components_to_monitor_ is the DgElementArray. If you want to monitor other
     259             :     // Arrays, the implementation will need to be generalized.
     260             :     if constexpr (Parallel::is_array_v<component>) {
     261             :       auto& memory_monitor_proxy = Parallel::get_parallel_component<
     262             :           mem_monitor::MemoryMonitor<Metavariables>>(cache);
     263             :       auto array_element_proxy =
     264             :           Parallel::get_parallel_component<component>(cache)[array_index];
     265             :       const double size_in_bytes = static_cast<double>(
     266             :           size_of_object_in_bytes(*Parallel::local(array_element_proxy)));
     267             :       const double size_in_megabytes = size_in_bytes / 1.0e6;
     268             : 
     269             :       // Vector with one entry per node we are running on. Set the
     270             :       // 'my_node'th element of the vector to the size (in MB) of this Element.
     271             :       // Then when we reduce, we will have a vector with 'num_nodes' elements,
     272             :       // each of which represents the total memory usage of all Elements on that
     273             :       // node.
     274             :       const size_t num_nodes = Parallel::number_of_nodes<size_t>(
     275             :           *Parallel::local(array_element_proxy));
     276             :       const size_t my_node =
     277             :           Parallel::my_node<size_t>(*Parallel::local(array_element_proxy));
     278             :       std::vector<double> data(num_nodes, 0.0);
     279             :       data[my_node] = size_in_megabytes;
     280             : 
     281             :       Parallel::contribute_to_reduction<
     282             :           mem_monitor::ProcessArray<ParallelComponent>>(  // NOTE(review): uses ParallelComponent (the caller), not `component` - confirm they always coincide
     283             :           ReductionData{observation_value.value, data}, array_element_proxy,
     284             :           memory_monitor_proxy);
     285             :     } else if constexpr (Parallel::is_singleton_v<component>) {
     286             :       // If this is a singleton, we only run this once so use the designated
     287             :       // element. Nothing to reduce with singletons so just call the simple
     288             :       // action on the singleton
     289             :       if (designated_element) {
     290             :         auto& singleton_proxy =
     291             :             Parallel::get_parallel_component<component>(cache);
     292             : 
     293             :         Parallel::simple_action<mem_monitor::ProcessSingleton>(
     294             :             singleton_proxy, observation_value.value);
     295             :       }
     296             :     } else if constexpr (Parallel::is_nodegroup_v<component> or
     297             :                          Parallel::is_group_v<component>) {
     298             :       // If this is a (node)group, call a simple action on each branch if on
     299             :       // the designated element
     300             :       if (designated_element) {
     301             :         // Can't run simple actions on the cache so broadcast a specific entry
     302             :         // method that will calculate the size and send it to the memory
     303             :         // monitor
     304             :         if constexpr (std::is_same_v<component,
     305             :                                      Parallel::GlobalCache<Metavariables>>) {
     306             :           auto cache_proxy = cache.get_this_proxy();
     307             : 
     308             :           // This will be called on all branches of the GlobalCache
     309             :           cache_proxy.compute_size_for_memory_monitor(observation_value.value);
     310             :         } else {
     311             :           // Groups and nodegroups share an action (mem_monitor::ProcessGroups)
     312             :           auto& group_proxy =
     313             :               Parallel::get_parallel_component<component>(cache);
     314             : 
     315             :           // This will be called on all branches of the (node)group
     316             :           Parallel::simple_action<mem_monitor::ProcessGroups>(
     317             :               group_proxy, observation_value.value);
     318             :         }
     319             :       }
     320             :     }
     321             :   });
     322             : }
     322             : 
     323             : template <size_t Dim>  // Serializes the Event base first, then components_to_monitor_.
     324             : void MonitorMemory<Dim>::pup(PUP::er& p) {
     325             :   Event::pup(p);
     326             :   p | components_to_monitor_;
     327             : }
     328             : 
     329             : template <size_t Dim>  // Out-of-class definition of the static my_PUP_ID, set to 0 (presumably declared by WRAPPED_PUPable_decl_template - confirm).
     330             : PUP::able::PUP_ID MonitorMemory<Dim>::my_PUP_ID = 0;  // NOLINT
     331             : /// \endcond
     332             : 
     333             : }  // namespace Events

Generated by: LCOV version 1.14