Line data Source code
1 1 : // Distributed under the MIT License.
2 : // See LICENSE.txt for details.
3 :
4 : /// \file
5 : /// Defines class h5::H5File
6 :
7 : #pragma once
8 :
9 : #include <algorithm>
10 : #include <exception>
11 : #include <hdf5.h>
12 : #include <memory>
13 : #include <ostream>
14 : #include <string>
15 : #include <tuple>
16 : #include <type_traits>
17 : #include <typeinfo>
18 : #include <vector>
19 :
20 : #include "IO/H5/AccessType.hpp"
21 : #include "IO/H5/CheckH5.hpp"
22 : #include "IO/H5/Header.hpp" // IWYU pragma: keep
23 : #include "IO/H5/Helpers.hpp"
24 : #include "IO/H5/Object.hpp"
25 : #include "IO/H5/OpenGroup.hpp"
26 : #include "Utilities/ErrorHandling/Error.hpp"
27 : #include "Utilities/FileSystem.hpp"
28 : #include "Utilities/PrettyType.hpp"
29 :
30 : namespace h5 {
31 : /*!
32 : * \ingroup HDF5Group
33 : * \brief Opens an HDF5 file for access and allows manipulation of data
34 : *
35 : * Opens an HDF5 file either in ReadOnly or ReadWrite mode depending on the
36 : * template parameter `Access_t`. In ReadWrite mode h5::Object's can be inserted
37 : * into the file, and objects can be retrieved to have their data manipulated.
38 : * Example objects are dat files, text files, and volume data files. A single
39 : * H5File can contain many different objects so that the number of files stored
40 : * during a simulation is reduced.
41 : *
42 : * When an h5::Object inside an H5File is opened or created the H5File object
43 : * owns it (through a `std::unique_ptr`) and hands out a reference to it.
44 : *
45 : * \warning Only one object can be open at a time, which means if a reference to
46 : * the object is kept around after the H5File's current object is closed there
47 : * is a dangling reference. Also, this means that after you insert an object,
48 : * you must close that object before you can insert/open another.
49 : *
50 : * \example
51 : * To open a file for read-write access:
52 : * \snippet Test_H5File.cpp h5file_readwrite_file
53 : *
54 : * \note The dangling reference issue could be fixed by having a function in
55 : * addition to `get` that takes a lambda. The lambda takes exactly one parameter
56 : * of the type of the h5::Object it will be operating on. While this approach is
57 : * likely to be syntactically strange for many users it will most likely be more
58 : * performant than the `shared_ptr` solution.
59 : *
60 : * @tparam Access_t either h5::AccessType::ReadWrite or h5::AccessType::ReadOnly
61 : */
62 : template <AccessType Access_t>
63 1 : class H5File {
64 : public:
65 : // Empty constructor for classes which store an H5File and need to be
66 : // charm-compatible. Members keep their in-class defaults (`file_id_` is -1).
67 0 : H5File() = default;
68 :
69 : /*!
70 : * \requires `file_name` is a valid path and ends in `.h5`.
71 : * \effects On object creation opens the HDF5 file at `file_name`
72 : *
73 : * @param file_name the path to the file to open or create
74 : * @param append_to_file if true allow appending to the file, otherwise abort
75 : * the simulation if the file exists
76 : * @param input_source a string containing the input source options (yaml
77 : * formatted). Defaults to an empty string; when writing, specify the provided
78 : * yaml input options (if any) to write them to the output file's
79 : * `InputSource.yaml` attribute.
80 : * @param use_file_locking Toggle file locking (default false).
81 : * HDF5 file locking is explained here:
82 : * https://github.com/HDFGroup/hdf5/blob/develop/doc/file-locking.md.
83 : * This toggle only has an effect if the HDF5 library supports
84 : * 'H5Pset_file_locking'. Otherwise, file locking is enabled if the HDF5
85 : * library was built with it, which it probably was. If file locking is
86 : * enabled, simulations may crash when the file they try to access is being
87 : * read by another process (like an analysis tool). We could make this more
88 : * resilient in the future by waiting to acquire the file lock with a timeout,
89 : * and/or retrying IO operations after progressively longer wait times (e.g.
90 : * first try again right away, then also print to terminal after some retries,
91 : * then eventually abort to avoid wasting compute time on a run that can't do
92 : * IO).
93 : */
94 1 : explicit H5File(std::string file_name, bool append_to_file = false,
95 : const std::string& input_source = ""s,
96 : bool use_file_locking = false);
97 :
98 : /// \cond HIDDEN_SYMBOLS
99 : ~H5File();
100 : /// \endcond
101 :
102 : /// @{
103 : /*!
104 : * \brief It does not make sense to copy an object referring to a file, only
105 : * to move it.
106 : */
107 1 : H5File(const H5File& /*rhs*/) = delete;
108 1 : H5File& operator=(const H5File& /*rhs*/) = delete;
109 : /// @}
110 :
111 : /// \cond HIDDEN_SYMBOLS
112 : H5File(H5File&& rhs); // NOLINT
113 : H5File& operator=(H5File&& rhs); // NOLINT
114 : /// \endcond
115 :
116 : /// Get name of the H5 file
117 1 : const std::string& name() const { return file_name_; }
118 :
119 : /// Get a std::vector of the names of all immediate subgroups of the file
120 1 : const std::vector<std::string> groups() const {
121 : return h5::get_group_names(file_id_, "/");
122 : }
123 :
124 : /// \brief Return a vector of the full paths of all files below `group_name`
125 : /// \tparam ObjectType Only return a vector that contains this type of file.
126 : /// Default is `void` which returns all files.
127 : template <typename ObjectType = void>
128 1 : const std::vector<std::string> all_files(const std::string& group_name) const;
129 :
130 : /// Get the InputSource.yaml string embedded in the file
131 1 : std::string input_source() const;
132 :
133 : /// @{
134 : /*!
135 : * \requires `ObjectType` is a valid h5::Object derived class, `path`
136 : * is a valid path in the HDF5 file
137 : * \return a reference to the object inside the HDF5 file.
138 : *
139 : * @tparam ObjectType the type of the h5::Object to be retrieved, e.g. Dat
140 : * @param path the path of the retrieved object
141 : * @param args arguments forwarded to the ObjectType constructor
142 : */
143 : template <
144 : typename ObjectType, typename... Args,
145 : typename std::enable_if_t<((void)sizeof(ObjectType),
146 : Access_t == AccessType::ReadWrite)>* = nullptr>
147 1 : ObjectType& get(const std::string& path, Args&&... args);
148 :
149 : template <typename ObjectType, typename... Args>
150 1 : const ObjectType& get(const std::string& path, Args&&... args) const;
151 : /// @}
152 :
153 : /*!
154 : * \brief Insert an object into an H5 file.
155 : *
156 : * \requires `ObjectType` is a valid h5::Object derived class, `path` is a
157 : * valid path in the HDF5 file, and `args` are valid arguments to be forwarded
158 : * to the constructor of `ObjectType`.
159 : * \effects Creates a new H5 object of type `ObjectType` at the location
160 : * `path` in the HDF5 file.
161 : *
162 : * \return a reference to the created object.
163 : *
164 : * @tparam ObjectType the type of the h5::Object to be inserted, e.g. Dat
165 : * @param path the path of the inserted object
166 : * @param args additional arguments to be passed to the constructor of the
167 : * object
168 : */
169 : template <typename ObjectType, typename... Args>
170 1 : ObjectType& insert(const std::string& path, Args&&... args);
171 :
172 : /*!
173 : * \brief Inserts an object like `insert` if it does not exist, returns the
174 : * object if it does.
175 : */
176 : template <typename ObjectType, typename... Args>
177 1 : ObjectType& try_insert(const std::string& path, Args&&... args);
178 :
179 : /*!
180 : * \effects Closes the current object, if there is none then has no effect
181 : */
182 1 : void close_current_object() const { current_object_ = nullptr; }
183 :
184 : /*!
185 : * \effects Closes the H5 file. No H5 operations are permitted after this
186 : * operation.
187 : */
188 1 : void close() const;
189 :
/// \brief Check whether an object of type `ObjectType` exists at `path`
///
/// \return the existence flag (first tuple element) computed by
/// `check_if_object_exists<ObjectType>(path)`.
///
/// @tparam ObjectType the type of the h5::Object to look for, e.g. Dat
/// @param path the path of the object, without the object's extension
190 : template <typename ObjectType>
191 0 : bool exists(const std::string& path) const {
192 : auto exists_group_name = check_if_object_exists<ObjectType>(path);
193 : return std::get<0>(exists_group_name);
194 : }
195 :
196 : private:
197 : /// \cond HIDDEN_SYMBOLS
198 : template <typename ObjectType,
199 : std::enable_if_t<((void)sizeof(ObjectType),
200 : Access_t == AccessType::ReadWrite)>* = nullptr>
201 : ObjectType& convert_to_derived(
202 : std::unique_ptr<h5::Object>& current_object); // NOLINT
203 : template <typename ObjectType>
204 : const ObjectType& convert_to_derived(
205 : const std::unique_ptr<h5::Object>& current_object) const;
206 :
207 : void insert_header();
208 : void insert_source_archive();
209 :
210 : template <typename ObjectType>
211 : std::tuple<bool, detail::OpenGroup, std::string> check_if_object_exists(
212 : const std::string& path) const;
213 :
214 : std::string file_name_;
215 : // NOLINTNEXTLINE(spectre-mutable)
216 : mutable hid_t file_id_{-1};
217 : // NOLINTNEXTLINE(spectre-mutable)
218 : mutable std::unique_ptr<h5::Object> current_object_{nullptr};
219 : std::vector<std::string> h5_groups_;
220 : /// \endcond HIDDEN_SYMBOLS
221 : };
222 :
223 : // ======================================================================
224 : // H5File Definitions
225 : // ======================================================================
226 :
// Collects the full paths of every file found below `group_name`, descending
// into subgroups recursively.
//
// A name returned by `h5::get_group_names` is treated as a file when its last
// '.' is followed by exactly three characters, or when the text from its last
// '.' onward is ".gz"; any other name is treated as a group and descended into.
// The recursive call does not name `ObjectType`, so it uses the default
// (`void`) and returns every file; the filter on `ObjectType::extension()` is
// applied to the combined list at the level where a non-void type was asked
// for.
//
// NOTE(review): the filter keeps any path that *contains* the extension
// (`find`), not only paths that end with it.
227 : template <AccessType Access_t>
228 : template <typename ObjectType>
229 : const std::vector<std::string> H5File<Access_t>::all_files(
230 : const std::string& group_name) const {
231 : std::vector<std::string> groups = h5::get_group_names(file_id_, group_name);
232 :
233 : // Loop through the initial files and groups and get all subfiles and groups
234 : std::vector<std::string> all_files_and_groups{};
235 : for (auto it = groups.begin(); it != groups.end(); ++it) {
236 : // Path prefix for entries of this group; avoids a doubled '/' at the root
237 : const std::string prefix =
238 : group_name == "/" ? group_name : (group_name + "/");
239 :
240 : // If this is a file, there aren't any subfiles so add it to the overall
241 : // list and continue. Most extensions follow the ".XYZ" rule. Headers are
242 : // special though because they end in ".tar.gz"
243 : const auto extension_pos = it->find_last_of(".");
244 : if (not(extension_pos == std::string::npos) and
245 : (it->size() - extension_pos == 4 or
246 : it->substr(extension_pos) == ".gz")) {
247 : all_files_and_groups.insert(all_files_and_groups.end(), prefix + *it);
248 : continue;
249 : }
250 :
251 : // Not a file: recurse into the group to get all sub files (unfiltered)
252 : auto extra_files_and_groups = all_files(prefix + *it);
253 :
254 : // Insert the files to the overall list
255 : all_files_and_groups.insert(all_files_and_groups.end(),
256 : extra_files_and_groups.begin(),
257 : extra_files_and_groups.end());
258 : }
259 :
260 : // Filter out the ones we don't want (only when a specific type is requested)
261 : if constexpr (not std::is_same_v<ObjectType, void>) {
262 : const auto range_end = std::remove_if(
263 : all_files_and_groups.begin(), all_files_and_groups.end(),
264 : [](const std::string& t) {
265 : return t.find(ObjectType::extension()) == std::string::npos;
266 : });
267 : // Shrink down the vector
268 : all_files_and_groups.erase(range_end, all_files_and_groups.end());
269 : all_files_and_groups.shrink_to_fit();
270 : }
271 :
272 : return all_files_and_groups;
273 : }
274 :
// Mutable overload of `get`. The `enable_if` constraint removes it from
// overload resolution unless `Access_t` is `AccessType::ReadWrite`. It does no
// work of its own: it calls the const overload and strips the `const` from the
// reference that overload returns.
275 : template <AccessType Access_t>
276 : template <typename ObjectType, typename... Args,
277 : typename std::enable_if_t<((void)sizeof(ObjectType),
278 : Access_t == AccessType::ReadWrite)>*>
279 : ObjectType& H5File<Access_t>::get(const std::string& path, Args&&... args) {
280 : // Ensure we call the const version of the get function to avoid infinite
281 : // recursion (hence the cast of `this` to a pointer-to-const). The reason this
282 : // is implemented in this manner is to avoid code duplication.
283 : // clang-tidy: do not use const_cast
284 : return const_cast<ObjectType&>( // NOLINT
285 : static_cast<H5File<Access_t> const*>(this)->get<ObjectType>(
286 : path, std::forward<Args>(args)...));
287 : }
288 :
// Opens the existing object of type `ObjectType` at `path`, stores it as the
// file's current object and returns a const reference to it.
//
// Calls ERROR if another object is still open, or if
// `check_if_object_exists` reports that the object does not exist. The
// `ObjectType` constructor receives, in order: the existence flag, the open
// group (moved), the group's id, the object name (moved), and then `args`.
289 : template <AccessType Access_t>
290 : template <typename ObjectType, typename... Args>
291 : const ObjectType& H5File<Access_t>::get(const std::string& path,
292 : Args&&... args) const {
293 : if (current_object_ != nullptr) {
294 : ERROR("Object " << current_object_->subfile_path()
295 : << " already open. Cannot open object " << path << ".");
296 : }
297 : // Tuple is (exists, open group, object name). C++17: structured bindings
298 : auto exists_group_name = check_if_object_exists<ObjectType>(path);
// Read the id into a local now, before the group is moved from below.
299 : hid_t group_id = std::get<1>(exists_group_name).id();
300 : if (not std::get<0>(exists_group_name)) {
301 : ERROR("Cannot open the object '" << path + ObjectType::extension()
302 : << "' because it does not exist.");
303 : }
304 : current_object_ = std::make_unique<ObjectType>(
305 : std::get<0>(exists_group_name), std::move(std::get<1>(exists_group_name)),
306 : group_id, std::move(std::get<2>(exists_group_name)),
307 : std::forward<Args>(args)...);
308 : return dynamic_cast<const ObjectType&>(*current_object_);
309 : }
310 :
// Creates a new object of type `ObjectType` at `path`, stores it as the file's
// current object and returns a mutable reference to it.
//
// Only compiles for `AccessType::ReadWrite` files (static_assert). Calls ERROR
// if another object is still open, or if `check_if_object_exists` reports that
// the object already exists. The `ObjectType` constructor receives, in order:
// the existence flag (false on this path), the open group (moved), the
// group's id, the object name (moved), and then `args`.
311 : template <AccessType Access_t>
312 : template <typename ObjectType, typename... Args>
313 : ObjectType& H5File<Access_t>::insert(const std::string& path, Args&&... args) {
314 : static_assert(AccessType::ReadWrite == Access_t,
315 : "Can only insert into ReadWrite access H5 files.");
316 : if (current_object_ != nullptr) {
317 : ERROR("Object " << current_object_->subfile_path()
318 : << " already open. Cannot insert object " << path << ".");
319 : }
320 : // Tuple is (exists, open group, object name). C++17: structured bindings
321 : auto exists_group_name = check_if_object_exists<ObjectType>(path);
322 : if (std::get<0>(exists_group_name)) {
323 : ERROR(
324 : "Cannot insert an Object that already exists. Failed to add Object "
325 : "named: "
326 : << path);
327 : }
328 :
// Read the id into a local now, before the group is moved from below.
329 : hid_t group_id = std::get<1>(exists_group_name).id();
330 : return convert_to_derived<ObjectType>(
331 : current_object_ = std::make_unique<ObjectType>(
332 : std::get<0>(exists_group_name),
333 : std::move(std::get<1>(exists_group_name)), group_id,
334 : std::move(std::get<2>(exists_group_name)),
335 : std::forward<Args>(args)...));
336 : }
337 :
// Same construction path as `insert`, except that an already-existing object
// is not treated as an error here: the existence flag from
// `check_if_object_exists` is simply handed to the `ObjectType` constructor
// as its first argument.
//
// Only compiles for `AccessType::ReadWrite` files (static_assert). Calls ERROR
// if another object is still open. The new object becomes the file's current
// object and a mutable reference to it is returned.
338 : template <AccessType Access_t>
339 : template <typename ObjectType, typename... Args>
340 : ObjectType& H5File<Access_t>::try_insert(const std::string& path,
341 : Args&&... args) {
342 : static_assert(AccessType::ReadWrite == Access_t,
343 : "Can only insert into ReadWrite access H5 files.");
344 : if (current_object_ != nullptr) {
345 : ERROR("Object " << current_object_->subfile_path()
346 : << " already open. Cannot try to insert object " << path
347 : << ".");
348 : }
349 : // Tuple is (exists, open group, object name). C++17: structured bindings
350 : auto exists_group_name = check_if_object_exists<ObjectType>(path);
// Read the id into a local now, before the group is moved from below.
351 : hid_t group_id = std::get<1>(exists_group_name).id();
352 : return convert_to_derived<ObjectType>(
353 : current_object_ = std::make_unique<ObjectType>(
354 : std::get<0>(exists_group_name),
355 : std::move(std::get<1>(exists_group_name)), group_id,
356 : std::move(std::get<2>(exists_group_name)),
357 : std::forward<Args>(args)...));
358 : }
359 :
360 : /// \cond HIDDEN_SYMBOLS
// Mutable overload (only available when `Access_t` is ReadWrite): downcasts
// the object held by `current_object` to `ObjectType&`.
//
// Calls ERROR when the pointer is null or when the `dynamic_cast` throws
// `std::bad_cast`.
// NOTE(review): there is no return after either ERROR call, so this presumably
// relies on ERROR never returning -- confirm against Error.hpp.
361 : template <AccessType Access_t>
362 : template <typename ObjectType,
363 : typename std::enable_if_t<((void)sizeof(ObjectType),
364 : Access_t == AccessType::ReadWrite)>*>
365 : ObjectType& H5File<Access_t>::convert_to_derived(
366 : std::unique_ptr<h5::Object>& current_object) {
367 : if (nullptr == current_object) {
368 : ERROR("No object to convert."); // LCOV_EXCL_LINE
369 : }
370 : try {
371 : return dynamic_cast<ObjectType&>(*current_object);
372 : // LCOV_EXCL_START
373 : } catch (const std::bad_cast& e) {
374 : ERROR("Failed to cast to object.\nCast error: " << e.what());
375 : // LCOV_EXCL_STOP
376 : }
377 : }
377 : }
// Const overload: downcasts the object held by `current_object` to
// `const ObjectType&`.
//
// Calls ERROR when the pointer is null or when the `dynamic_cast` throws
// `std::bad_cast`.
// NOTE(review): there is no return after either ERROR call, so this presumably
// relies on ERROR never returning -- confirm against Error.hpp.
378 : template <AccessType Access_t>
379 : template <typename ObjectType>
380 : const ObjectType& H5File<Access_t>::convert_to_derived(
381 : const std::unique_ptr<h5::Object>& current_object) const {
382 : if (nullptr == current_object) {
383 : ERROR("No object to convert.");
384 : }
385 : try {
386 : return dynamic_cast<const ObjectType&>(*current_object);
387 : } catch (const std::bad_cast& e) {
388 : ERROR("Failed to cast to object.\nCast error: " << e.what());
389 : }
390 : }
390 : }
391 :
// Opens the parent group of `path` and reports whether an object of type
// `ObjectType` exists there.
//
// Returns a tuple of:
//   - bool: true when the name with `ObjectType::extension()` appended is
//     exactly "/", or when that name is found as a link or as an attribute in
//     the parent group;
//   - detail::OpenGroup: the opened parent group (moved out to the caller);
//   - std::string: the object name without extension ("/" when `path` is "/").
//
// `H5Lexists` and `H5Aexists` return a tri-state `htri_t`: positive when the
// name exists, zero when it does not, negative when the query itself failed.
// The results are compared with `> 0` so that a failed query is not reported
// as "object exists".
392 : template <AccessType Access_t>
393 : template <typename ObjectType>
394 : std::tuple<bool, detail::OpenGroup, std::string>
395 : H5File<Access_t>::check_if_object_exists(const std::string& path) const {
396 : std::string name_only = "/";
397 : if (path != "/") {
398 : name_only = file_system::get_file_name(path);
399 : }
400 : const std::string name_with_extension = name_only + ObjectType::extension();
401 : detail::OpenGroup group(file_id_, file_system::get_parent_path(path),
402 : Access_t);
403 : #pragma GCC diagnostic push
404 : #pragma GCC diagnostic ignored "-Wold-style-cast"
// Only a strictly positive htri_t means "exists"; negative means failure.
405 : const bool object_exists =
406 : name_with_extension == "/" or
407 : H5Lexists(group.id(), name_with_extension.c_str(), H5P_DEFAULT) > 0 or
408 : H5Aexists(group.id(), name_with_extension.c_str()) > 0;
409 : #pragma GCC diagnostic pop
410 : return std::make_tuple(object_exists, std::move(group), std::move(name_only));
411 : }
412 :
// ReadWrite specialization: inserts an `h5::Header` object at "/header" via
// `insert`, which also leaves that header as the file's current object.
413 : template <>
414 : inline void H5File<AccessType::ReadWrite>::insert_header() {
415 : insert<h5::Header>("/header");
416 : }
417 : // ReadOnly specialization is a no-op. Not tested because it is only required
418 : // to get code to compile, if statement in constructor prevents call.
419 : template <>
420 : inline void H5File<AccessType::ReadOnly>::insert_header() {} // LCOV_EXCL_LINE
421 :
422 : /// \endcond
423 : } // namespace h5
|