Line data Source code
// Distributed under the MIT License.
// See LICENSE.txt for details.

#pragma once

#include <iosfwd>

namespace sys {
/// The cache location to prefetch data to.
///
/// The underlying integer encodes the two trailing arguments passed to
/// `__builtin_prefetch` by `sys::prefetch`: bits 0-1 hold the locality
/// (0 through 3) and bit 2 holds the read/write flag.
enum class PrefetchTo : int {
  /// Prefetch into L1 data cache
  ///
  /// Typically the fastest CPU cache, about 32 KB in size.
  L1Cache = 3,
  /// Prefetch into L2 data cache
  ///
  /// Typically the second fastest CPU cache, about 128 or 256 KB in size.
  L2Cache = 2,
  /// Prefetch into L3 data cache
  ///
  /// Typically the slowest CPU cache and is shared among multiple cores with
  /// sizes varying by factors of several.
  L3Cache = 1,
  /// Non-temporal is an element that is unlikely to be re-used. E.g., read
  /// once, written but never read.
  NonTemporal = 0,
  /// Prefetch to L1 data cache for writing
  WriteL1Cache = 7,
  /// Prefetch to L2 data cache for writing
  WriteL2Cache = 6
};

/// \brief Prefetch data into a specific level of data cache.
///
/// \tparam CacheLocation selects the locality and whether the prefetch is in
/// preparation for a read or for a write.
/// \param address_to_prefetch address of the data to prefetch.
///
/// On compilers that do not provide `__builtin_prefetch` this function does
/// nothing. A prefetch is only a hint, so omitting it does not change the
/// result of a program.
template <PrefetchTo CacheLocation>
#if defined(__GNUC__) || defined(__clang__)
__attribute__((always_inline)) inline
#endif
void prefetch(const void* address_to_prefetch) {
#if defined(__GNUC__) || defined(__clang__)
  // The enum values are bit flags: the lowest two bits (right-most) select
  // the locality (cache level) and the 3rd bit selects whether the prefetch
  // prepares for a write (1) or a read (0).
  __builtin_prefetch(address_to_prefetch,
                     (static_cast<int>(CacheLocation) >> 2) & 1,
                     static_cast<int>(CacheLocation) & 0x3);
#else
  // No prefetch intrinsic is known for this compiler; treat the call as a
  // no-op so the header still compiles.
  static_cast<void>(address_to_prefetch);
#endif
}

/// \brief Stream insertion operator for `PrefetchTo`.
///
/// Only declared in this header; the definition (and therefore the exact text
/// written to `os`) lives elsewhere.
std::ostream& operator<<(std::ostream& os, PrefetchTo cache_location);
}  // namespace sys