//====--- OMPGridValues.h - Target-specific OpenMP grid values --*- C++ -*-====//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// \brief Provides definitions for target-specific GPU grid values.
///
//===----------------------------------------------------------------------===//

#ifndef LLVM_FRONTEND_OPENMP_OMPGRIDVALUES_H
#define LLVM_FRONTEND_OPENMP_OMPGRIDVALUES_H

namespace llvm {
namespace omp {

/// \brief Defines various target-specific GPU grid values that must be
///        consistent between host RTL (plugin), device RTL, and clang.
/// We can change grid values for a "fat" binary so that different
/// passes get the correct values when generating code for a
/// multi-target binary. Both amdgcn and nvptx values are stored in
/// this file. In the future, should there be differences between GPUs
/// of the same architecture, simply define a new GV instance and use
/// the new name.
///
/// Example usage in clang:
///   const unsigned warp_size =
///       ctx.GetTargetInfo().getGridValue().GV_Warp_Size;
///
/// Example usage in libomptarget/deviceRTLs:
///   #include "llvm/Frontend/OpenMP/OMPGridValues.h"
///   #ifdef __AMDGPU__
///     #define GRIDVAL getAMDGPUGridValues<64>()
///   #else
///     #define GRIDVAL NVPTXGridValues
///   #endif
///   ... Then use this reference for GV_Warp_Size in the deviceRTL source.
///     llvm::omp::GRIDVAL.GV_Warp_Size
///
/// Example usage in libomptarget hsa plugin:
///   #include "llvm/Frontend/OpenMP/OMPGridValues.h"
///   #define GRIDVAL getAMDGPUGridValues<64>()
///   ... Then use this reference to access GV_Warp_Size in the hsa plugin.
///     llvm::omp::GRIDVAL.GV_Warp_Size
///
/// Example usage in libomptarget cuda plugin:
///   #include "llvm/Frontend/OpenMP/OMPGridValues.h"
///   #define GRIDVAL NVPTXGridValues
///   ... Then use this reference to access GV_Warp_Size in the cuda plugin.
///     llvm::omp::GRIDVAL.GV_Warp_Size
///
struct GV {
  /// The size reserved for data in a shared memory slot.
  unsigned GV_Slot_Size;
  /// The default value of the maximum number of threads in a warp.
  unsigned GV_Warp_Size;

  /// The number of bytes reserved for a full warp's shared-memory slots.
  constexpr unsigned warpSlotSize() const {
    return GV_Warp_Size * GV_Slot_Size;
  }

  /// The maximum number of teams.
  unsigned GV_Max_Teams;
  /// The default number of teams in the absence of any other information.
  unsigned GV_Default_Num_Teams;

  /// An alternative to the heavy data sharing infrastructure that uses global
  /// memory is one that uses device __shared__ memory. The amount of such
  /// space (in bytes) reserved by the OpenMP runtime is noted here.
  unsigned GV_SimpleBufferSize;
  /// The absolute maximum team size for a working group.
  unsigned GV_Max_WG_Size;
  /// The default maximum team size for a working group.
  unsigned GV_Default_WG_Size;

  /// The maximum number of warps that fit in a working group.
  constexpr unsigned maxWarpNumber() const {
    return GV_Max_WG_Size / GV_Warp_Size;
  }
};
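
// A minimal compile-time sketch (not part of the upstream interface) showing
// how the derived helpers relate to the raw fields. The name
// `ExampleGridValues` and the enclosing namespace are hypothetical and exist
// only for illustration; the field values mirror NVPTXGridValues below.
namespace example_sketch {
static constexpr GV ExampleGridValues = {
    256,       // GV_Slot_Size
    32,        // GV_Warp_Size
    (1 << 16), // GV_Max_Teams
    3200,      // GV_Default_Num_Teams
    896,       // GV_SimpleBufferSize
    1024,      // GV_Max_WG_Size
    128,       // GV_Default_WG_Size
};
// warpSlotSize() is GV_Warp_Size * GV_Slot_Size bytes of shared memory.
static_assert(ExampleGridValues.warpSlotSize() == 32 * 256,
              "one slot per lane in the warp");
// maxWarpNumber() is GV_Max_WG_Size / GV_Warp_Size warps per working group.
static_assert(ExampleGridValues.maxWarpNumber() == 1024 / 32,
              "warps that fit in the largest working group");
} // namespace example_sketch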

/// For AMDGPU GPUs
static constexpr GV AMDGPUGridValues64 = {
    256,       // GV_Slot_Size
    64,        // GV_Warp_Size
    (1 << 16), // GV_Max_Teams
    440,       // GV_Default_Num_Teams
    896,       // GV_SimpleBufferSize
    1024,      // GV_Max_WG_Size
    256,       // GV_Default_WG_Size
};

static constexpr GV AMDGPUGridValues32 = {
    256,       // GV_Slot_Size
    32,        // GV_Warp_Size
    (1 << 16), // GV_Max_Teams
    440,       // GV_Default_Num_Teams
    896,       // GV_SimpleBufferSize
    1024,      // GV_Max_WG_Size
    256,       // GV_Default_WG_Size
};

/// Returns the grid value table for the given AMDGPU wavefront size.
template <unsigned wavesize> constexpr const GV &getAMDGPUGridValues() {
  static_assert(wavesize == 32 || wavesize == 64, "Unexpected wavesize");
  return wavesize == 32 ? AMDGPUGridValues32 : AMDGPUGridValues64;
}

/// For Nvidia GPUs
static constexpr GV NVPTXGridValues = {
    256,       // GV_Slot_Size
    32,        // GV_Warp_Size
    (1 << 16), // GV_Max_Teams
    3200,      // GV_Default_Num_Teams
    896,       // GV_SimpleBufferSize
    1024,      // GV_Max_WG_Size
    128,       // GV_Default_WG_Size
};

} // namespace omp
} // namespace llvm

#endif // LLVM_FRONTEND_OPENMP_OMPGRIDVALUES_H
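
// Illustrative usage sketch (hypothetical; not part of this header's
// interface): how a host-side consumer might pick the table for its offload
// target. The helper name `pickGridValues` and its parameters are assumptions
// made for this comment only.
//
//   #include "llvm/Frontend/OpenMP/OMPGridValues.h"
//
//   static const llvm::omp::GV &pickGridValues(bool IsAMDGPU,
//                                              unsigned Wavesize) {
//     if (!IsAMDGPU)
//       return llvm::omp::NVPTXGridValues;
//     return Wavesize == 32 ? llvm::omp::getAMDGPUGridValues<32>()
//                           : llvm::omp::getAMDGPUGridValues<64>();
//   }
//
//   // For example, the default team size used when launching NVPTX kernels:
//   const unsigned DefaultWGSize =
//       pickGridValues(/*IsAMDGPU=*/false, /*Wavesize=*/32).GV_Default_WG_Size;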