diff --git a/llvm/include/llvm/ADT/GenericUniformityImpl.h b/llvm/include/llvm/ADT/GenericUniformityImpl.h index d10355fff1bea..92661eeb02604 100644 --- a/llvm/include/llvm/ADT/GenericUniformityImpl.h +++ b/llvm/include/llvm/ADT/GenericUniformityImpl.h @@ -51,6 +51,7 @@ #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SparseBitVector.h" #include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/Uniformity.h" #include "llvm/Support/raw_ostream.h" #define DEBUG_TYPE "uniformity" @@ -406,6 +407,11 @@ template class GenericUniformityAnalysisImpl { void recordTemporalDivergence(ConstValueRefT, const InstructionT *, const CycleT *); + /// @brief Uniformity of any instruction operands. + /// @param I instruction. + /// @return vector containing uniformity value for corresponding operands. + llvm::SmallVector + getOperandUniformities(const Instruction &I) const; protected: /// \brief Value/block pair representing a single phi input. diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h index 022530dc846ea..81bc8d291c2b2 100644 --- a/llvm/include/llvm/Analysis/TargetTransformInfo.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h @@ -23,6 +23,7 @@ #include "llvm/ADT/APInt.h" #include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/Uniformity.h" #include "llvm/Analysis/IVDescriptors.h" #include "llvm/IR/FMF.h" #include "llvm/IR/InstrTypes.h" @@ -1916,6 +1917,14 @@ class TargetTransformInfo { const Function &F, SmallVectorImpl> &LB) const; + /// Target can implement more complex patterns for getting Uniformity of an + /// instruction. Currently Uniformity analysis categorises instructions with a + /// fixed set of InstructionUniformity values: Default, AlwaysUniform and + /// NeverUniform. 
+ std::optional getInstructionUniformity( + const Instruction &I, + SmallVector OperandUniformities) const; + private: std::unique_ptr TTIImpl; }; diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h index 990252b1e5743..6412aa56a4ab0 100644 --- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h @@ -1147,6 +1147,12 @@ class TargetTransformInfoImplBase { const Function &F, SmallVectorImpl> &LB) const {} + virtual std::optional getInstructionUniformity( + const Instruction &I, + SmallVector OperandUniformities) const { + return std::nullopt; + } + protected: // Obtain the minimum required size to hold the value (without the sign) // In case of a vector it returns the min required size for one element. diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp index 8548afea72964..b9e85a15c4315 100644 --- a/llvm/lib/Analysis/TargetTransformInfo.cpp +++ b/llvm/lib/Analysis/TargetTransformInfo.cpp @@ -1476,6 +1476,13 @@ void TargetTransformInfo::collectKernelLaunchBounds( return TTIImpl->collectKernelLaunchBounds(F, LB); } +std::optional +TargetTransformInfo::getInstructionUniformity( + const Instruction &I, + SmallVector OperandUniformities) const { + return TTIImpl->getInstructionUniformity(I, OperandUniformities); +} + TargetTransformInfoImplBase::~TargetTransformInfoImplBase() = default; TargetIRAnalysis::TargetIRAnalysis() : TTICallback(&getDefaultTTI) {} diff --git a/llvm/lib/Analysis/UniformityAnalysis.cpp b/llvm/lib/Analysis/UniformityAnalysis.cpp index 2101fdfacfc8f..0228fcd62f6df 100644 --- a/llvm/lib/Analysis/UniformityAnalysis.cpp +++ b/llvm/lib/Analysis/UniformityAnalysis.cpp @@ -29,6 +29,35 @@ bool llvm::GenericUniformityAnalysisImpl::markDefsDivergent( return markDivergent(cast(&Instr)); } +template <> +bool llvm::GenericUniformityAnalysisImpl::isDivergentUse( + const Use &U) const { + 
const auto *V = U.get(); + if (isDivergent(V)) + return true; + if (const auto *DefInstr = dyn_cast(V)) { + const auto *UseInstr = cast(U.getUser()); + return isTemporalDivergent(*UseInstr->getParent(), *DefInstr); + } + return false; +} + +template <> +llvm::SmallVector +llvm::GenericUniformityAnalysisImpl::getOperandUniformities( + const Instruction &I) const { + SmallVector OperandUniformities; + for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i) { + if (!isa(I.getOperand(i)) && !isa(I.getOperand(i))) + continue; + const Use &U = I.getOperandUse(i); + OperandUniformities.push_back(isDivergentUse(U) + ? InstructionUniformity::NeverUniform + : InstructionUniformity::AlwaysUniform); + } + return OperandUniformities; +} + template <> void llvm::GenericUniformityAnalysisImpl::initialize() { for (auto &I : instructions(F)) { if (TTI->isSourceOfDivergence(&I)) @@ -36,6 +65,7 @@ template <> void llvm::GenericUniformityAnalysisImpl::initialize() { else if (TTI->isAlwaysUniform(&I)) addUniformOverride(I); } + for (auto &Arg : F.args()) { if (TTI->isSourceOfDivergence(&Arg)) { markDivergent(&Arg); @@ -47,9 +77,23 @@ template <> void llvm::GenericUniformityAnalysisImpl::pushUsers( const Value *V) { for (const auto *User : V->users()) { - if (const auto *UserInstr = dyn_cast(User)) { + const auto *UserInstr = dyn_cast(User); + if (!UserInstr) + continue; + + if (!TTI) { markDivergent(*UserInstr); + continue; } + + auto Uniformity = TTI->getInstructionUniformity( + *UserInstr, getOperandUniformities(*UserInstr)); + if (!Uniformity || *Uniformity == InstructionUniformity::Default) + markDivergent(*UserInstr); // fallback: conservative + else if (*Uniformity == InstructionUniformity::NeverUniform) + markDivergent(*UserInstr); + else if (*Uniformity == InstructionUniformity::AlwaysUniform) + addUniformOverride(*UserInstr); } } @@ -88,19 +132,6 @@ void llvm::GenericUniformityAnalysisImpl< } } -template <> -bool llvm::GenericUniformityAnalysisImpl::isDivergentUse( - const 
Use &U) const { - const auto *V = U.get(); - if (isDivergent(V)) - return true; - if (const auto *DefInstr = dyn_cast(V)) { - const auto *UseInstr = cast(U.getUser()); - return isTemporalDivergent(*UseInstr->getParent(), *DefInstr); - } - return false; -} - // This ensures explicit instantiation of // GenericUniformityAnalysisImpl::ImplDeleter::operator() template class llvm::GenericUniformityInfo; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp index 204d3df546bbf..57c8fdcf085ef 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp @@ -1422,3 +1422,21 @@ void GCNTTIImpl::collectKernelLaunchBounds( LB.push_back({"amdgpu-waves-per-eu[0]", WavesPerEU.first}); LB.push_back({"amdgpu-waves-per-eu[1]", WavesPerEU.second}); } + +std::optional GCNTTIImpl::getInstructionUniformity( + const Instruction &I, + SmallVector OperandUniformities) const { + if (const auto *II = dyn_cast(&I)) { + switch (II->getIntrinsicID()) { + case Intrinsic::amdgcn_permlane64: + if (llvm::any_of(OperandUniformities, [](InstructionUniformity U) { + return U == InstructionUniformity::AlwaysUniform; + })) + return InstructionUniformity::AlwaysUniform; + return InstructionUniformity::Default; + default: + break; + } + } + return std::nullopt; +} \ No newline at end of file diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h index f6f7bd4bfcf5b..af47cf4b8f068 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h @@ -290,6 +290,9 @@ class GCNTTIImpl final : public BasicTTIImplBase { void collectKernelLaunchBounds( const Function &F, SmallVectorImpl> &LB) const override; + std::optional getInstructionUniformity( + const Instruction &I, + SmallVector OperandUniformities) const override; }; } // end namespace llvm diff --git 
a/llvm/test/Analysis/UniformityAnalysis/AMDGPU/uniform_intrinsic.ll b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/uniform_intrinsic.ll new file mode 100644 index 0000000000000..4bb89516b2e81 --- /dev/null +++ b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/uniform_intrinsic.ll @@ -0,0 +1,25 @@ +; RUN: opt -mtriple amdgcn-unknown-amdhsa -passes='print' -disable-output %s 2>&1 | FileCheck %s + +; CHECK: ALL VALUES UNIFORM +define amdgpu_kernel void @permlane64_constant(ptr addrspace(1) %out) { + %v = call i32 @llvm.amdgcn.permlane64(i32 7) + store i32 %v, ptr addrspace(1) %out + ret void +} + +; CHECK: ALL VALUES UNIFORM +define amdgpu_kernel void @permlane64_uniform(ptr addrspace(1) %out, i32 %src) { + %v = call i32 @llvm.amdgcn.permlane64(i32 %src) + store i32 %v, ptr addrspace(1) %out + ret void +} + +; CHECK: DIVERGENT: %tid = call i32 @llvm.amdgcn.workitem.id.x() +; CHECK: DIVERGENT: %v = call i32 @llvm.amdgcn.permlane64.i32(i32 %tid) +define amdgpu_kernel void @permlane64_nonuniform(i32 addrspace(1)* %out) { + %tid = call i32 @llvm.amdgcn.workitem.id.x() + %v = call i32 @llvm.amdgcn.permlane64(i32 %tid) + %out_ptr = getelementptr i32, i32 addrspace(1)* %out, i32 %tid + store i32 %v, i32 addrspace(1)* %out_ptr + ret void +}