-
Notifications
You must be signed in to change notification settings - Fork 14.4k
[HLSL] [DXIL] Implement the AddUint64
HLSL function and the UAddc
DXIL op
#125319
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-clang @llvm/pr-subscribers-clang-codegen Author: Deric Cheung (Icohedron) ChangesThis PR addresses #99205
Notes:
Patch is 23.03 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/125319.diff 13 Files Affected:
diff --git a/clang/include/clang/Basic/Builtins.td b/clang/include/clang/Basic/Builtins.td
index 60c360d4a9e0759..ffa60bf98aea06f 100644
--- a/clang/include/clang/Basic/Builtins.td
+++ b/clang/include/clang/Basic/Builtins.td
@@ -4753,6 +4753,12 @@ def GetDeviceSideMangledName : LangBuiltin<"CUDA_LANG"> {
}
// HLSL
+def HLSLAddUint64: LangBuiltin<"HLSL_LANG"> {
+ let Spellings = ["__builtin_hlsl_adduint64"];
+ let Attributes = [NoThrow, Const];
+ let Prototype = "void(...)";
+}
+
def HLSLResourceGetPointer : LangBuiltin<"HLSL_LANG"> {
let Spellings = ["__builtin_hlsl_resource_getpointer"];
let Attributes = [NoThrow];
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index 774e5484cfa0e75..2d7d306a2074179 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -10623,7 +10623,7 @@ def err_second_argument_to_cwsc_not_pointer : Error<
"second argument to __builtin_call_with_static_chain must be of pointer type">;
def err_vector_incorrect_num_elements : Error<
- "%select{too many|too few}0 elements in vector %select{initialization|operand}3 (expected %1 elements, have %2)">;
+ "%select{too many|too few|incorrect number of}0 elements in vector %select{initialization|operand}3 (expected %1 elements, have %2)">;
def err_altivec_empty_initializer : Error<"expected initializer">;
def err_invalid_neon_type_code : Error<
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 26bccccdc5e36e5..3a2c74f39afa78f 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -19105,6 +19105,51 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID,
return nullptr;
switch (BuiltinID) {
+ case Builtin::BI__builtin_hlsl_adduint64: {
+ Value *OpA = EmitScalarExpr(E->getArg(0));
+ Value *OpB = EmitScalarExpr(E->getArg(1));
+ assert(E->getArg(0)->getType()->hasIntegerRepresentation() &&
+ E->getArg(1)->getType()->hasIntegerRepresentation() &&
+ "AddUint64 operands must have an integer representation");
+ assert(((E->getArg(0)->getType()->castAs<VectorType>()->getNumElements() ==
+ 2 &&
+ E->getArg(1)->getType()->castAs<VectorType>()->getNumElements() ==
+ 2) ||
+ (E->getArg(0)->getType()->castAs<VectorType>()->getNumElements() ==
+ 4 &&
+ E->getArg(1)->getType()->castAs<VectorType>()->getNumElements() ==
+ 4)) &&
+ "input vectors must have 2 or 4 elements each");
+
+ llvm::Value *Result = PoisonValue::get(OpA->getType());
+ uint64_t NumElements =
+ E->getArg(0)->getType()->castAs<VectorType>()->getNumElements();
+ for (uint64_t i = 0; i < NumElements / 2; ++i) {
+
+ // Obtain low and high words of inputs A and B
+ llvm::Value *LowA = Builder.CreateExtractElement(OpA, 2 * i + 0);
+ llvm::Value *HighA = Builder.CreateExtractElement(OpA, 2 * i + 1);
+ llvm::Value *LowB = Builder.CreateExtractElement(OpB, 2 * i + 0);
+ llvm::Value *HighB = Builder.CreateExtractElement(OpB, 2 * i + 1);
+
+ // Use an uadd_with_overflow to compute the sum of low words and obtain a
+ // carry value
+ llvm::Value *Carry;
+ llvm::Value *LowSum = EmitOverflowIntrinsic(
+ *this, llvm::Intrinsic::uadd_with_overflow, LowA, LowB, Carry);
+ llvm::Value *ZExtCarry = Builder.CreateZExt(Carry, HighA->getType());
+
+ // Sum the high words and the carry
+ llvm::Value *HighSum = Builder.CreateAdd(HighA, HighB);
+ llvm::Value *HighSumPlusCarry = Builder.CreateAdd(HighSum, ZExtCarry);
+
+ // Insert the low and high word sums into the result vector
+ Result = Builder.CreateInsertElement(Result, LowSum, 2 * i + 0);
+ Result = Builder.CreateInsertElement(Result, HighSumPlusCarry, 2 * i + 1,
+ "hlsl.AddUint64");
+ }
+ return Result;
+ }
case Builtin::BI__builtin_hlsl_resource_getpointer: {
Value *HandleOp = EmitScalarExpr(E->getArg(0));
Value *IndexOp = EmitScalarExpr(E->getArg(1));
diff --git a/clang/lib/Headers/hlsl/hlsl_intrinsics.h b/clang/lib/Headers/hlsl/hlsl_intrinsics.h
index d1e4eb08aa7646a..8043fcbc87c479a 100644
--- a/clang/lib/Headers/hlsl/hlsl_intrinsics.h
+++ b/clang/lib/Headers/hlsl/hlsl_intrinsics.h
@@ -138,6 +138,27 @@ _HLSL_BUILTIN_ALIAS(__builtin_elementwise_acos)
float4 acos(float4);
//===----------------------------------------------------------------------===//
+// AddUint64 builtins
+//===----------------------------------------------------------------------===//
+
+/// \fn T AddUint64(T a, T b)
+/// \brief Implements unsigned 64-bit integer addition using pairs of unsigned
+/// 32-bit integers.
+/// \param x [in] The first unsigned 32-bit integer pair(s)
+/// \param y [in] The second unsigned 32-bit integer pair(s)
+///
+/// This function takes one or two pairs (low, high) of unsigned 32-bit integer
+/// values and returns pairs (low, high) of unsigned 32-bit integer
+/// values representing the result of unsigned 64-bit integer addition.
+
+_HLSL_AVAILABILITY(shadermodel, 6.0)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_adduint64)
+uint32_t2 AddUint64(uint32_t2, uint32_t2);
+_HLSL_AVAILABILITY(shadermodel, 6.0)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_adduint64)
+uint32_t4 AddUint64(uint32_t4, uint32_t4);
+
+// //===----------------------------------------------------------------------===//
// all builtins
//===----------------------------------------------------------------------===//
diff --git a/clang/lib/Sema/SemaHLSL.cpp b/clang/lib/Sema/SemaHLSL.cpp
index f26469e6a2f1d7c..9b1110f441013a7 100644
--- a/clang/lib/Sema/SemaHLSL.cpp
+++ b/clang/lib/Sema/SemaHLSL.cpp
@@ -21,6 +21,7 @@
#include "clang/AST/Type.h"
#include "clang/AST/TypeLoc.h"
#include "clang/Basic/Builtins.h"
+#include "clang/Basic/DiagnosticParse.h"
#include "clang/Basic/DiagnosticSema.h"
#include "clang/Basic/IdentifierTable.h"
#include "clang/Basic/LLVM.h"
@@ -2023,6 +2024,18 @@ static bool CheckAllArgsHaveFloatRepresentation(Sema *S, CallExpr *TheCall) {
checkAllFloatTypes);
}
+static bool CheckUnsignedIntegerRepresentation(Sema *S, CallExpr *TheCall) {
+ auto checkUnsignedInteger = [](clang::QualType PassedType) -> bool {
+ clang::QualType BaseType =
+ PassedType->isVectorType()
+ ? PassedType->getAs<clang::VectorType>()->getElementType()
+ : PassedType;
+ return !BaseType->isUnsignedIntegerType();
+ };
+ return CheckAllArgTypesAreCorrect(S, TheCall, S->Context.UnsignedIntTy,
+ checkUnsignedInteger);
+}
+
static bool CheckFloatOrHalfRepresentations(Sema *S, CallExpr *TheCall) {
auto checkFloatorHalf = [](clang::QualType PassedType) -> bool {
clang::QualType BaseType =
@@ -2214,6 +2227,42 @@ static bool CheckResourceHandle(
// returning an ExprError
bool SemaHLSL::CheckBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
switch (BuiltinID) {
+ case Builtin::BI__builtin_hlsl_adduint64: {
+ if (SemaRef.checkArgCount(TheCall, 2))
+ return true;
+ if (CheckVectorElementCallArgs(&SemaRef, TheCall))
+ return true;
+ if (CheckUnsignedIntegerRepresentation(&SemaRef, TheCall))
+ return true;
+
+ // CheckVectorElementCallArgs(...) guarantees both args are the same type.
+ assert(TheCall->getArg(0)->getType() == TheCall->getArg(1)->getType() &&
+ "Both args must be of the same type");
+
+ // ensure both args are vectors
+ auto *VTy = TheCall->getArg(0)->getType()->getAs<VectorType>();
+ if (!VTy) {
+ SemaRef.Diag(TheCall->getBeginLoc(),
+ diag::err_vector_incorrect_num_elements)
+ << 2 << "2 or 4" << 1 << /*operand*/ 1;
+ return true;
+ }
+
+ // ensure both args have 2 elements, or both args have 4 elements
+ int NumElementsArg1 = VTy->getNumElements();
+ if (NumElementsArg1 != 2 && NumElementsArg1 != 4) {
+ SemaRef.Diag(TheCall->getBeginLoc(),
+ diag::err_vector_incorrect_num_elements)
+ << 2 << "2 or 4" << NumElementsArg1 << /*operand*/ 1;
+ return true;
+ }
+
+ ExprResult A = TheCall->getArg(0);
+ QualType ArgTyA = A.get()->getType();
+ // return type is the same as the input type
+ TheCall->setType(ArgTyA);
+ break;
+ }
case Builtin::BI__builtin_hlsl_resource_getpointer: {
if (SemaRef.checkArgCount(TheCall, 2) ||
CheckResourceHandle(&SemaRef, TheCall, 0) ||
diff --git a/clang/test/CodeGenHLSL/builtins/AddUint64.hlsl b/clang/test/CodeGenHLSL/builtins/AddUint64.hlsl
new file mode 100644
index 000000000000000..4141aef69323dfc
--- /dev/null
+++ b/clang/test/CodeGenHLSL/builtins/AddUint64.hlsl
@@ -0,0 +1,71 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
+// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.3-library %s \
+// RUN: -emit-llvm -disable-llvm-passes -o - | \
+// RUN: FileCheck %s --check-prefixes=CHECK
+
+
+// CHECK-LABEL: define noundef <2 x i32> @_Z20test_AddUint64_uint2Dv2_jS_(
+// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <2 x i32>, align 8
+// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <2 x i32>, align 8
+// CHECK-NEXT: store <2 x i32> [[A]], ptr [[A_ADDR]], align 8
+// CHECK-NEXT: store <2 x i32> [[B]], ptr [[B_ADDR]], align 8
+// CHECK-NEXT: [[A_LOAD:%.*]] = load <2 x i32>, ptr [[A_ADDR]], align 8
+// CHECK-NEXT: [[B_LOAD:%.*]] = load <2 x i32>, ptr [[B_ADDR]], align 8
+// CHECK-NEXT: [[LowA:%.*]] = extractelement <2 x i32> [[A_LOAD]], i64 0
+// CHECK-NEXT: [[HighA:%.*]] = extractelement <2 x i32> [[A_LOAD]], i64 1
+// CHECK-NEXT: [[LowB:%.*]] = extractelement <2 x i32> [[B_LOAD]], i64 0
+// CHECK-NEXT: [[HighB:%.*]] = extractelement <2 x i32> [[B_LOAD]], i64 1
+// CHECK-NEXT: [[UAddc:%.*]] = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 [[LowA]], i32 [[LowB]])
+// CHECK-NEXT: [[Carry:%.*]] = extractvalue { i32, i1 } [[UAddc]], 1
+// CHECK-NEXT: [[LowSum:%.*]] = extractvalue { i32, i1 } [[UAddc]], 0
+// CHECK-NEXT: [[CarryZExt:%.*]] = zext i1 [[Carry]] to i32
+// CHECK-NEXT: [[HighSum:%.*]] = add i32 [[HighA]], [[HighB]]
+// CHECK-NEXT: [[HighSumPlusCarry:%.*]] = add i32 [[HighSum]], [[CarryZExt]]
+// CHECK-NEXT: [[HLSL_ADDUINT64_UPTO0:%.*]] = insertelement <2 x i32> poison, i32 [[LowSum]], i64 0
+// CHECK-NEXT: [[HLSL_ADDUINT64:%.*]] = insertelement <2 x i32> [[HLSL_ADDUINT64_UPTO0]], i32 [[HighSumPlusCarry]], i64 1
+// CHECK-NEXT: ret <2 x i32> [[HLSL_ADDUINT64]]
+//
+uint2 test_AddUint64_uint2(uint2 a, uint2 b) {
+ return AddUint64(a, b);
+}
+
+// CHECK-LABEL: define noundef <4 x i32> @_Z20test_AddUint64_uint4Dv4_jS_(
+// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <4 x i32>, align 16
+// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <4 x i32>, align 16
+// CHECK-NEXT: store <4 x i32> [[A]], ptr [[A_ADDR]], align 16
+// CHECK-NEXT: store <4 x i32> [[B]], ptr [[B_ADDR]], align 16
+// CHECK-NEXT: [[A_LOAD:%.*]] = load <4 x i32>, ptr [[A_ADDR]], align 16
+// CHECK-NEXT: [[B_LOAD:%.*]] = load <4 x i32>, ptr [[B_ADDR]], align 16
+// CHECK-NEXT: [[LowA:%.*]] = extractelement <4 x i32> [[A_LOAD]], i64 0
+// CHECK-NEXT: [[HighA:%.*]] = extractelement <4 x i32> [[A_LOAD]], i64 1
+// CHECK-NEXT: [[LowB:%.*]] = extractelement <4 x i32> [[B_LOAD]], i64 0
+// CHECK-NEXT: [[HighB:%.*]] = extractelement <4 x i32> [[B_LOAD]], i64 1
+// CHECK-NEXT: [[UAddc:%.*]] = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 [[LowA]], i32 [[LowB]])
+// CHECK-NEXT: [[Carry:%.*]] = extractvalue { i32, i1 } [[UAddc]], 1
+// CHECK-NEXT: [[LowSum:%.*]] = extractvalue { i32, i1 } [[UAddc]], 0
+// CHECK-NEXT: [[CarryZExt:%.*]] = zext i1 [[Carry]] to i32
+// CHECK-NEXT: [[HighSum:%.*]] = add i32 [[HighA]], [[HighB]]
+// CHECK-NEXT: [[HighSumPlusCarry:%.*]] = add i32 [[HighSum]], [[CarryZExt]]
+// CHECK-NEXT: [[HLSL_ADDUINT64_UPTO0:%.*]] = insertelement <4 x i32> poison, i32 [[LowSum]], i64 0
+// CHECK-NEXT: [[HLSL_ADDUINT64_UPTO1:%.*]] = insertelement <4 x i32> [[HLSL_ADDUINT64_UPTO0]], i32 [[HighSumPlusCarry]], i64 1
+// CHECK-NEXT: [[LowA1:%.*]] = extractelement <4 x i32> [[A_LOAD]], i64 2
+// CHECK-NEXT: [[HighA1:%.*]] = extractelement <4 x i32> [[A_LOAD]], i64 3
+// CHECK-NEXT: [[LowB1:%.*]] = extractelement <4 x i32> [[B_LOAD]], i64 2
+// CHECK-NEXT: [[HighB1:%.*]] = extractelement <4 x i32> [[B_LOAD]], i64 3
+// CHECK-NEXT: [[UAddc1:%.*]] = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 [[LowA1]], i32 [[LowB1]])
+// CHECK-NEXT: [[Carry1:%.*]] = extractvalue { i32, i1 } [[UAddc1]], 1
+// CHECK-NEXT: [[LowSum1:%.*]] = extractvalue { i32, i1 } [[UAddc1]], 0
+// CHECK-NEXT: [[CarryZExt1:%.*]] = zext i1 [[Carry1]] to i32
+// CHECK-NEXT: [[HighSum1:%.*]] = add i32 [[HighA1]], [[HighB1]]
+// CHECK-NEXT: [[HighSumPlusCarry1:%.*]] = add i32 [[HighSum1]], [[CarryZExt1]]
+// CHECK-NEXT: [[HLSL_ADDUINT64_UPTO2:%.*]] = insertelement <4 x i32> [[HLSL_ADDUINT64_UPTO1]], i32 [[LowSum1]], i64 2
+// CHECK-NEXT: [[HLSL_ADDUINT64:%.*]] = insertelement <4 x i32> [[HLSL_ADDUINT64_UPTO2]], i32 [[HighSumPlusCarry1]], i64 3
+// CHECK-NEXT: ret <4 x i32> [[HLSL_ADDUINT64]]
+//
+uint4 test_AddUint64_uint4(uint4 a, uint4 b) {
+ return AddUint64(a, b);
+}
diff --git a/clang/test/SemaHLSL/BuiltIns/AddUint64-errors.hlsl b/clang/test/SemaHLSL/BuiltIns/AddUint64-errors.hlsl
new file mode 100644
index 000000000000000..4d3d4f48b21eb7e
--- /dev/null
+++ b/clang/test/SemaHLSL/BuiltIns/AddUint64-errors.hlsl
@@ -0,0 +1,46 @@
+// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.6-library %s -fnative-half-type -emit-llvm-only -disable-llvm-passes -verify
+
+uint2 test_too_few_arg() {
+ return __builtin_hlsl_adduint64();
+ // expected-error@-1 {{too few arguments to function call, expected 2, have 0}}
+}
+
+uint4 test_too_many_arg(uint4 a) {
+ return __builtin_hlsl_adduint64(a, a, a);
+ // expected-error@-1 {{too many arguments to function call, expected 2, have 3}}
+}
+
+uint2 test_mismatched_arg_types(uint2 a, uint4 b) {
+ return __builtin_hlsl_adduint64(a, b);
+ // expected-error@-1 {{all arguments to '__builtin_hlsl_adduint64' must have the same type}}
+}
+
+uint2 test_too_many_arg_elements(uint3 a, uint3 b) {
+ return __builtin_hlsl_adduint64(a, b);
+ // expected-error@-1 {{incorrect number of elements in vector operand (expected 2 or 4 elements, have 3)}}
+}
+
+uint4 test_too_few_arg_elements(uint3 a, uint3 b) {
+ return __builtin_hlsl_adduint64(a, b);
+ // expected-error@-1 {{incorrect number of elements in vector operand (expected 2 or 4 elements, have 3)}}
+}
+
+uint2 test_scalar_arg_type(uint a) {
+ return __builtin_hlsl_adduint64(a, a);
+ // expected-error@-1 {{incorrect number of elements in vector operand (expected 2 or 4 elements, have 1)}}
+}
+
+uint2 test_signed_integer_args(int2 a, int2 b) {
+ return __builtin_hlsl_adduint64(a, b);
+// expected-error@-1 {{passing 'int2' (aka 'vector<int, 2>') to parameter of incompatible type '__attribute__((__vector_size__(2 * sizeof(unsigned int)))) unsigned int' (vector of 2 'unsigned int' values)}}
+}
+
+struct S {
+ uint2 a;
+};
+
+uint2 test_incorrect_arg_type(S a) {
+ return __builtin_hlsl_adduint64(a, a);
+ // expected-error@-1 {{passing 'S' to parameter of incompatible type 'unsigned int'}}
+}
+
diff --git a/llvm/lib/Target/DirectX/DXIL.td b/llvm/lib/Target/DirectX/DXIL.td
index d099bb395449da9..ec31d27a1ed7a02 100644
--- a/llvm/lib/Target/DirectX/DXIL.td
+++ b/llvm/lib/Target/DirectX/DXIL.td
@@ -50,6 +50,7 @@ def HandleTy : DXILOpParamType;
def ResBindTy : DXILOpParamType;
def ResPropsTy : DXILOpParamType;
def SplitDoubleTy : DXILOpParamType;
+def BinaryWithCarryTy : DXILOpParamType;
class DXILOpClass;
@@ -738,6 +739,18 @@ def UMin : DXILOp<40, binary> {
let attributes = [Attributes<DXIL1_0, [ReadNone]>];
}
+def UAddc : DXILOp<44, binaryWithCarryOrBorrow > {
+ let Doc = "Unsigned 32-bit integer arithmetic add with carry. uaddc(a,b) = (a+b, a+b overflowed ? 1 : 0)";
+ // TODO: This `let intrinsics = ...` line may be uncommented when
+ // https://github.com/llvm/llvm-project/issues/113192 is fixed
+ // let intrinsics = [IntrinSelect<int_uadd_with_overflow>];
+ let arguments = [OverloadTy, OverloadTy];
+ let result = BinaryWithCarryTy;
+ let overloads = [Overloads<DXIL1_0, [Int32Ty]>];
+ let stages = [Stages<DXIL1_0, [all_stages]>];
+ let attributes = [Attributes<DXIL1_0, [ReadNone]>];
+}
+
def FMad : DXILOp<46, tertiary> {
let Doc = "Floating point arithmetic multiply/add operation. fmad(m,a,b) = m "
"* a + b.";
diff --git a/llvm/lib/Target/DirectX/DXILOpBuilder.cpp b/llvm/lib/Target/DirectX/DXILOpBuilder.cpp
index badd5aabd6432d2..af309663c6044df 100644
--- a/llvm/lib/Target/DirectX/DXILOpBuilder.cpp
+++ b/llvm/lib/Target/DirectX/DXILOpBuilder.cpp
@@ -230,6 +230,14 @@ static StructType *getSplitDoubleType(LLVMContext &Context) {
return StructType::create({Int32Ty, Int32Ty}, "dx.types.splitdouble");
}
+static StructType *getBinaryWithCarryType(LLVMContext &Context) {
+ if (auto *ST = StructType::getTypeByName(Context, "dx.types.i32c"))
+ return ST;
+ Type *Int32Ty = Type::getInt32Ty(Context);
+ Type *Int1Ty= Type::getInt1Ty(Context);
+ return StructType::create({Int32Ty, Int1Ty}, "dx.types.i32c");
+}
+
static Type *getTypeFromOpParamType(OpParamType Kind, LLVMContext &Ctx,
Type *OverloadTy) {
switch (Kind) {
@@ -273,6 +281,8 @@ static Type *getTypeFromOpParamType(OpParamType Kind, LLVMContext &Ctx,
return getResPropsType(Ctx);
case OpParamType::SplitDoubleTy:
return getSplitDoubleType(Ctx);
+ case OpParamType::BinaryWithCarryTy:
+ return getBinaryWithCarryType(Ctx);
}
llvm_unreachable("Invalid parameter kind");
return nullptr;
@@ -539,6 +549,10 @@ StructType *DXILOpBuilder::getSplitDoubleType(LLVMContext &Context) {
return ::getSplitDoubleType(Context);
}
+StructType *DXILOpBuilder::getBinaryWithCarryType(LLVMContext &Context) {
+ return ::getBinaryWithCarryType(Context);
+}
+
StructType *DXILOpBuilder::getHandleType() {
return ::getHandleType(IRB.getContext());
}
diff --git a/llvm/lib/Target/DirectX/DXILOpBuilder.h b/llvm/lib/Target/DirectX/DXILOpBuilder.h
index df5a0240870f4a4..8e13b87a2be10ac 100644
--- a/llvm/lib/Target/DirectX/DXILOpBuilder.h
+++ b/llvm/lib/Target/DirectX/DXILOpBuilder.h
@@ -53,6 +53,9 @@ class DXILOpBuilder {
/// Get the `%dx.types.splitdouble` type.
StructType *getSplitDoubleType(LLVMContext &Context);
+ /// Get the `%dx.types.i32c` type.
+ StructType *getBinaryWithCarryType(LLVMContext &Context);
+
/// Get the `%dx.types.Handle` type.
StructType *getHandleType();
diff --git a/llvm/lib/Target/DirectX/DXILOpLowering.cpp b/llvm/lib/Target/DirectX/DXILOpLowering.cpp
index 0c245c1a43d31cb..d639430ba5d31dc 100644
--- a/llvm/lib/Target/DirectX/DXILOpLowering.cpp
+++ b/llvm/lib/Target/DirectX/DXILOpLowering.cpp
@@ -359,17 +359,16 @@ class OpLowerer {
return lowerToBindAndAnnotateHandle(F);
}
- Error replaceSplitDoubleCallUsages(CallInst *Intrin, CallInst *Op) {
+ Error replaceExtractElementTypeOfCallUsages(CallInst *Intrin, CallInst *Op) {
for (Use &U : make_early_inc_range(Intrin->uses())) {
if (auto *EVI = dyn_cast<ExtractValueInst>(U.getUser())) {
if (EVI->getNumIndices() != 1)
- return createStringError(std::errc::invalid_argument,
- "Splitdouble has only 2 elements");
+ return createStringError(std::errc::invalid_argument, (std::string(Intrin->getOpcodeName()) + " has only 2 elements").c_str());
EVI->setOperand(0, Op);
} else {
return make_error<StringError>(
- "Splitdouble use is not ExtractValueInst",
+ (std::string(Intrin->getOpcodeName()) + " use is not ExtractValueInst").c_str(),
inconvertibleErrorCode());
}
}
@@ -821,7 +820...
[truncated]
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I would like to see if there is another reviewer has a suggestion of how we could re-use the built-in
for (Use &U : make_early_inc_range(Intrin->uses())) { | ||
if (auto *EVI = dyn_cast<ExtractValueInst>(U.getUser())) { | ||
|
||
if (EVI->getNumIndices() != 1) | ||
return createStringError(std::errc::invalid_argument, | ||
"Splitdouble has only 2 elements"); | ||
return createStringError( |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Are we able to create a test that exercises this error? If not, should these actually be asserts?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't think it is possible to make an HLSL test that exercises the error on if (EVI->getNumIndices() != 1)
. It's just invalid LLVM IR to have more indices than allowed for the struct type. It is caught by some other error handler:
$ opt -S -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library /home/icohedron/workspace/feature-uaddc/llvm/test/CodeGen/DirectX/UAddc.ll
/home/icohedron/workspace/feature-uaddc/build/bin/opt: /home/icohedron/workspace/feature-uaddc/llvm/test/CodeGen/DirectX/UAddc.ll:35:25: error: invalid indices for extractvalue
%carry = extractvalue { i32, i1 } %uaddc, 1, 0
The other error is able to be exercised just by having a use that is not an extractvalue. For example, returning the { i32, i1 }
:
$ opt -S -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library /home/icohedron/workspace/feature-uaddc/llvm/test/CodeGen/DirectX/UAddc.ll
error: <unknown>:0:0: in function test_UAddc2 { i32, i1 } (i32, i32): call use is not ExtractValueInst
I also see now that std::string(Intrin->getOpcodeName()
only says "call" and doesn't mention the name of the actual intrinsic being called. So that needs to be fixed, and an error test should be made.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I can replace the error (that checks the number of indicies = 1) with an assert, but since I changed the function name to be more generic, I don't think it should even be present.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Should there even be an error if there are non-extractvalue
instructions using the result of the call?
The function name I chose (replaceExtractElementTypeOfCallUsages
) implies it will just change the type of extractvalue
instructions that use the result of the call.
Edit: I see now that the name is also bad. It should be replaceExtractValueTypeOfCallUsages
)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
To be as generic as possible, I could replace the entire function so that it reads:
Error replaceExtractElementTypeOfCallUsages(CallInst *Intrin, CallInst *Op) {
for (Use &U : make_early_inc_range(Intrin->uses())) {
U.set(Op);
}
Intrin->eraseFromParent();
return Error::success();
}
As far as I understand, this function would just replace all uses of the intrinsic (Intrin
) with the new intrinsic replacing it (Op
). The function name would need to be changed, or it could be inlined into replaceFunctionWithNamedStructOp
, since that is the only user of replaceExtractElementTypeOfCallUsages
.
This change does pass all the existing llvm directx codegen tests.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The new replaceFunctionWithNamedStructOp
would look like this
[[nodiscard]] bool replaceFunctionWithNamedStructOp(Function &F,
dxil::OpCode DXILOp,
Type *NewRetTy) {
bool IsVectorArgExpansion = isVectorArgExpansion(F);
return replaceFunction(F, [&](CallInst *CI) -> Error {
SmallVector<Value *> Args;
OpBuilder.getIRB().SetInsertPoint(CI);
if (IsVectorArgExpansion) {
SmallVector<Value *> NewArgs = argVectorFlatten(CI, OpBuilder.getIRB());
Args.append(NewArgs.begin(), NewArgs.end());
} else
Args.append(CI->arg_begin(), CI->arg_end());
Expected<CallInst *> OpCall =
OpBuilder.tryCreateOp(DXILOp, Args, CI->getName(), NewRetTy);
if (Error E = OpCall.takeError())
return E;
for (Use &U : make_early_inc_range(CI->uses())) {
U.set(*OpCall);
}
CI->eraseFromParent();
return Error::success();
});
}
It works. All aggregate operations (extractvalue
, insertvalue
) get replaced correctly.
The only issue is if a function returns the result directly:
define noundef { i32, i1 } @test_UAddc2(i32 noundef %a, i32 noundef %b) {
; CHECK-LABEL: define noundef %dx.types.i32c @test_UAddc2(
; CHECK-SAME: i32 noundef [[A:%.*]], i32 noundef [[B:%.*]]) {
; CHECK-NEXT: [[UAddc:%.*]] = call %dx.types.i32c @dx.op.binaryWithCarryOrBorrow.i32(i32 44, i32 [[A]], i32 [[B]])
; CHECK-NEXT: ret %dx.types.i32c [[Result]]
;
%uaddc = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %a, i32 %b)
ret { i32, i1 } %uaddc
}
It results in an error that reads:
opt -S -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library /home/icohedron/workspace/feature-uaddc/llvm/test/CodeGen/DirectX/UAddc.ll
Function return type does not match operand type of return inst!
ret %dx.types.i32c %uaddc1
{ i32, i1 }in function test_UAddc2
LLVM ERROR: Broken function found, compilation aborted!
...
I would need to change the function definition's return type and replace all uses of this function.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Since replacing the function definition and its uses seems to be more complicated than is currently worth implementing, I have renamed replaceExtractElementTypeOfCallUsages
to replaceAggregateTypeOfCallUsages
and made it replace both ExtractValueInst
s and InsertValueInst
s in commit 39f17d5.
It throws an error if there is any use that is not an extractvalue or insertvalue.
Tests for insertvalue and the error have been added to the UAddc directx codegen tests.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@joaosaffran Maybe you can provide some input here?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think it is alright to reuse splitdouble
logic for this intrinsic. We have issue #113192 that keeps track of doing this struct lowering more generic. There is a list of intrinsic there that requires similar lowering. At the time issue #113192 was created, we discussed that some intrinsic lowering will have specificities. So I would suggest commenting the implementation of replaceFunctionWithNamedStructOp
into issue #113192, but I would wait for more intrinsic that require lowering to be added, so we can have more information to create the generic lowering function.
About the error being handled in if (EVI->getNumIndices() != 1)
, if it's being caught somewhere else, feel free to remove it. This was originally added due to the wonky lowering process of splitdouble.
@@ -230,6 +230,14 @@ static StructType *getSplitDoubleType(LLVMContext &Context) { | |||
return StructType::create({Int32Ty, Int32Ty}, "dx.types.splitdouble"); | |||
} | |||
|
|||
static StructType *getBinaryWithCarryType(LLVMContext &Context) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is maybe something that we could consider being generated from DXIL.td
, alongside the other custom struct types. Maybe not for this pull request, but we can make a note here for the follow up pr: #113192
@@ -19105,6 +19105,51 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID, | |||
return nullptr; | |||
|
|||
switch (BuiltinID) { | |||
case Builtin::BI__builtin_hlsl_adduint64: { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I suppose I know why we weren't able to re-use the __builtin_add_c
but for other reviewers it would be good to add context as a pr comment here. Maybe they will have suggestions as to how we could use it.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ah, I see that you have it in the commit notes. I still think it would be worth noting with more context here
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
__builtin_addc
was not able to be used to implement AddUint64
in hlsl_intrinsics.h
and (by extension) hlsl_detail.h
because its carryout
argument is a pointer (as documented here).
Since pointers are not supported in HLSL, an error is emitted when running HLSL codegen tests with an example implementation like the following in hlsl_intrinsics.h
.
_HLSL_AVAILABILITY(shadermodel, 6.0)
const inline uint32_t2 AddUint64(uint32_t2 a, uint32_t2 b) {
uint32_t carry;
uint32_t low_sum = __builtin_addc(a.x, b.x, 0, &carry);
uint32_t high_sum = __builtin_addc(a.y, b.y, carry, nullptr);
return uint32_t2(low_sum, high_sum);
}
build/lib/clang/20/include/hlsl/hlsl_intrinsics.h:158:50: error: the '&' operator is unsupported in HLSL
158 | uint32_t low_sum = __builtin_addc(a.x, b.x, 0, &carry);
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
So while HLSL does not support pointers we do have a concept of out args. if you search for EmitHLSLOutArgExpr
I think you can find some uses. My thinking is maybe we could do our own builtin like you have done but without the pointer and have an anonymous struct returned. then we could still piggy back off of the code genen for __builtin_addc even if we don't use the builtin itself. Maybe thats more complicated than it has to be, but it could be a way to keep the codegen for the uadd_with_overflow
intrinsic in one place.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Do you think it is something I should do for this implementation?
Are there other HLSL functions that would benefit from / reuse the new builtin using the out args?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
So the downside of what I suggested is that it would be a hybrid solution. You are writing the algorithm in HLSL, but you are also massaging the codegen to do out args instead of pointers, and write sema checks because we have to introduce a new builtin.
My thinking was there would be less total codgen if we did it the way I suggested and some of the sema checks would benefit from language rules instead of us having to put a bunch of effort into HLSLSema.cpp
. I don't have a strong opinion. So I won't make a requirement here.
@@ -10623,7 +10623,7 @@ def err_second_argument_to_cwsc_not_pointer : Error< | |||
"second argument to __builtin_call_with_static_chain must be of pointer type">; | |||
|
|||
def err_vector_incorrect_num_elements : Error< | |||
"%select{too many|too few}0 elements in vector %select{initialization|operand}3 (expected %1 elements, have %2)">; | |||
"%select{too many|too few|incorrect number of}0 elements in vector %select{initialization|operand}3 (expected %1 elements, have %2)">; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't like this change. this should stay being a diagnostic about the number of elements but in SemaHLSL you are using it to say not a vector
your code:
if (!VTy) {
SemaRef.Diag(TheCall->getBeginLoc(),
diag::err_vector_incorrect_num_elements)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I would still need this change for the other error message when a uint3 is passed.
We don't know if the programmer intended to use the uint2 or the uint4 version of the function, so uint3 has neither more or less elements than expected -- it's just an incorrect number.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
My latest commit (28b7eb8) changed the diag message. Is it now to your liking, or do you still think the diag message should be changed?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I made a new commit (d2b315a) that creates a new diag message specifically for AddUint64, as per your suggestion (which I can't seem to find anymore?).
Is this better?
3843a08
to
d2b315a
Compare
@@ -12553,6 +12553,8 @@ def err_std_initializer_list_malformed : Error< | |||
"%0 layout not recognized. Must be a non-polymorphic class type with no bases and two fields: a 'const E *' and either another 'const E *' or a 'std::size_t'">; | |||
|
|||
// HLSL Diagnostics | |||
def err_hlsl_adduint64_invalid_arguments: Error< |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Few things I think there is already an error for the scalar when expecting a vector case so we should just use that one.
Second this title is too specific to the adduint64 case. we should allow this to be more configurable.
I also don't think this should be hlsl specific. You weren't off base to use err_vector_incorrect_num_elements
. It is correct that the number of elements is wrong but it would be better is if we could specify something like Invalid element count of 3 when expecting even element counts in the range of 2 to 4
. That way we could make 2 and 4 configurable via min max args. Also be nice to make the type configurable too.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I pushed a new commit (eac0fe5).
The case of a scalar being passed as an argument is now back to using err_vec_builtin_non_vector
to emit an example error message of: all arguments to AddUint64 must be vectors
The case of a uint3
being passed as an argument is now using a new diag called err_invalid_even_odd_vector_element_count
which emits an example error message of: invalid element count of 3 in vector operand (expected an even element count in the range of 2 and 4)
.
I omitted the expected type from the error message, because passing in an invalid type would hit a separate error handler anyways. (The error would be passing 'int3' (aka 'vector<int, 3>') to parameter of incompatible type '__attribute__((__vector_size__(3 * sizeof(unsigned int)))) unsigned int' (vector of 3 'unsigned int' values)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
My comments are addressed and this LGTM if we go ahead with the current implementation. That being said, I think if there is a (not-too-complex) way that we can re-use the codegen, then I would vote for that. So maybe we could bring this up quickly in a design meeting.
llvm/lib/Target/DirectX/DXIL.td
Outdated
@@ -50,6 +50,7 @@ def HandleTy : DXILOpParamType; | |||
def ResBindTy : DXILOpParamType; | |||
def ResPropsTy : DXILOpParamType; | |||
def SplitDoubleTy : DXILOpParamType; | |||
def BinaryWithCarryTy : DXILOpParamType; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is reminding me how much I didn't like how splitdouble ended up. Seems like we need a more generic way to define our anonymous struct return types.
llvm/test/CodeGen/DirectX/UAddc.ll
Outdated
@@ -0,0 +1,40 @@ | |||
; RUN: opt -S -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s | |||
|
|||
; CHECK: %dx.types.i32c = type { i32, i1 } |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We need a vec2, and vec4 test that uses the vector scalarizer pass. What you will find is we can't scalarize UAddc without adding it to llvm/lib/Target/DirectX/DirectXTargetTransformInfo.cpp
isTargetIntrinsicTriviallyScalarizable
.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
In the current implementation, UAddc is only used with scalar i32 arguments because the codegen for AddUint64 only uses the scalar version of the uadd_with_overflow llvm intrinsic.
The only case where the resulting front-end IR, DXIL, and SPIR-V will differ from the current implementation is when AddUint64
receives uint4
arguments.
With the current version of the PR, I can do a successful end-to-end compilation of of the AddUint64.hlsl
test (which uses uint2 and uint4 vectors) using the command:
build/bin/clang -cc1 -finclude-default-header -triple dxil-pc-shadermodel6.3-library clang/test/CodeGenHLSL/builtins/AddUint64.hlsl -emit-llvm -o - | build/bin/opt -S -scalarizer -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library -o -
.
This works because the vector is already scalarized in the clang/front-end codegen.
I will work on reimplementing AddUint64 for the uint4 case by using the vector uadd_with_overflow, and make it scalarizable.
ret i16 %result | ||
} | ||
|
||
; CHECK: error: |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm assuming the not
in the run line is letting you do multiple error checks in this file? If that is the case there are potentially many *_error.ll cases that could be consolidated. Maybe worth writing up a ticket for this.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
needs DirectXTargetTransformInfo.cpp
change and coresponding vector to scalarization tests before I can sign off. but this is very close.
Summary: We don't support offloading on Darwin. This fails because there's some handling missing somewhere else that likely won't ever be added.
Delete references to: * `VectorLoadToMemrefLoadLowering`, * `VectorStoreToMemrefStoreLowering`. These patters were removed in llvm#121454.
update_analyze_test_checks has improved the checks since these were last updated. Reduce noise diffs in future patches.
…llvm#126119) Summary: This definition is more portable since it defines the correct value for the target. I got rid of the helper mostly because I think it's easy enough to use now that it's a type and being explicit about what's `undef` or `poison` is good.
… if we're blending inplace/splatable shuffle inputs on AVX2 targets (llvm#126420) More aggressively use broadcast instructions where possible Fixes llvm#50315
Oops. My bad everyone! I did not mean to ping so many people or to introduce all these other commits. I tried to push one commit but git complained that my branch was somehow behind its remote counterpart. I will be more careful in the future by double-checking the git diff and logs before pushing. |
With this blunder, I will just close this and make a new PR. |
…L op (#127137) Fixes #99205. - Implements the HLSL intrinsic `AddUint64` used to perform unsigned 64-bit integer addition by using pairs of unsigned 32-bit integers instead of native 64-bit types - The LLVM intrinsic `uadd_with_overflow` is used in the implementation of `AddUint64` in `CGBuiltin.cpp` - The DXIL op `UAddc` was defined in `DXIL.td`, and a lowering of the LLVM intrinsic `uadd_with_overflow` to the `UAddc` DXIL op was implemented in `DXILOpLowering.cpp` Notes: - `__builtin_addc` was not able to be used to implement `AddUint64` in `hlsl_intrinsics.h` because its `CarryOut` argument is a pointer, and pointers are not supported in HLSL - A lowering of the LLVM intrinsic `uadd_with_overflow` to SPIR-V [already exists](https://github.com/llvm/llvm-project/blob/main/llvm/test/CodeGen/SPIRV/llvm-intrinsics/uadd.with.overflow.ll) - When lowering the LLVM intrinsic `uadd_with_overflow` to the `UAddc` DXIL op, the anonymous struct type `{ i32, i1 }` is replaced with a named struct type `%dx.types.i32c`. This aspect of the implementation may be changed when issue #113192 gets addressed - Fixes issues mentioned in the comments on the original PR #125319 --------- Co-authored-by: Finn Plummer <[email protected]> Co-authored-by: Farzon Lotfi <[email protected]> Co-authored-by: Chris B <[email protected]> Co-authored-by: Justin Bogner <[email protected]>
…L op (llvm#127137) Fixes llvm#99205. - Implements the HLSL intrinsic `AddUint64` used to perform unsigned 64-bit integer addition by using pairs of unsigned 32-bit integers instead of native 64-bit types - The LLVM intrinsic `uadd_with_overflow` is used in the implementation of `AddUint64` in `CGBuiltin.cpp` - The DXIL op `UAddc` was defined in `DXIL.td`, and a lowering of the LLVM intrinsic `uadd_with_overflow` to the `UAddc` DXIL op was implemented in `DXILOpLowering.cpp` Notes: - `__builtin_addc` was not able to be used to implement `AddUint64` in `hlsl_intrinsics.h` because its `CarryOut` argument is a pointer, and pointers are not supported in HLSL - A lowering of the LLVM intrinsic `uadd_with_overflow` to SPIR-V [already exists](https://github.com/llvm/llvm-project/blob/main/llvm/test/CodeGen/SPIRV/llvm-intrinsics/uadd.with.overflow.ll) - When lowering the LLVM intrinsic `uadd_with_overflow` to the `UAddc` DXIL op, the anonymous struct type `{ i32, i1 }` is replaced with a named struct type `%dx.types.i32c`. This aspect of the implementation may be changed when issue llvm#113192 gets addressed - Fixes issues mentioned in the comments on the original PR llvm#125319 --------- Co-authored-by: Finn Plummer <[email protected]> Co-authored-by: Farzon Lotfi <[email protected]> Co-authored-by: Chris B <[email protected]> Co-authored-by: Justin Bogner <[email protected]>
This PR addresses #99205
AddUint64
used to perform unsigned 64-bit integer addition by using pairs of unsigned 32-bit integers instead of native 64-bit typesuadd_with_overflow
is used in the implementation ofAddUint64
inCGBuiltin.cpp
UAddc
was defined inDXIL.td
, and a lowering of the LLVM intrinsicuadd_with_overflow
to theUAddc
DXIL op was implemented inDXILOpLowering.cpp
Notes:
__builtin_addc
was not able to be used to implementAddUint64
inhlsl_intrinsics.h
because itsCarryOut
argument is a pointer, and pointers are not supported in HLSLuadd_with_overflow
to SPIR-V already existsuadd_with_overflow
to theUAddc
DXIL op, the anonymous struct type{ i32, i1 }
is replaced with a named struct type%dx.types.i32c
. This aspect of the implementation may be changed when issue [DirectX] Handle named structs in DXILOpLowering in a generic way #113192 gets addressedThis PR has been closed due to a catastrophic git-related mishap. A new PR #127137 was made to continue this work.