-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[AMDGPU] Add support for v_cvt_pk_f16_bf8
on gfx1250
#145753
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
This stack of pull requests is managed by Graphite. Learn more about stacking. |
@llvm/pr-subscribers-backend-amdgpu @llvm/pr-subscribers-llvm-ir Author: Shilei Tian (shiltian) ChangesCo-authored-by: Shilei Tian <[email protected]> Patch is 31.64 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/145753.diff 25 Files Affected:
diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def b/clang/include/clang/Basic/BuiltinsAMDGPU.def
index 94fa3e9b74c46..1d1f5a4ee3f9f 100644
--- a/clang/include/clang/Basic/BuiltinsAMDGPU.def
+++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def
@@ -643,6 +643,7 @@ TARGET_BUILTIN(__builtin_amdgcn_cvt_sr_f16_f32, "V2hV2hfUiIb", "nc", "f32-to-f16
TARGET_BUILTIN(__builtin_amdgcn_s_setprio_inc_wg, "vIs", "n", "setprio-inc-wg-inst")
TARGET_BUILTIN(__builtin_amdgcn_cvt_pk_f16_fp8, "V2hs", "nc", "gfx1250-insts")
+TARGET_BUILTIN(__builtin_amdgcn_cvt_pk_f16_bf8, "V2hs", "nc", "gfx1250-insts")
#undef BUILTIN
#undef TARGET_BUILTIN
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl
index 3f7a2d8649740..e2c6a4a3f15f3 100644
--- a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl
@@ -30,3 +30,23 @@ void test_cvt_pk_f16_fp8(global half2* out, short a)
{
out[0] = __builtin_amdgcn_cvt_pk_f16_fp8(a);
}
+
+// CHECK-LABEL: @test_cvt_pk_f16_bf8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
+// CHECK-NEXT: [[A_ADDR:%.*]] = alloca i16, align 2, addrspace(5)
+// CHECK-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr
+// CHECK-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK-NEXT: store ptr addrspace(1) [[OUT:%.*]], ptr [[OUT_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i16 [[A:%.*]], ptr [[A_ADDR_ASCAST]], align 2
+// CHECK-NEXT: [[TMP0:%.*]] = load i16, ptr [[A_ADDR_ASCAST]], align 2
+// CHECK-NEXT: [[TMP1:%.*]] = call <2 x half> @llvm.amdgcn.cvt.pk.f16.bf8(i16 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds <2 x half>, ptr addrspace(1) [[TMP2]], i64 0
+// CHECK-NEXT: store <2 x half> [[TMP1]], ptr addrspace(1) [[ARRAYIDX]], align 4
+// CHECK-NEXT: ret void
+//
+void test_cvt_pk_f16_bf8(global half2* out, short a)
+{
+ out[0] = __builtin_amdgcn_cvt_pk_f16_bf8(a);
+}
diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
index 72b0aa01f71aa..6f974c97361de 100644
--- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
+++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -592,6 +592,10 @@ def int_amdgcn_cvt_pk_f16_fp8 : DefaultAttrsIntrinsic<
[llvm_v2f16_ty], [llvm_i16_ty], [IntrNoMem, IntrSpeculatable]
>, ClangBuiltin<"__builtin_amdgcn_cvt_pk_f16_fp8">;
+def int_amdgcn_cvt_pk_f16_bf8 : DefaultAttrsIntrinsic<
+ [llvm_v2f16_ty], [llvm_i16_ty], [IntrNoMem, IntrSpeculatable]
+>, ClangBuiltin<"__builtin_amdgcn_cvt_pk_f16_bf8">;
+
class AMDGPUCvtScaleF32Intrinsic<LLVMType DstTy, LLVMType Src0Ty, string name> : DefaultAttrsIntrinsic<
[DstTy], [Src0Ty, llvm_float_ty], [IntrNoMem, IntrSpeculatable]
>, ClangBuiltin<"__builtin_amdgcn_"#name>;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
index 50d297cd096a6..b20760c356263 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
@@ -4542,6 +4542,7 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
case Intrinsic::amdgcn_cvt_pk_i16:
case Intrinsic::amdgcn_cvt_pk_u16:
case Intrinsic::amdgcn_cvt_pk_f16_fp8:
+ case Intrinsic::amdgcn_cvt_pk_f16_bf8:
case Intrinsic::amdgcn_fmed3:
case Intrinsic::amdgcn_cubeid:
case Intrinsic::amdgcn_cubema:
diff --git a/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
index 1f44cd7775ad9..d504c8134202d 100644
--- a/llvm/lib/Target/AMDGPU/VOP1Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
@@ -741,6 +741,9 @@ let SubtargetPredicate = isGFX1250Plus in {
defm V_CVT_PK_F16_FP8 : VOP1Inst_t16_with_profiles<"v_cvt_pk_f16_fp8",
VOPProfile_CVT_PK_F16_F8, VOPProfile_CVT_PK_F16_F8_true16, VOPProfile_CVT_PK_F16_F8_fake16,
int_amdgcn_cvt_pk_f16_fp8>;
+ defm V_CVT_PK_F16_BF8 : VOP1Inst_t16_with_profiles<"v_cvt_pk_f16_bf8",
+ VOPProfile_CVT_PK_F16_F8, VOPProfile_CVT_PK_F16_F8_true16, VOPProfile_CVT_PK_F16_F8_fake16,
+ int_amdgcn_cvt_pk_f16_bf8>;
}
} // End SubtargetPredicate = isGFX1250Plus
@@ -1078,6 +1081,7 @@ defm V_CVT_F32_F16 : VOP1_Real_FULL_t16_and_fake16_gfx11_gfx12<0x00b>;
defm V_CVT_F32_BF16 : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x072, "v_cvt_f32_bf16", "V_CVT_F32_BF16_gfx1250">;
defm V_CVT_PK_F16_FP8 : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x075>;
+defm V_CVT_PK_F16_BF8 : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x076>;
//===----------------------------------------------------------------------===//
// GFX10.
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.f16.fp8.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.f16.fp8.ll
index 67b5381646e29..243f6c4d23732 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.f16.fp8.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.f16.fp8.ll
@@ -4,6 +4,41 @@
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1250 -mattr=+real-true16 %s -o - | FileCheck -check-prefixes=GFX1250,GFX1250-GISEL-REAL16 %s
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1250 -mattr=-real-true16 %s -o - | FileCheck -check-prefixes=GFX1250,GFX1250-GISEL-FAKE16 %s
+define amdgpu_ps float @test_cvt_pk_f16_bf8_v(i16 %a) {
+; GFX1250-SDAG-REAL16-LABEL: test_cvt_pk_f16_bf8_v:
+; GFX1250-SDAG-REAL16: ; %bb.0:
+; GFX1250-SDAG-REAL16-NEXT: v_cvt_pk_f16_bf8 v0, v0.l
+; GFX1250-SDAG-REAL16-NEXT: ; return to shader part epilog
+;
+; GFX1250-SDAG-FAKE16-LABEL: test_cvt_pk_f16_bf8_v:
+; GFX1250-SDAG-FAKE16: ; %bb.0:
+; GFX1250-SDAG-FAKE16-NEXT: v_cvt_pk_f16_bf8 v0, v0
+; GFX1250-SDAG-FAKE16-NEXT: ; return to shader part epilog
+;
+; GFX1250-GISEL-REAL16-LABEL: test_cvt_pk_f16_bf8_v:
+; GFX1250-GISEL-REAL16: ; %bb.0:
+; GFX1250-GISEL-REAL16-NEXT: v_cvt_pk_f16_bf8 v0, v0.l
+; GFX1250-GISEL-REAL16-NEXT: ; return to shader part epilog
+;
+; GFX1250-GISEL-FAKE16-LABEL: test_cvt_pk_f16_bf8_v:
+; GFX1250-GISEL-FAKE16: ; %bb.0:
+; GFX1250-GISEL-FAKE16-NEXT: v_cvt_pk_f16_bf8 v0, v0
+; GFX1250-GISEL-FAKE16-NEXT: ; return to shader part epilog
+ %cvt = tail call <2 x half> @llvm.amdgcn.cvt.pk.f16.bf8(i16 %a)
+ %ret = bitcast <2 x half> %cvt to float
+ ret float %ret
+}
+
+define amdgpu_ps float @test_cvt_pk_f16_bf8_s(i16 inreg %a) {
+; GFX1250-LABEL: test_cvt_pk_f16_bf8_s:
+; GFX1250: ; %bb.0:
+; GFX1250-NEXT: v_cvt_pk_f16_bf8 v0, s0
+; GFX1250-NEXT: ; return to shader part epilog
+ %cvt = tail call <2 x half> @llvm.amdgcn.cvt.pk.f16.bf8(i16 %a)
+ %ret = bitcast <2 x half> %cvt to float
+ ret float %ret
+}
+
define amdgpu_ps float @test_cvt_pk_f16_fp8_v(i16 %a) {
; GFX1250-SDAG-REAL16-LABEL: test_cvt_pk_f16_fp8_v:
; GFX1250-SDAG-REAL16: ; %bb.0:
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1-fake16.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1-fake16.s
index 94ac6571935f8..e62eb6fbb723c 100644
--- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1-fake16.s
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1-fake16.s
@@ -46,6 +46,15 @@ v_cvt_f32_bf16 v5, src_scc
v_cvt_f32_bf16 v127, 0x8000
// GFX1250: v_cvt_f32_bf16_e32 v127, 0x8000 ; encoding: [0xff,0xe4,0xfe,0x7e,0x00,0x80,0x00,0x00]
+v_cvt_pk_f16_bf8 v1, v2
+// GFX1250: v_cvt_pk_f16_bf8 v1, v2 ; encoding: [0x02,0xed,0x02,0x7e]
+
+v_cvt_pk_f16_bf8 v1, s2
+// GFX1250: v_cvt_pk_f16_bf8 v1, s2 ; encoding: [0x02,0xec,0x02,0x7e]
+
+v_cvt_pk_f16_bf8 v1, 100
+// GFX1250: v_cvt_pk_f16_bf8 v1, 0x64 ; encoding: [0xff,0xec,0x02,0x7e,0x64,0x00,0x00,0x00]
+
v_cvt_pk_f16_fp8 v1, v2
// GFX1250: v_cvt_pk_f16_fp8 v1, v2 ; encoding: [0x02,0xeb,0x02,0x7e]
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1.s
index d6a0a95b60762..37f39546ae13d 100644
--- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1.s
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1.s
@@ -49,6 +49,15 @@ v_cvt_f32_bf16 v127, 0x8000
v_cvt_f32_bf16 v5, v1.h
// GFX1250: v_cvt_f32_bf16_e32 v5, v1.h ; encoding: [0x81,0xe5,0x0a,0x7e]
+v_cvt_pk_f16_bf8 v1, v2
+// GFX1250: v_cvt_pk_f16_bf8 v1, v2 ; encoding: [0x02,0xed,0x02,0x7e]
+
+v_cvt_pk_f16_bf8 v1, s2
+// GFX1250: v_cvt_pk_f16_bf8 v1, s2 ; encoding: [0x02,0xec,0x02,0x7e]
+
+v_cvt_pk_f16_bf8 v1, 100
+// GFX1250: v_cvt_pk_f16_bf8 v1, 0x64 ; encoding: [0xff,0xec,0x02,0x7e,0x64,0x00,0x00,0x00]
+
v_cvt_pk_f16_fp8 v1, v2
// GFX1250: v_cvt_pk_f16_fp8 v1, v2 ; encoding: [0x02,0xeb,0x02,0x7e]
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16-fake16.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16-fake16.s
index 674a4f738d8a1..1ec54d137b335 100644
--- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16-fake16.s
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16-fake16.s
@@ -61,3 +61,7 @@ v_cvt_f32_bf16 v127, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:
v_cvt_pk_f16_fp8 v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1
// GFX1250: v_cvt_pk_f16_fp8_dpp v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0xfa,0xea,0x02,0x7e,0x02,0xe4,0x04,0xff]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_cvt_pk_f16_bf8 v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1
+// GFX1250: v_cvt_pk_f16_bf8_dpp v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0xfa,0xec,0x02,0x7e,0x02,0xe4,0x04,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16.s
index 2ac6e0962fc89..d674a9ea06843 100644
--- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16.s
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16.s
@@ -62,6 +62,14 @@ v_cvt_f32_bf16 v5, v1.h quad_perm:[3,2,1,0]
// GFX1250: v_cvt_f32_bf16_dpp v5, v1.h quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xe4,0x0a,0x7e,0x81,0x1b,0x00,0xff]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+v_cvt_pk_f16_bf8 v1, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1
+// GFX1250: v_cvt_pk_f16_bf8_dpp v1, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0xfa,0xec,0x02,0x7e,0x02,0xe4,0x04,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_cvt_pk_f16_bf8 v1, v2.h quad_perm:[0,1,2,3]
+// GFX1250: v_cvt_pk_f16_bf8_dpp v1, v2.h quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xec,0x02,0x7e,0x82,0xe4,0x00,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
v_cvt_pk_f16_fp8 v1, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1
// GFX1250: v_cvt_pk_f16_fp8_dpp v1, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0xfa,0xea,0x02,0x7e,0x02,0xe4,0x04,0xff]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp8-fake16.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp8-fake16.s
index de01522820134..9ab3a8adfa511 100644
--- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp8-fake16.s
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp8-fake16.s
@@ -17,3 +17,7 @@ v_cvt_f32_bf16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
v_cvt_pk_f16_fp8 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
// GFX1250: v_cvt_pk_f16_fp8_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0xea,0x02,0x7e,0x02,0x77,0x39,0x05]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_cvt_pk_f16_bf8 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX1250: v_cvt_pk_f16_bf8_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0xec,0x02,0x7e,0x02,0x77,0x39,0x05]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp8.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp8.s
index 6d11be647001b..6904624471801 100644
--- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp8.s
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp8.s
@@ -25,3 +25,11 @@ v_cvt_pk_f16_fp8 v1, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1
v_cvt_pk_f16_fp8 v1, v2.h dpp8:[7,6,5,4,3,2,1,0]
// GFX1250: v_cvt_pk_f16_fp8_dpp v1, v2.h dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xea,0x02,0x7e,0x82,0x77,0x39,0x05]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_cvt_pk_f16_bf8 v1, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX1250: v_cvt_pk_f16_bf8_dpp v1, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0xec,0x02,0x7e,0x02,0x77,0x39,0x05]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_cvt_pk_f16_bf8 v1, v2.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX1250: v_cvt_pk_f16_bf8_dpp v1, v2.h dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xec,0x02,0x7e,0x82,0x77,0x39,0x05]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_err.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_err.s
index 70879ddf0b50f..c393d3e819880 100644
--- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_err.s
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_err.s
@@ -1,5 +1,15 @@
// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1250 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX1250-ERR --implicit-check-not=error: --strict-whitespace %s
+v_cvt_pk_f16_bf8 v1, v2 clamp
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// GFX1250-ERR-NEXT:{{^}}v_cvt_pk_f16_bf8 v1, v2 clamp
+// GFX1250-ERR-NEXT:{{^}} ^
+
+v_cvt_pk_f16_bf8 v1, v2 mul:2
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+// GFX1250-ERR-NEXT:{{^}}v_cvt_pk_f16_bf8 v1, v2 mul:2
+// GFX1250-ERR-NEXT:{{^}} ^
+
v_cvt_pk_f16_fp8 v1, v2 clamp
// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
// GFX1250-ERR-NEXT:{{^}}v_cvt_pk_f16_fp8 v1, v2 clamp
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1-fake16.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1-fake16.s
index 6354820f96c16..f6c7cf8006508 100644
--- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1-fake16.s
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1-fake16.s
@@ -76,6 +76,18 @@ v_cvt_f32_bf16_e64 v5, -1 op_sel:[1]
v_cvt_f32_bf16_e64 v5, src_scc op_sel:[1]
// GFX1250: v_cvt_f32_bf16_e64 v5, src_scc op_sel:[1,0] ; encoding: [0x05,0x08,0xf2,0xd5,0xfd,0x00,0x00,0x00]
+v_cvt_pk_f16_bf8 v1, v150
+// GFX1250: v_cvt_pk_f16_bf8 v1, v150 ; encoding: [0x01,0x00,0xf6,0xd5,0x96,0x01,0x00,0x00]
+
+v_cvt_pk_f16_bf8 v1, v2 op_sel:[1]
+// GFX1250: v_cvt_pk_f16_bf8 v1, v2 op_sel:[1,0] ; encoding: [0x01,0x08,0xf6,0xd5,0x02,0x01,0x00,0x00]
+
+v_cvt_pk_f16_bf8 v1, v150 op_sel:[1]
+// GFX1250: v_cvt_pk_f16_bf8 v1, v150 op_sel:[1,0] ; encoding: [0x01,0x08,0xf6,0xd5,0x96,0x01,0x00,0x00]
+
+v_cvt_pk_f16_bf8 v1, s2 op_sel:[1]
+// GFX1250: v_cvt_pk_f16_bf8 v1, s2 op_sel:[1,0] ; encoding: [0x01,0x08,0xf6,0xd5,0x02,0x00,0x00,0x00]
+
v_cvt_pk_f16_fp8 v1, v150
// GFX1250: v_cvt_pk_f16_fp8 v1, v150 ; encoding: [0x01,0x00,0xf5,0xd5,0x96,0x01,0x00,0x00]
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1.s
index a3596d4332c55..531d734a0683d 100644
--- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1.s
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1.s
@@ -79,6 +79,18 @@ v_cvt_f32_bf16_e64 v5, src_scc op_sel:[1]
v_cvt_f32_bf16_e64 v5, v128.h
// GFX1250: v_cvt_f32_bf16_e64 v5, v128.h op_sel:[1,0] ; encoding: [0x05,0x08,0xf2,0xd5,0x80,0x01,0x00,0x00]
+v_cvt_pk_f16_bf8 v1, v150
+// GFX1250: v_cvt_pk_f16_bf8 v1, v150 ; encoding: [0x01,0x00,0xf6,0xd5,0x96,0x01,0x00,0x00]
+
+v_cvt_pk_f16_bf8 v1, v2 op_sel:[1]
+// GFX1250: v_cvt_pk_f16_bf8 v1, v2 op_sel:[1,0] ; encoding: [0x01,0x08,0xf6,0xd5,0x02,0x01,0x00,0x00]
+
+v_cvt_pk_f16_bf8 v1, v150 op_sel:[1]
+// GFX1250: v_cvt_pk_f16_bf8 v1, v150 op_sel:[1,0] ; encoding: [0x01,0x08,0xf6,0xd5,0x96,0x01,0x00,0x00]
+
+v_cvt_pk_f16_bf8 v1, s2 op_sel:[1]
+// GFX1250: v_cvt_pk_f16_bf8 v1, s2 op_sel:[1,0] ; encoding: [0x01,0x08,0xf6,0xd5,0x02,0x00,0x00,0x00]
+
v_cvt_pk_f16_fp8 v1, v150
// GFX1250: v_cvt_pk_f16_fp8 v1, v150 ; encoding: [0x01,0x00,0xf5,0xd5,0x96,0x01,0x00,0x00]
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp16-fake16.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp16-fake16.s
index 3fd7d00cd4190..844b4259229ed 100644
--- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp16-fake16.s
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp16-fake16.s
@@ -46,6 +46,14 @@ v_cvt_f32_bf16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf
// GFX1250: v_cvt_f32_bf16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xf2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+v_cvt_pk_f16_bf8 v1, v128 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1
+// GFX1250: v_cvt_pk_f16_bf8_e64_dpp v1, v128 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x01,0x00,0xf6,0xd5,0xfa,0x00,0x00,0x00,0x80,0xe4,0x04,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_cvt_pk_f16_bf8 v1, v128 op_sel:[1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1
+// GFX1250: v_cvt_pk_f16_bf8_e64_dpp v1, v128 op_sel:[1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x01,0x08,0xf6,0xd5,0xfa,0x00,0x00,0x00,0x80,0xe4,0x04,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
v_cvt_pk_f16_fp8 v1, v128 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1
// GFX1250: v_cvt_pk_f16_fp8_e64_dpp v1, v128 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x01,0x00,0xf5,0xd5,0xfa,0x00,0x00,0x00,0x80,0xe4,0x04,0xff]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp16.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp16.s
index a2bc2b4310c74..32c2e54cf0e71 100644
--- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp16.s
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp16.s
@@ -50,6 +50,14 @@ v_cvt_f32_bf16_e64_dpp v5, v128.h quad_perm:[3,2,1,0]
// GFX1250: v_cvt_f32_bf16_e64_dpp v5, v128.h op_sel:[1,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0xf2,0xd5,0xfa,0x00,0x00,0x00,0x80,0x1b,0x00,0xff]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+v_cvt_pk_f16_bf8 v1, v128.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1
+// GFX1250: v_cvt_pk_f16_bf8_e64_dpp v1, v128.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x01,0x00,0xf6,0xd5,0xfa,0x00,0x00,0x00,0x80,0xe4,0x04,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_cvt_pk_f16_bf8 v1, v128.h quad_perm:[0,1,2,3]
+// GFX1250: v_cvt_pk_f16_bf8_e64_dpp v1, v128.h op_sel:[1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x08,0xf6,0xd5,0xfa,0x00,0x00,0x00,0x80,0xe4,0x00,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
v_cvt_pk_f16_fp8 v1, v128.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1
// GFX1250: v_cvt_pk_f16_fp8_e64_dpp v1, v128.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x01,0x00,0xf5,0xd5,0xfa,0x00,0x00,0x00,0x80,0xe4,0x04,0xff]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp8-fake16.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp8-fake16.s
index c53ba6958e47a..75692c7422f64 100644
--- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp8-fake16.s
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp8-fake16.s
@@ -2,6 +2,14 @@
// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=-real-true16 -show-encoding < %s | FileCheck --check-prefix=GFX1250 %s
// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=-real-true16 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX12-ERR --implicit-check-not=error: --strict-whitespace %s
+v_cvt_pk_f16_bf8 v1, v128 dpp8:...
[truncated]
|
5e439f7
to
cd383fa
Compare
76ed960
to
5d44b53
Compare
cd383fa
to
c9b4b83
Compare
5d44b53
to
3517ce4
Compare
c9b4b83
to
5e9ceee
Compare
Co-authored-by: Shilei Tian <[email protected]>
3517ce4
to
f155e2e
Compare
Co-authored-by: Shilei Tian [email protected]