diff --git a/llvm/include/llvm/IR/IntrinsicsNVVM.td b/llvm/include/llvm/IR/IntrinsicsNVVM.td
index 854eb2f8dd6df..1e7fdb53059e2 100644
--- a/llvm/include/llvm/IR/IntrinsicsNVVM.td
+++ b/llvm/include/llvm/IR/IntrinsicsNVVM.td
@@ -1529,30 +1529,30 @@ def int_nvvm_mbarrier_pending_count :
 // pointer's alignment.
 def int_nvvm_ldu_global_i : Intrinsic<[llvm_anyint_ty],
                   [llvm_anyptr_ty, llvm_i32_ty],
-                  [IntrReadMem, IntrArgMemOnly, IntrNoCallback, NoCapture<ArgIndex<0>>],
+                  [IntrReadMem, IntrArgMemOnly, IntrNoCallback, IntrWillReturn, NoCapture<ArgIndex<0>>],
                   "llvm.nvvm.ldu.global.i">;
 def int_nvvm_ldu_global_f : Intrinsic<[llvm_anyfloat_ty],
                   [llvm_anyptr_ty, llvm_i32_ty],
-                  [IntrReadMem, IntrArgMemOnly, IntrNoCallback, NoCapture<ArgIndex<0>>],
+                  [IntrReadMem, IntrArgMemOnly, IntrNoCallback, IntrWillReturn, NoCapture<ArgIndex<0>>],
                   "llvm.nvvm.ldu.global.f">;
 def int_nvvm_ldu_global_p : Intrinsic<[llvm_anyptr_ty],
                   [llvm_anyptr_ty, llvm_i32_ty],
-                  [IntrReadMem, IntrArgMemOnly, IntrNoCallback, NoCapture<ArgIndex<0>>],
+                  [IntrReadMem, IntrArgMemOnly, IntrNoCallback, IntrWillReturn, NoCapture<ArgIndex<0>>],
                   "llvm.nvvm.ldu.global.p">;
 
 // Generated within nvvm. Use for ldg on sm_35 or later. Second arg is the
 // pointer's alignment.
 def int_nvvm_ldg_global_i : Intrinsic<[llvm_anyint_ty],
                   [llvm_anyptr_ty, llvm_i32_ty],
-                  [IntrReadMem, IntrArgMemOnly, IntrNoCallback, NoCapture<ArgIndex<0>>],
+                  [IntrReadMem, IntrArgMemOnly, IntrNoCallback, IntrWillReturn, NoCapture<ArgIndex<0>>],
                   "llvm.nvvm.ldg.global.i">;
 def int_nvvm_ldg_global_f : Intrinsic<[llvm_anyfloat_ty],
                   [llvm_anyptr_ty, llvm_i32_ty],
-                  [IntrReadMem, IntrArgMemOnly, IntrNoCallback, NoCapture<ArgIndex<0>>],
+                  [IntrReadMem, IntrArgMemOnly, IntrNoCallback, IntrWillReturn, NoCapture<ArgIndex<0>>],
                   "llvm.nvvm.ldg.global.f">;
 def int_nvvm_ldg_global_p : Intrinsic<[llvm_anyptr_ty],
                   [llvm_anyptr_ty, llvm_i32_ty],
-                  [IntrReadMem, IntrArgMemOnly, IntrNoCallback, NoCapture<ArgIndex<0>>],
+                  [IntrReadMem, IntrArgMemOnly, IntrNoCallback, IntrWillReturn, NoCapture<ArgIndex<0>>],
                   "llvm.nvvm.ldg.global.p">;
 
 // Use for generic pointers
diff --git a/llvm/test/Transforms/DCE/nvvm-ldu-ldg-willreturn.ll b/llvm/test/Transforms/DCE/nvvm-ldu-ldg-willreturn.ll
new file mode 100644
index 0000000000000..64a023ef45137
--- /dev/null
+++ b/llvm/test/Transforms/DCE/nvvm-ldu-ldg-willreturn.ll
@@ -0,0 +1,187 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S < %s -passes=dce | FileCheck %s
+
+; ldu/ldg intrinsics were erroneously not marked IntrWillReturn, preventing
+; them from being eliminated at IR level when dead.
+
+declare i8 @llvm.nvvm.ldu.global.i.i8.p1(ptr addrspace(1) %ptr, i32 %align)
+declare i16 @llvm.nvvm.ldu.global.i.i16.p1(ptr addrspace(1) %ptr, i32 %align)
+declare i32 @llvm.nvvm.ldu.global.i.i32.p1(ptr addrspace(1) %ptr, i32 %align)
+declare i64 @llvm.nvvm.ldu.global.i.i64.p1(ptr addrspace(1) %ptr, i32 %align)
+declare ptr @llvm.nvvm.ldu.global.p.p1(ptr addrspace(1) %ptr, i32 %align)
+declare float @llvm.nvvm.ldu.global.f.f32.p1(ptr addrspace(1) %ptr, i32 %align)
+declare double @llvm.nvvm.ldu.global.f.f64.p1(ptr addrspace(1) %ptr, i32 %align)
+declare half @llvm.nvvm.ldu.global.f.f16.p1(ptr addrspace(1) %ptr, i32 %align)
+declare <2 x half> @llvm.nvvm.ldu.global.f.v2f16.p1(ptr addrspace(1) %ptr, i32 %align)
+
+declare i8 @llvm.nvvm.ldg.global.i.i8.p1(ptr addrspace(1) %ptr, i32 %align)
+declare i16 @llvm.nvvm.ldg.global.i.i16.p1(ptr addrspace(1) %ptr, i32 %align)
+declare i32 @llvm.nvvm.ldg.global.i.i32.p1(ptr addrspace(1) %ptr, i32 %align)
+declare i64 @llvm.nvvm.ldg.global.i.i64.p1(ptr addrspace(1) %ptr, i32 %align)
+declare ptr @llvm.nvvm.ldg.global.p.p1(ptr addrspace(1) %ptr, i32 %align)
+declare float @llvm.nvvm.ldg.global.f.f32.p1(ptr addrspace(1) %ptr, i32 %align)
+declare double @llvm.nvvm.ldg.global.f.f64.p1(ptr addrspace(1) %ptr, i32 %align)
+declare half @llvm.nvvm.ldg.global.f.f16.p1(ptr addrspace(1) %ptr, i32 %align)
+declare <2 x half> @llvm.nvvm.ldg.global.f.v2f16.p1(ptr addrspace(1) %ptr, i32 %align)
+
+define void @test_ldu_i8_dead(ptr addrspace(1) %ptr) {
+; CHECK-LABEL: define void @test_ldu_i8_dead(
+; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %val = tail call i8 @llvm.nvvm.ldu.global.i.i8.p1(ptr addrspace(1) %ptr, i32 4)
+  ret void
+}
+
+define void @test_ldu_i16_dead(ptr addrspace(1) %ptr) {
+; CHECK-LABEL: define void @test_ldu_i16_dead(
+; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %val = tail call i16 @llvm.nvvm.ldu.global.i.i16.p1(ptr addrspace(1) %ptr, i32 2)
+  ret void
+}
+
+define void @test_ldu_i32_dead(ptr addrspace(1) %ptr) {
+; CHECK-LABEL: define void @test_ldu_i32_dead(
+; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %val = tail call i32 @llvm.nvvm.ldu.global.i.i32.p1(ptr addrspace(1) %ptr, i32 4)
+  ret void
+}
+
+define void @test_ldu_i64_dead(ptr addrspace(1) %ptr) {
+; CHECK-LABEL: define void @test_ldu_i64_dead(
+; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %val = tail call i64 @llvm.nvvm.ldu.global.i.i64.p1(ptr addrspace(1) %ptr, i32 8)
+  ret void
+}
+
+define void @test_ldu_p_dead(ptr addrspace(1) %ptr) {
+; CHECK-LABEL: define void @test_ldu_p_dead(
+; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %val = tail call ptr @llvm.nvvm.ldu.global.p.p1(ptr addrspace(1) %ptr, i32 8)
+  ret void
+}
+
+define void @test_ldu_f32_dead(ptr addrspace(1) %ptr) {
+; CHECK-LABEL: define void @test_ldu_f32_dead(
+; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %val = tail call float @llvm.nvvm.ldu.global.f.f32.p1(ptr addrspace(1) %ptr, i32 4)
+  ret void
+}
+
+define void @test_ldu_f64_dead(ptr addrspace(1) %ptr) {
+; CHECK-LABEL: define void @test_ldu_f64_dead(
+; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %val = tail call double @llvm.nvvm.ldu.global.f.f64.p1(ptr addrspace(1) %ptr, i32 8)
+  ret void
+}
+
+define void @test_ldu_f16_dead(ptr addrspace(1) %ptr) {
+; CHECK-LABEL: define void @test_ldu_f16_dead(
+; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %val = tail call half @llvm.nvvm.ldu.global.f.f16.p1(ptr addrspace(1) %ptr, i32 2)
+  ret void
+}
+
+define void @test_ldu_v2f16_dead(ptr addrspace(1) %ptr) {
+; CHECK-LABEL: define void @test_ldu_v2f16_dead(
+; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %val = tail call <2 x half> @llvm.nvvm.ldu.global.f.v2f16.p1(ptr addrspace(1) %ptr, i32 4)
+  ret void
+}
+
+define void @test_ldg_i8_dead(ptr addrspace(1) %ptr) {
+; CHECK-LABEL: define void @test_ldg_i8_dead(
+; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %val = tail call i8 @llvm.nvvm.ldg.global.i.i8.p1(ptr addrspace(1) %ptr, i32 4)
+  ret void
+}
+
+define void @test_ldg_i16_dead(ptr addrspace(1) %ptr) {
+; CHECK-LABEL: define void @test_ldg_i16_dead(
+; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %val = tail call i16 @llvm.nvvm.ldg.global.i.i16.p1(ptr addrspace(1) %ptr, i32 2)
+  ret void
+}
+
+define void @test_ldg_i32_dead(ptr addrspace(1) %ptr) {
+; CHECK-LABEL: define void @test_ldg_i32_dead(
+; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %val = tail call i32 @llvm.nvvm.ldg.global.i.i32.p1(ptr addrspace(1) %ptr, i32 4)
+  ret void
+}
+
+define void @test_ldg_i64_dead(ptr addrspace(1) %ptr) {
+; CHECK-LABEL: define void @test_ldg_i64_dead(
+; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %val = tail call i64 @llvm.nvvm.ldg.global.i.i64.p1(ptr addrspace(1) %ptr, i32 8)
+  ret void
+}
+
+define void @test_ldg_p_dead(ptr addrspace(1) %ptr) {
+; CHECK-LABEL: define void @test_ldg_p_dead(
+; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %val = tail call ptr @llvm.nvvm.ldg.global.p.p1(ptr addrspace(1) %ptr, i32 8)
+  ret void
+}
+
+define void @test_ldg_f32_dead(ptr addrspace(1) %ptr) {
+; CHECK-LABEL: define void @test_ldg_f32_dead(
+; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %val = tail call float @llvm.nvvm.ldg.global.f.f32.p1(ptr addrspace(1) %ptr, i32 4)
+  ret void
+}
+
+define void @test_ldg_f64_dead(ptr addrspace(1) %ptr) {
+; CHECK-LABEL: define void @test_ldg_f64_dead(
+; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %val = tail call double @llvm.nvvm.ldg.global.f.f64.p1(ptr addrspace(1) %ptr, i32 8)
+  ret void
+}
+
+define void @test_ldg_f16_dead(ptr addrspace(1) %ptr) {
+; CHECK-LABEL: define void @test_ldg_f16_dead(
+; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %val = tail call half @llvm.nvvm.ldg.global.f.f16.p1(ptr addrspace(1) %ptr, i32 2)
+  ret void
+}
+
+define void @test_ldg_v2f16_dead(ptr addrspace(1) %ptr) {
+; CHECK-LABEL: define void @test_ldg_v2f16_dead(
+; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %val = tail call <2 x half> @llvm.nvvm.ldg.global.f.v2f16.p1(ptr addrspace(1) %ptr, i32 4)
+  ret void
+}