-
-
Notifications
You must be signed in to change notification settings - Fork 14.2k
Open
Labels
A-LLVM — Area: Code generation parts specific to LLVM. Both correctness bugs and optimization-related issues.
C-bug — Category: This is a bug.
F-gpu_offload — `#![feature(gpu_offload)]`
Description
Offload generates declarations for __tgt_register_lib and __tgt_unregister_lib routines with wrong parameters.
I tried this code (extracted from #150413):
#![allow(internal_features, non_camel_case_types, non_snake_case)]
#![feature(abi_gpu_kernel)]
#![feature(rustc_attrs)]
#![feature(core_intrinsics)]
#![cfg_attr(target_arch = "amdgpu", feature(stdarch_amdgpu))]
#![no_std]
#[cfg(target_os = "linux")]
extern crate libc;
// Minimal panic handler, which this `#![no_std]` crate has to supply itself.
// It never returns (`-> !`): it ignores the panic info and spins forever.
#[panic_handler]
fn panic(_: &core::panic::PanicInfo) -> ! {
loop {}
}
// Host-side entry point; compiled only when `target_os = "linux"`.
// Builds a six-element array and passes the `foo` declaration, plus a 1-tuple
// holding a pointer to that array, to the `core::intrinsics::offload` intrinsic.
// NOTE(review): the literals carry no `f32` suffix, and the LLVM IR quoted in
// this report fills `A` with `double` stores into a 48-byte alloca, so `A`
// appears to be `[f64; 6]` while the pointer is cast to `*const [f32; 6]`.
// This looks unrelated to the reported declaration bug — confirm whether intended.
#[cfg(target_os = "linux")]
#[unsafe(no_mangle)]
#[inline(never)]
unsafe fn main() {
let A = [1.0, 1.0, 1.0, 1.0, 1.0, 1.0];
unsafe {
core::intrinsics::offload::<_, _, ()>(foo, (A.as_ptr() as *const [f32; 6],));
};
}
// Host-side declaration of `foo`; only present when `target_os = "linux"`.
// The matching definition is the `#[rustc_offload_kernel]` function below,
// which is compiled only when `target_os` is not "linux".
#[cfg(target_os = "linux")]
unsafe extern "C" {
pub fn foo(A: *const [f32; 6]) -> ();
}
#[cfg(not(target_os = "linux"))]
#[unsafe(no_mangle)]
#[inline(never)]
#[rustc_offload_kernel]
pub extern "gpu-kernel" fn foo(A: *const [f32; 6]) -> () {}

I expected to see this happen: The generated LLVM IR should declare each of those two routines with a single pointer parameter, as shown below.
declare void @__tgt_register_lib(ptr %0)
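declare void @__tgt_unregister_lib(ptr %0)

Instead, the resulting LLVM IR declares 9 parameters for those routines (`ptr, i64, i32` followed by six `ptr`s — the same parameter list as the `__tgt_target_data_*_mapper` declarations):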
; ModuleID = 'e46kgqes3cuj6fp89atdfry6m'
source_filename = "e46kgqes3cuj6fp89atdfry6m"
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
%struct.ident_t = type { i32, i32, i32, i32, ptr }
%struct.__tgt_offload_entry = type { i64, i16, i16, i32, ptr, ptr, i64, i64, ptr }
%struct.__tgt_bin_desc = type { i32, ptr, ptr, ptr }
%struct.__tgt_kernel_arguments = type { i32, i32, ptr, ptr, ptr, ptr, ptr, ptr, i64, i64, [3 x i32], [3 x i32], i32 }
@0 = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1
@1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 22, ptr @0 }, align 8
@.offload_sizes.foo = private unnamed_addr constant [1 x i64] [i64 24]
@.offload_maptypes.foo = private unnamed_addr constant [1 x i64] [i64 33]
@.foo.region_id = weak constant i8 0
@.offloading.entry_name.foo = internal unnamed_addr constant [4 x i8] c"foo\00", section ".llvm.rodata.offloading", align 1
@.offloading.entry.foo = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 1, i32 0, ptr @.foo.region_id, ptr @.offloading.entry_name.foo, i64 0, i64 0, ptr null }, section "llvm_offload_entries", align 8
; Function Attrs: nounwind
declare i32 @__tgt_target_kernel(ptr %0, i64 %1, i32 %2, i32 %3, ptr %4, ptr %5) #0
; Function Attrs: nounwind
declare void @__tgt_target_data_begin_mapper(ptr %0, i64 %1, i32 %2, ptr %3, ptr %4, ptr %5, ptr %6, ptr %7, ptr %8) #0
; Function Attrs: nounwind
declare void @__tgt_target_data_update_mapper(ptr %0, i64 %1, i32 %2, ptr %3, ptr %4, ptr %5, ptr %6, ptr %7, ptr %8) #0
; Function Attrs: nounwind
declare void @__tgt_target_data_end_mapper(ptr %0, i64 %1, i32 %2, ptr %3, ptr %4, ptr %5, ptr %6, ptr %7, ptr %8) #0
declare void @__tgt_register_lib(ptr %0, i64 %1, i32 %2, ptr %3, ptr %4, ptr %5, ptr %6, ptr %7, ptr %8)
declare void @__tgt_unregister_lib(ptr %0, i64 %1, i32 %2, ptr %3, ptr %4, ptr %5, ptr %6, ptr %7, ptr %8)
declare void @__tgt_init_all_rtls()
; Function Attrs: noreturn nounwind nonlazybind uwtable
define hidden void @_RNvCsaJEegvYxiuD_7___rustc17rust_begin_unwind(ptr align 8 %_1) unnamed_addr #1 !dbg !5 {
start:
%_1.dbg.spill = alloca [8 x i8], align 8
store ptr %_1, ptr %_1.dbg.spill, align 8
#dbg_declare(ptr %_1.dbg.spill, !142, !DIExpression(), !143)
br label %bb1, !dbg !144
bb1: ; preds = %bb1, %start
br label %bb1, !dbg !144
}
; Function Attrs: alwaysinline nounwind nonlazybind uwtable
define internal ptr @_RNvMNtCsbROv6HdTZO3_4core5sliceSd6as_ptrCsi2Hs52l1lP8_1r(ptr align 8 %self.0, i64 %self.1) unnamed_addr #2 !dbg !145 {
start:
%self.dbg.spill = alloca [16 x i8], align 8
store ptr %self.0, ptr %self.dbg.spill, align 8
%0 = getelementptr inbounds i8, ptr %self.dbg.spill, i64 8
store i64 %self.1, ptr %0, align 8
#dbg_declare(ptr %self.dbg.spill, !160, !DIExpression(), !161)
ret ptr %self.0, !dbg !162
}
; Function Attrs: noinline nounwind nonlazybind uwtable
define void @main() unnamed_addr #3 !dbg !163 {
start:
%A = alloca [48 x i8], align 8
#dbg_declare(ptr %A, !166, !DIExpression(), !169)
%EmptyDesc = alloca %struct.__tgt_bin_desc, align 8, !dbg !170
%.offload_baseptrs = alloca [1 x ptr], align 8, !dbg !170
%.offload_ptrs = alloca [1 x ptr], align 8, !dbg !170
%.offload_sizes = alloca [1 x i64], align 8, !dbg !170
%kernel_args = alloca %struct.__tgt_kernel_arguments, align 8, !dbg !170
%0 = getelementptr inbounds nuw double, ptr %A, i64 0, !dbg !171
%1 = getelementptr inbounds nuw double, ptr %A, i64 6, !dbg !171
br label %repeat_loop_header, !dbg !171
call void @llvm.memset.p0.i64(ptr align 8 %EmptyDesc, i8 0, i64 32, i1 false), !dbg !170
%2 = getelementptr inbounds float, ptr %_5, i32 0, !dbg !170
call void @__tgt_register_lib(ptr %EmptyDesc), !dbg !170
call void @__tgt_init_all_rtls(), !dbg !170
%3 = getelementptr inbounds [1 x ptr], ptr %.offload_baseptrs, i32 0, i32 0, !dbg !170
store ptr %_5, ptr %3, align 8, !dbg !170
%4 = getelementptr inbounds [1 x ptr], ptr %.offload_ptrs, i32 0, i32 0, !dbg !170
store ptr %2, ptr %4, align 8, !dbg !170
%5 = getelementptr inbounds [1 x i64], ptr %.offload_sizes, i32 0, i32 0, !dbg !170
store i64 24, ptr %5, align 8, !dbg !170
%6 = getelementptr inbounds [1 x ptr], ptr %.offload_baseptrs, i32 0, i32 0, !dbg !170
%7 = getelementptr inbounds [1 x ptr], ptr %.offload_ptrs, i32 0, i32 0, !dbg !170
%8 = getelementptr inbounds [1 x i64], ptr %.offload_sizes, i32 0, i32 0, !dbg !170
call void @__tgt_target_data_begin_mapper(ptr @1, i64 -1, i32 1, ptr %6, ptr %7, ptr %8, ptr @.offload_maptypes.foo, ptr null, ptr null), !dbg !170
%9 = getelementptr inbounds %struct.__tgt_kernel_arguments, ptr %kernel_args, i32 0, i32 0, !dbg !170
store i32 3, ptr %9, align 4, !dbg !170
%10 = getelementptr inbounds %struct.__tgt_kernel_arguments, ptr %kernel_args, i32 0, i32 1, !dbg !170
store i32 1, ptr %10, align 4, !dbg !170
%11 = getelementptr inbounds %struct.__tgt_kernel_arguments, ptr %kernel_args, i32 0, i32 2, !dbg !170
store ptr %6, ptr %11, align 8, !dbg !170
%12 = getelementptr inbounds %struct.__tgt_kernel_arguments, ptr %kernel_args, i32 0, i32 3, !dbg !170
store ptr %7, ptr %12, align 8, !dbg !170
%13 = getelementptr inbounds %struct.__tgt_kernel_arguments, ptr %kernel_args, i32 0, i32 4, !dbg !170
store ptr %8, ptr %13, align 8, !dbg !170
%14 = getelementptr inbounds %struct.__tgt_kernel_arguments, ptr %kernel_args, i32 0, i32 5, !dbg !170
store ptr @.offload_maptypes.foo, ptr %14, align 8, !dbg !170
%15 = getelementptr inbounds %struct.__tgt_kernel_arguments, ptr %kernel_args, i32 0, i32 6, !dbg !170
store ptr null, ptr %15, align 8, !dbg !170
%16 = getelementptr inbounds %struct.__tgt_kernel_arguments, ptr %kernel_args, i32 0, i32 7, !dbg !170
store ptr null, ptr %16, align 8, !dbg !170
%17 = getelementptr inbounds %struct.__tgt_kernel_arguments, ptr %kernel_args, i32 0, i32 8, !dbg !170
store i64 0, ptr %17, align 8, !dbg !170
%18 = getelementptr inbounds %struct.__tgt_kernel_arguments, ptr %kernel_args, i32 0, i32 9, !dbg !170
store i64 0, ptr %18, align 8, !dbg !170
%19 = getelementptr inbounds %struct.__tgt_kernel_arguments, ptr %kernel_args, i32 0, i32 10, !dbg !170
store [3 x i32] [i32 2097152, i32 0, i32 0], ptr %19, align 4, !dbg !170
%20 = getelementptr inbounds %struct.__tgt_kernel_arguments, ptr %kernel_args, i32 0, i32 11, !dbg !170
store [3 x i32] [i32 256, i32 0, i32 0], ptr %20, align 4, !dbg !170
%21 = getelementptr inbounds %struct.__tgt_kernel_arguments, ptr %kernel_args, i32 0, i32 12, !dbg !170
store i32 0, ptr %21, align 4, !dbg !170
%22 = call i32 @__tgt_target_kernel(ptr @1, i64 -1, i32 2097152, i32 256, ptr @.foo.region_id, ptr %kernel_args), !dbg !170
%23 = getelementptr inbounds [1 x ptr], ptr %.offload_baseptrs, i32 0, i32 0, !dbg !170
%24 = getelementptr inbounds [1 x ptr], ptr %.offload_ptrs, i32 0, i32 0, !dbg !170
%25 = getelementptr inbounds [1 x i64], ptr %.offload_sizes, i32 0, i32 0, !dbg !170
call void @__tgt_target_data_end_mapper(ptr @1, i64 -1, i32 1, ptr %23, ptr %24, ptr %25, ptr @.offload_maptypes.foo, ptr null, ptr null), !dbg !170
call void @__tgt_unregister_lib(ptr %EmptyDesc), !dbg !170
repeat_loop_header: ; preds = %repeat_loop_body, %start
%26 = phi ptr [ %0, %start ], [ %28, %repeat_loop_body ]
%27 = icmp ne ptr %26, %1
br i1 %27, label %repeat_loop_body, label %repeat_loop_next
repeat_loop_body: ; preds = %repeat_loop_header
store double 1.000000e+00, ptr %26, align 8
%28 = getelementptr inbounds double, ptr %26, i64 1
br label %repeat_loop_header
repeat_loop_next: ; preds = %repeat_loop_header
%_5 = call ptr @_RNvMNtCsbROv6HdTZO3_4core5sliceSd6as_ptrCsi2Hs52l1lP8_1r(ptr align 8 %A, i64 6) #5, !dbg !172
%dummy = load volatile ptr, ptr @.offload_sizes.foo, align 8, !dbg !170
%dummy1 = load volatile ptr, ptr @.offloading.entry.foo, align 8, !dbg !170
}
; Function Attrs: nocallback nofree nounwind willreturn memory(argmem: write)
declare void @llvm.memset.p0.i64(ptr writeonly captures(none) %0, i8 %1, i64 %2, i1 immarg %3) #4
attributes #0 = { nounwind }
attributes #1 = { noreturn nounwind nonlazybind uwtable "probe-stack"="inline-asm" "target-cpu"="x86-64" }
attributes #2 = { alwaysinline nounwind nonlazybind uwtable "probe-stack"="inline-asm" "target-cpu"="x86-64" }
attributes #3 = { noinline nounwind nonlazybind uwtable "probe-stack"="inline-asm" "target-cpu"="x86-64" }
attributes #4 = { nocallback nofree nounwind willreturn memory(argmem: write) }
attributes #5 = { alwaysinline nounwind }
!llvm.module.flags = !{!0, !1}
!llvm.ident = !{!2}
!llvm.dbg.cu = !{!3}

Meta
HEAD on the main branch.
Metadata
Metadata
Assignees
Labels
A-LLVM — Area: Code generation parts specific to LLVM. Both correctness bugs and optimization-related issues.
C-bug — Category: This is a bug.
F-gpu_offload — `#![feature(gpu_offload)]`