Skip to content

Offload generates wrong declarations for LLVM target routines #150415

@kevinsala

Description

@kevinsala

Offload generates declarations for __tgt_register_lib and __tgt_unregister_lib routines with wrong parameters.

I tried this code (extracted from #150413):

#![allow(internal_features, non_camel_case_types, non_snake_case)]
#![feature(abi_gpu_kernel)]
#![feature(rustc_attrs)]
#![feature(core_intrinsics)]
#![cfg_attr(target_arch = "amdgpu", feature(stdarch_amdgpu))]
#![no_std]

#[cfg(target_os = "linux")]
extern crate libc;

/// Minimal panic handler, needed because the crate is `#![no_std]`.
/// It never returns: a panic simply spins in an empty loop.
#[panic_handler]
fn panic(_: &core::panic::PanicInfo) -> ! {
    loop {}
}

/// Entry point of the reproducer; compiled only when `target_os = "linux"`
/// (presumably the host-side build — confirm against the build setup).
///
/// Builds a six-element array and passes a pointer to it, together with the
/// `foo` kernel symbol, to the offload intrinsic.
#[cfg(target_os = "linux")]
#[unsafe(no_mangle)]
#[inline(never)]
unsafe fn main() {
    // NOTE(review): the unsuffixed literals infer as `f64`, so `A` is
    // `[f64; 6]` although the kernel parameter is `[f32; 6]`; the IR quoted
    // in this report stores `double`s accordingly. Confirm whether the
    // mismatch is intended before reusing this snippet.
    let A = [1.0, 1.0, 1.0, 1.0, 1.0, 1.0];

    unsafe {
        // The kernel arguments are given as a tuple; here it has one element,
        // the array pointer cast to the kernel's parameter type.
        core::intrinsics::offload::<_, _, ()>(foo, (A.as_ptr() as *const [f32; 6],));
    };
}

// Linux build only: declares `foo` as an external C-ABI symbol so that `main`
// can name it. Presumably this stands in for the `gpu-kernel` definition of
// `foo` that is compiled for non-Linux targets — confirm.
#[cfg(target_os = "linux")]
unsafe extern "C" {
    pub fn foo(A: *const [f32; 6]) -> ();
}

/// Kernel definition of `foo`, compiled only when `target_os` is not
/// `"linux"` (presumably the device-side build — confirm).
///
/// Takes a pointer to a six-element `f32` array; the body is empty.
#[cfg(not(target_os = "linux"))]
#[unsafe(no_mangle)]
#[inline(never)]
#[rustc_offload_kernel]
pub extern "gpu-kernel" fn foo(A: *const [f32; 6]) -> () {}

I expected to see this happen: The generated LLVM IR should have the declarations for those two routines with a single pointer parameter, as shown below.

declare void @__tgt_register_lib(ptr %0)

declare void @__tgt_unregister_lib(ptr %0)

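Instead, the resulting LLVM IR declares both routines with nine parameters (`ptr, i64, i32` followed by six `ptr`s), i.e. the same signature as the `__tgt_target_data_*_mapper` declarations: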

; ModuleID = 'e46kgqes3cuj6fp89atdfry6m'
source_filename = "e46kgqes3cuj6fp89atdfry6m"
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

%struct.ident_t = type { i32, i32, i32, i32, ptr }
%struct.__tgt_offload_entry = type { i64, i16, i16, i32, ptr, ptr, i64, i64, ptr }
%struct.__tgt_bin_desc = type { i32, ptr, ptr, ptr }
%struct.__tgt_kernel_arguments = type { i32, i32, ptr, ptr, ptr, ptr, ptr, ptr, i64, i64, [3 x i32], [3 x i32], i32 }

@0 = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1
@1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 22, ptr @0 }, align 8
@.offload_sizes.foo = private unnamed_addr constant [1 x i64] [i64 24]
@.offload_maptypes.foo = private unnamed_addr constant [1 x i64] [i64 33]
@.foo.region_id = weak constant i8 0
@.offloading.entry_name.foo = internal unnamed_addr constant [4 x i8] c"foo\00", section ".llvm.rodata.offloading", align 1
@.offloading.entry.foo = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 1, i32 0, ptr @.foo.region_id, ptr @.offloading.entry_name.foo, i64 0, i64 0, ptr null }, section "llvm_offload_entries", align 8

; Function Attrs: nounwind
declare i32 @__tgt_target_kernel(ptr %0, i64 %1, i32 %2, i32 %3, ptr %4, ptr %5) #0

; Function Attrs: nounwind
declare void @__tgt_target_data_begin_mapper(ptr %0, i64 %1, i32 %2, ptr %3, ptr %4, ptr %5, ptr %6, ptr %7, ptr %8) #0

; Function Attrs: nounwind
declare void @__tgt_target_data_update_mapper(ptr %0, i64 %1, i32 %2, ptr %3, ptr %4, ptr %5, ptr %6, ptr %7, ptr %8) #0

; Function Attrs: nounwind
declare void @__tgt_target_data_end_mapper(ptr %0, i64 %1, i32 %2, ptr %3, ptr %4, ptr %5, ptr %6, ptr %7, ptr %8) #0

declare void @__tgt_register_lib(ptr %0, i64 %1, i32 %2, ptr %3, ptr %4, ptr %5, ptr %6, ptr %7, ptr %8)

declare void @__tgt_unregister_lib(ptr %0, i64 %1, i32 %2, ptr %3, ptr %4, ptr %5, ptr %6, ptr %7, ptr %8)

declare void @__tgt_init_all_rtls()

; Function Attrs: noreturn nounwind nonlazybind uwtable
define hidden void @_RNvCsaJEegvYxiuD_7___rustc17rust_begin_unwind(ptr align 8 %_1) unnamed_addr #1 !dbg !5 {
start:
  %_1.dbg.spill = alloca [8 x i8], align 8
  store ptr %_1, ptr %_1.dbg.spill, align 8
    #dbg_declare(ptr %_1.dbg.spill, !142, !DIExpression(), !143)
  br label %bb1, !dbg !144

bb1:                                              ; preds = %bb1, %start
  br label %bb1, !dbg !144
}

; Function Attrs: alwaysinline nounwind nonlazybind uwtable
define internal ptr @_RNvMNtCsbROv6HdTZO3_4core5sliceSd6as_ptrCsi2Hs52l1lP8_1r(ptr align 8 %self.0, i64 %self.1) unnamed_addr #2 !dbg !145 {
start:
  %self.dbg.spill = alloca [16 x i8], align 8
  store ptr %self.0, ptr %self.dbg.spill, align 8
  %0 = getelementptr inbounds i8, ptr %self.dbg.spill, i64 8
  store i64 %self.1, ptr %0, align 8
    #dbg_declare(ptr %self.dbg.spill, !160, !DIExpression(), !161)
  ret ptr %self.0, !dbg !162
}

; Function Attrs: noinline nounwind nonlazybind uwtable
define void @main() unnamed_addr #3 !dbg !163 {
start:
  %A = alloca [48 x i8], align 8
    #dbg_declare(ptr %A, !166, !DIExpression(), !169)
  %EmptyDesc = alloca %struct.__tgt_bin_desc, align 8, !dbg !170
  %.offload_baseptrs = alloca [1 x ptr], align 8, !dbg !170
  %.offload_ptrs = alloca [1 x ptr], align 8, !dbg !170
  %.offload_sizes = alloca [1 x i64], align 8, !dbg !170
  %kernel_args = alloca %struct.__tgt_kernel_arguments, align 8, !dbg !170
  %0 = getelementptr inbounds nuw double, ptr %A, i64 0, !dbg !171
  %1 = getelementptr inbounds nuw double, ptr %A, i64 6, !dbg !171
  br label %repeat_loop_header, !dbg !171
  call void @llvm.memset.p0.i64(ptr align 8 %EmptyDesc, i8 0, i64 32, i1 false), !dbg !170
  %2 = getelementptr inbounds float, ptr %_5, i32 0, !dbg !170
  call void @__tgt_register_lib(ptr %EmptyDesc), !dbg !170
  call void @__tgt_init_all_rtls(), !dbg !170
  %3 = getelementptr inbounds [1 x ptr], ptr %.offload_baseptrs, i32 0, i32 0, !dbg !170
  store ptr %_5, ptr %3, align 8, !dbg !170
  %4 = getelementptr inbounds [1 x ptr], ptr %.offload_ptrs, i32 0, i32 0, !dbg !170
  store ptr %2, ptr %4, align 8, !dbg !170
  %5 = getelementptr inbounds [1 x i64], ptr %.offload_sizes, i32 0, i32 0, !dbg !170
  store i64 24, ptr %5, align 8, !dbg !170
  %6 = getelementptr inbounds [1 x ptr], ptr %.offload_baseptrs, i32 0, i32 0, !dbg !170
  %7 = getelementptr inbounds [1 x ptr], ptr %.offload_ptrs, i32 0, i32 0, !dbg !170
  %8 = getelementptr inbounds [1 x i64], ptr %.offload_sizes, i32 0, i32 0, !dbg !170
  call void @__tgt_target_data_begin_mapper(ptr @1, i64 -1, i32 1, ptr %6, ptr %7, ptr %8, ptr @.offload_maptypes.foo, ptr null, ptr null), !dbg !170
  %9 = getelementptr inbounds %struct.__tgt_kernel_arguments, ptr %kernel_args, i32 0, i32 0, !dbg !170
  store i32 3, ptr %9, align 4, !dbg !170
  %10 = getelementptr inbounds %struct.__tgt_kernel_arguments, ptr %kernel_args, i32 0, i32 1, !dbg !170
  store i32 1, ptr %10, align 4, !dbg !170
  %11 = getelementptr inbounds %struct.__tgt_kernel_arguments, ptr %kernel_args, i32 0, i32 2, !dbg !170
  store ptr %6, ptr %11, align 8, !dbg !170
  %12 = getelementptr inbounds %struct.__tgt_kernel_arguments, ptr %kernel_args, i32 0, i32 3, !dbg !170
  store ptr %7, ptr %12, align 8, !dbg !170
  %13 = getelementptr inbounds %struct.__tgt_kernel_arguments, ptr %kernel_args, i32 0, i32 4, !dbg !170
  store ptr %8, ptr %13, align 8, !dbg !170
  %14 = getelementptr inbounds %struct.__tgt_kernel_arguments, ptr %kernel_args, i32 0, i32 5, !dbg !170
  store ptr @.offload_maptypes.foo, ptr %14, align 8, !dbg !170
  %15 = getelementptr inbounds %struct.__tgt_kernel_arguments, ptr %kernel_args, i32 0, i32 6, !dbg !170
  store ptr null, ptr %15, align 8, !dbg !170
  %16 = getelementptr inbounds %struct.__tgt_kernel_arguments, ptr %kernel_args, i32 0, i32 7, !dbg !170
  store ptr null, ptr %16, align 8, !dbg !170
  %17 = getelementptr inbounds %struct.__tgt_kernel_arguments, ptr %kernel_args, i32 0, i32 8, !dbg !170
  store i64 0, ptr %17, align 8, !dbg !170
  %18 = getelementptr inbounds %struct.__tgt_kernel_arguments, ptr %kernel_args, i32 0, i32 9, !dbg !170
  store i64 0, ptr %18, align 8, !dbg !170
  %19 = getelementptr inbounds %struct.__tgt_kernel_arguments, ptr %kernel_args, i32 0, i32 10, !dbg !170
  store [3 x i32] [i32 2097152, i32 0, i32 0], ptr %19, align 4, !dbg !170
  %20 = getelementptr inbounds %struct.__tgt_kernel_arguments, ptr %kernel_args, i32 0, i32 11, !dbg !170
  store [3 x i32] [i32 256, i32 0, i32 0], ptr %20, align 4, !dbg !170
  %21 = getelementptr inbounds %struct.__tgt_kernel_arguments, ptr %kernel_args, i32 0, i32 12, !dbg !170
  store i32 0, ptr %21, align 4, !dbg !170
  %22 = call i32 @__tgt_target_kernel(ptr @1, i64 -1, i32 2097152, i32 256, ptr @.foo.region_id, ptr %kernel_args), !dbg !170
  %23 = getelementptr inbounds [1 x ptr], ptr %.offload_baseptrs, i32 0, i32 0, !dbg !170
  %24 = getelementptr inbounds [1 x ptr], ptr %.offload_ptrs, i32 0, i32 0, !dbg !170
  %25 = getelementptr inbounds [1 x i64], ptr %.offload_sizes, i32 0, i32 0, !dbg !170
  call void @__tgt_target_data_end_mapper(ptr @1, i64 -1, i32 1, ptr %23, ptr %24, ptr %25, ptr @.offload_maptypes.foo, ptr null, ptr null), !dbg !170
  call void @__tgt_unregister_lib(ptr %EmptyDesc), !dbg !170

repeat_loop_header:                               ; preds = %repeat_loop_body, %start
  %26 = phi ptr [ %0, %start ], [ %28, %repeat_loop_body ]
  %27 = icmp ne ptr %26, %1
  br i1 %27, label %repeat_loop_body, label %repeat_loop_next

repeat_loop_body:                                 ; preds = %repeat_loop_header
  store double 1.000000e+00, ptr %26, align 8
  %28 = getelementptr inbounds double, ptr %26, i64 1
  br label %repeat_loop_header

repeat_loop_next:                                 ; preds = %repeat_loop_header
  %_5 = call ptr @_RNvMNtCsbROv6HdTZO3_4core5sliceSd6as_ptrCsi2Hs52l1lP8_1r(ptr align 8 %A, i64 6) #5, !dbg !172
  %dummy = load volatile ptr, ptr @.offload_sizes.foo, align 8, !dbg !170
  %dummy1 = load volatile ptr, ptr @.offloading.entry.foo, align 8, !dbg !170
}

; Function Attrs: nocallback nofree nounwind willreturn memory(argmem: write)
declare void @llvm.memset.p0.i64(ptr writeonly captures(none) %0, i8 %1, i64 %2, i1 immarg %3) #4

attributes #0 = { nounwind }
attributes #1 = { noreturn nounwind nonlazybind uwtable "probe-stack"="inline-asm" "target-cpu"="x86-64" }
attributes #2 = { alwaysinline nounwind nonlazybind uwtable "probe-stack"="inline-asm" "target-cpu"="x86-64" }
attributes #3 = { noinline nounwind nonlazybind uwtable "probe-stack"="inline-asm" "target-cpu"="x86-64" }
attributes #4 = { nocallback nofree nounwind willreturn memory(argmem: write) }
attributes #5 = { alwaysinline nounwind }

!llvm.module.flags = !{!0, !1}
!llvm.ident = !{!2}
!llvm.dbg.cu = !{!3}

Meta

HEAD on the main branch.

CC @ZuseZ4 @Sa4dUs

Metadata

Metadata

Assignees

Labels

A-LLVM — Area: Code generation parts specific to LLVM. Both correctness bugs and optimization-related issues.
C-bug — Category: This is a bug.
F-gpu_offload — `#![feature(gpu_offload)]`

Type

No type

Projects

No projects

Milestone

No milestone

Relationships

None yet

Development

No branches or pull requests

Issue actions