Description
Hi,
I've got some code which works with `-O0` and with `-O1 -regalloc=basic`, but gets miscompiled with `-O1` (and even with `-O1 -opt-bisect-limit=0`!) - the issue downstream is:
... which I've been able to somewhat minimize, down to:
Show LLVM IR
@buf = private unnamed_addr constant <{}> zeroinitializer, align 1
; Reduced reproducer. @main defines %3 via inline asm near the top of `start`
; and does not use it again until the icmp in `main.fmt_num.exit`, after a
; digit-counting loop. The function is marked noreturn (#1) and ends in the
; self-looping block bb5.
define internal fastcc void @main() unnamed_addr addrspace(1) #1 {
start:
; Stack slots. %0, %1 and %2 are each passed (as an "r" operand) to an empty
; side-effecting asm with a memory clobber further down; %buf is only ever
; referenced by address through the select/phi below.
%0 = alloca i64, align 1
%1 = alloca { ptr, i16 }, align 1
%2 = alloca i64, align 1
%buf = alloca [3 x i8], align 1
; Three inline-asm statements: "sei", then `in` from I/O address 0x3F into %3,
; then "cli". NOTE(review): 0x3F is presumably SREG on the atmega328p target
; named in the attributes below - confirm against the datasheet.
tail call void asm sideeffect alignstack "sei", "~{sreg},~{memory}"() #5, !srcloc !3
%3 = tail call i8 asm sideeffect alignstack "in ${0}, 0x3F", "=&r,~{sreg},~{memory}"() #5, !srcloc !4
tail call void asm sideeffect alignstack "cli", "~{sreg},~{memory}"() #5, !srcloc !5
; Store 12 into %2, expose the slot to an empty memory-clobbering asm, then
; reload it as %counter (presumably so the value is not folded to a constant).
store i64 12, ptr %2, align 1
call void asm sideeffect "", "r,~{memory}"(ptr nonnull %2) #5, !srcloc !6
%counter = load i64, ptr %2, align 1, !noundef !7
; Write %counter % 10 to %0 and expose it to an empty asm.
%dummy4.i = urem i64 %counter, 10
store i64 %dummy4.i, ptr %0, align 1, !noalias !8
call void asm sideeffect "", "r,~{memory}"(ptr nonnull %0) #5, !noalias !8, !srcloc !6
; %counter < 10: skip the loop and go straight to the exit block.
%4 = icmp ult i64 %counter, 10
br i1 %4, label %main.fmt_num.exit, label %bb6.i
bb2.i:
; Loop exit (only predecessor is bb6.i). Picks (0, @buf) when the pre-increment
; loop counter %i.05.i is below 2147483647, otherwise (3, %buf).
%5 = icmp ult i32 %i.05.i, 2147483647
%spec.select = select i1 %5, i16 0, i16 3
%spec.select2 = select i1 %5, ptr @buf, ptr %buf
br label %main.fmt_num.exit
bb6.i:
; Loop body: %n.06.i starts at %counter and is divided by 10 each iteration,
; %i.05.i counts iterations from 0. Each pass stores (n / 10) % 10 to %0 and
; exposes it to an empty asm. The loop repeats while the pre-division value
; is >= 100.
%n.06.i = phi i64 [ %6, %bb6.i ], [ %counter, %start ]
%i.05.i = phi i32 [ %7, %bb6.i ], [ 0, %start ]
%6 = udiv i64 %n.06.i, 10
%7 = add i32 %i.05.i, 1
%dummy.i = urem i64 %6, 10
store i64 %dummy.i, ptr %0, align 1, !noalias !8
call void asm sideeffect "", "r,~{memory}"(ptr nonnull %0) #5, !noalias !8, !srcloc !6
%8 = icmp ult i64 %n.06.i, 100
br i1 %8, label %bb2.i, label %bb6.i
main.fmt_num.exit:
; Merge point of the two paths (directly from `start`, or via bb2.i after the
; loop). Fills the { ptr, i16 } slot %1: pointer at offset 0, i16 at byte
; offset 2, then exposes it to an empty asm.
%9 = phi i16 [ 3, %start ], [ %spec.select, %bb2.i ]
%10 = phi ptr [ %buf, %start ], [ %spec.select2, %bb2.i ]
store ptr %10, ptr %1, align 1
%11 = getelementptr inbounds i8, ptr %1, i16 2
store i16 %9, ptr %11, align 1
call void asm sideeffect "", "r,~{memory}"(ptr nonnull %1) #5, !srcloc !6
; First use of %3 since its definition in `start`: signed compare against -1,
; i.e. true when the top bit of %3 is clear. True -> bb3, false -> bb2.
%12 = icmp sgt i8 %3, -1
br i1 %12, label %bb3, label %bb2
bb3:
call fastcc addrspace(1) void @report_irq_disabled() #5
br label %bb5.preheader
bb2:
call fastcc addrspace(1) void @report_irq_enabled() #5
br label %bb5.preheader
bb5.preheader:
br label %bb5
bb5:
; Infinite loop; the function never returns.
br label %bb5
}
; External declarations. @uart_write is declared but not called anywhere in
; the IR shown in this report; the two @report_irq_* functions are the only
; callees of @main and are marked noinline (#0).
declare noundef zeroext i1 @uart_write(ptr noalias noundef nonnull align 1, i8 noundef) unnamed_addr addrspace(1) #2
declare fastcc void @report_irq_enabled() unnamed_addr addrspace(1) #0;
declare fastcc void @report_irq_disabled() unnamed_addr addrspace(1) #0;
; Attribute groups. #0, #1 and #2 carry the atmega328p target CPU; #5 is used
; on the call sites inside @main. #3, #4 and #6 are not referenced by anything
; in the IR shown in this report.
attributes #0 = { noinline nounwind "target-cpu"="atmega328p" }
attributes #1 = { noreturn nounwind "target-cpu"="atmega328p" }
attributes #2 = { nounwind "target-cpu"="atmega328p" }
attributes #3 = { mustprogress nocallback nofree nounwind willreturn memory(argmem: write) }
attributes #4 = { mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) }
attributes #5 = { nounwind }
attributes #6 = { noreturn nounwind }
!llvm.module.flags = !{!0, !1}
!llvm.ident = !{!2}
!0 = !{i32 8, !"PIC Level", i32 2}
!1 = !{i32 7, !"PIE Level", i32 2}
!2 = !{!"rustc version 1.78.0-nightly (ee9c7c940 2024-02-14)"}
!3 = !{i32 733181}
!4 = !{i32 732071}
!5 = !{i32 731113}
!6 = !{i32 189156}
!7 = !{}
!8 = !{!9}
!9 = distinct !{!9, !10, !"main.fmt_num: %buf.0"}
!10 = distinct !{!10, !"main.fmt_num"}
Now, the most important bit is this assignment:
%3 = tail call addrspace(0) i8 asm sideeffect alignstack "in ${0}, 0x3F", "=&r,~{sreg},~{memory}"() #5, !srcloc !4
... whose result isn't used until the very end of the program, where it's tested:
%12 = icmp sgt i8 %3, -1
br i1 %12, label %bb3, label %bb2
LLVM (understandably) decides to spill the result of that `in ..., 0x3f` into memory:
in r24, 63
std Y+3, r24
... to later load back:
ldd r22, Y+3
... and - even later - test on:
.LBB0_18:
/* ... */
tst r22
brmi .LBB0_20
The direct issue is that this `ldd r22, Y+3` instruction is executed in only one of the two code paths that lead to `.LBB0_18`!
(That is, in the generated assembly, `.LBB0_18` can be reached from two blocks, but the `ldd` happens in only one of them.)
The code seems to be compiled correctly with `-O0` and with `-O1 -regalloc=basic` - the latter in particular keeps `ldd` and `tst` together instead of placing them in two different basic blocks:
ldd r24, Y+1
tst r24
brmi .LBB0_19
Metadata
Metadata
Assignees
Type
Projects
Status