Skip to content

[AVR/RegAllocGreedy] Allocator clobbers a live register #81911

Closed
@Patryk27

Description

@Patryk27

Hi,

I've got some code which works with -O0 and with -O1 -regalloc=basic, but gets miscompiled with plain -O1 (and even with -O1 -opt-bisect-limit=0!) - the downstream issue is:

Rahix/avr-hal#505 (comment)

... which I've been able to somewhat minimize, down to:

Show LLVM IR
; Zero-sized constant (an empty packed struct). Its address is one of the two
; pointers selected in block bb2.i of @main.
@buf = private unnamed_addr constant <{}> zeroinitializer, align 1

; Minimized reproducer for the miscompile described in this issue.
;
; The value to watch is %3: it is produced by the `in ${0}, 0x3F` inline asm
; near the top of `start` and is not read again until the `icmp` in
; `main.fmt_num.exit`, so it stays live across the whole division loop and
; the inline-asm barriers in between.
;
; Do not reorder or simplify anything here: the bug depends on the exact
; shape of the code.
define internal fastcc void @main() unnamed_addr addrspace(1) #1 {
start:
  %0 = alloca i64, align 1
  %1 = alloca { ptr, i16 }, align 1
  %2 = alloca i64, align 1
  %buf = alloca [3 x i8], align 1
  ; sei / read I/O address 0x3F into %3 / cli. On AVR, I/O address 0x3F is
  ; SREG, so %3 presumably captures the status flags while interrupts are
  ; enabled. The "=&r" constraint makes the output an early-clobber register.
  tail call void asm sideeffect alignstack "sei", "~{sreg},~{memory}"() #5, !srcloc !3
  %3 = tail call i8 asm sideeffect alignstack "in ${0}, 0x3F", "=&r,~{sreg},~{memory}"() #5, !srcloc !4
  tail call void asm sideeffect alignstack "cli", "~{sreg},~{memory}"() #5, !srcloc !5
  store i64 12, ptr %2, align 1
  ; Empty asm that takes the slot's address in a register and clobbers memory.
  ; NOTE(review): presumably a black_box-style barrier so the constant 12
  ; cannot be folded through the load below - confirm against the Rust source.
  call void asm sideeffect "", "r,~{memory}"(ptr nonnull %2) #5, !srcloc !6
  %counter = load i64, ptr %2, align 1, !noundef !7
  %dummy4.i = urem i64 %counter, 10
  store i64 %dummy4.i, ptr %0, align 1, !noalias !8
  call void asm sideeffect "", "r,~{memory}"(ptr nonnull %0) #5, !noalias !8, !srcloc !6
  ; Values below 10 skip the loop and go straight to the exit block.
  %4 = icmp ult i64 %counter, 10
  br i1 %4, label %main.fmt_num.exit, label %bb6.i

; Loop exit. Reached only from bb6.i, which is why it may use %i.05.i even
; though it appears earlier in the text. Picks (len 0, @buf) when the
; iteration counter is below 2147483647, otherwise (len 3, %buf).
bb2.i:
  %5 = icmp ult i32 %i.05.i, 2147483647
  %spec.select = select i1 %5, i16 0, i16 3
  %spec.select2 = select i1 %5, ptr @buf, ptr %buf
  br label %main.fmt_num.exit

; Loop body: divide the running value by 10, bump the i32 counter, and push
; the next remainder through the %0 slot and the asm barrier. The exit test
; uses the value from before the division (%n.06.i < 100).
bb6.i:
  %n.06.i = phi i64 [ %6, %bb6.i ], [ %counter, %start ]
  %i.05.i = phi i32 [ %7, %bb6.i ], [ 0, %start ]
  %6 = udiv i64 %n.06.i, 10
  %7 = add i32 %i.05.i, 1
  %dummy.i = urem i64 %6, 10
  store i64 %dummy.i, ptr %0, align 1, !noalias !8
  call void asm sideeffect "", "r,~{memory}"(ptr nonnull %0) #5, !noalias !8, !srcloc !6
  %8 = icmp ult i64 %n.06.i, 100
  br i1 %8, label %bb2.i, label %bb6.i

; Join point with two predecessors (start and bb2.i). Writes the selected
; { ptr, i16 } pair into %1, then performs the first and only read of %3.
main.fmt_num.exit:
  %9 = phi i16 [ 3, %start ], [ %spec.select, %bb2.i ]
  %10 = phi ptr [ %buf, %start ], [ %spec.select2, %bb2.i ]
  store ptr %10, ptr %1, align 1
  %11 = getelementptr inbounds i8, ptr %1, i16 2
  store i16 %9, ptr %11, align 1
  call void asm sideeffect "", "r,~{memory}"(ptr nonnull %1) #5, !srcloc !6
  ; Signed "%3 > -1" is true exactly when bit 7 of %3 is clear: bit 7 clear
  ; goes to bb3 (report_irq_disabled), bit 7 set goes to bb2
  ; (report_irq_enabled).
  %12 = icmp sgt i8 %3, -1
  br i1 %12, label %bb3, label %bb2

bb3:
  call fastcc addrspace(1) void @report_irq_disabled() #5
  br label %bb5.preheader

bb2:
  call fastcc addrspace(1) void @report_irq_enabled() #5
  br label %bb5.preheader

bb5.preheader:
  br label %bb5

; Infinite self-loop; the function never returns (attribute group #1 is
; noreturn).
bb5:
  br label %bb5
}

; External declarations. @uart_write is declared but not called anywhere in
; the IR shown here. The trailing `;` on the two report_* declarations starts
; an (empty) IR comment and has no effect.
declare noundef zeroext i1 @uart_write(ptr noalias noundef nonnull align 1, i8 noundef) unnamed_addr addrspace(1) #2
declare fastcc void @report_irq_enabled() unnamed_addr addrspace(1) #0;
declare fastcc void @report_irq_disabled() unnamed_addr addrspace(1) #0;

; Attribute groups. In the IR shown here: #0 is used by the report_*
; declarations, #1 by @main, #2 by @uart_write, and #5 by every call site.
; Groups #3, #4 and #6 are not referenced by anything visible.
attributes #0 = { noinline nounwind "target-cpu"="atmega328p" }
attributes #1 = { noreturn nounwind "target-cpu"="atmega328p" }
attributes #2 = { nounwind "target-cpu"="atmega328p" }
attributes #3 = { mustprogress nocallback nofree nounwind willreturn memory(argmem: write) }
attributes #4 = { mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) }
attributes #5 = { nounwind }
attributes #6 = { noreturn nounwind }

; Module-level metadata.
!llvm.module.flags = !{!0, !1}
!llvm.ident = !{!2}

!0 = !{i32 8, !"PIC Level", i32 2}
!1 = !{i32 7, !"PIE Level", i32 2}
!2 = !{!"rustc version 1.78.0-nightly (ee9c7c940 2024-02-14)"}
; !3..!6 are the !srcloc operands attached to the inline-asm calls in @main.
!3 = !{i32 733181}
!4 = !{i32 732071}
!5 = !{i32 731113}
!6 = !{i32 189156}
; !7 is the empty node used as the !noundef operand on the load of %counter.
!7 = !{}
; !8 -> !9 -> !10 form the !noalias scope list attached to the stores through
; %0 and the asm barriers that follow them.
!8 = !{!9}
!9 = distinct !{!9, !10, !"main.fmt_num: %buf.0"}
!10 = distinct !{!10, !"main.fmt_num"}

Now, the most important bit is this assignment:

%3 = tail call addrspace(0) i8 asm sideeffect alignstack "in ${0}, 0x3F", "=&r,~{sreg},~{memory}"() #5, !srcloc !4

... whose result isn't used until the very end of the program, where it is tested:

  %12 = icmp sgt i8 %3, -1
  br i1 %12, label %bb3, label %bb2

LLVM (understandably) decides to spill the result of that `in ..., 0x3F` instruction into memory:

	in	r24, 63
	std	Y+3, r24

... to later load back:

	ldd	r22, Y+3

... and - even later - test on:

.LBB0_18:
	/* ... */        
	tst	r22
	brmi	.LBB0_20

The direct issue is that this ldd r22, Y+3 instruction is executed only in one code path (out of two) that leads to .LBB0_18!

(that is, in the generated assembly, .LBB0_18 can be reached from two blocks, but ldd happens in only one of them)

The code seems to be compiled correctly with -O0 and with -O1 -regalloc=basic - the latter in particular keeps ldd and tst together in one basic block instead of splitting them across two:

	ldd	r24, Y+1
	tst	r24
	brmi	.LBB0_19

Metadata

Metadata

Assignees

Type

No type

Projects

Status

Done

Relationships

None yet

Development

No branches or pull requests

Issue actions