Skip to content

LLVM inlining a function produces worse code than inlining it manually #121604

@Kmeakin

Description

@Kmeakin

https://godbolt.org/z/jdzErPMn7

use std::ops::RangeTo;

/// Returns the prefix `s[..range.end]`, or `None` when `is_char_boundary`
/// rejects `range.end` (past the end of `s`, or in the middle of a character).
pub fn get_range_to(s: &str, range: RangeTo<usize>) -> Option<&str> {
    if is_char_boundary(s, range.end) {
        // SAFETY: `is_char_boundary` returned true, so `range.end` is 0,
        // equal to `s.len()`, or an in-bounds index of a UTF-8 start byte.
        unsafe { Some(s.get_unchecked(..range.end)) }
    } else {
        None
    }
}

/// Reports whether `index` is a position at which `s` can be split:
/// 0, exactly `s.len()`, or an in-bounds offset whose byte passes
/// `is_utf8_start_byte`. Any `index` greater than `s.len()` yields `false`.
fn is_char_boundary(s: &str, index: usize) -> bool {
    let bytes = s.as_bytes();
    let len = bytes.len();

    // Offset 0 is accepted unconditionally, which also covers the empty string.
    if index == 0 {
        return true;
    }

    if index >= len {
        // At or past the end: only the exact end of the string counts.
        index == len
    } else {
        // Here `index < len`, so the indexing below cannot panic.
        is_utf8_start_byte(bytes[index])
    }
}

/// Returns `true` unless `b` lies in `128..=191`, the range of UTF-8
/// continuation bytes (`0b10xx_xxxx`).
fn is_utf8_start_byte(b: u8) -> bool {
    // This is bit magic equivalent to: b < 128 || b >= 192
    // Reinterpreted as `i8`, 128..=191 becomes -128..=-65, so a single signed
    // comparison against -64 (-0x40) rejects exactly that range.
    (b as i8) >= -0x40
}

This function from the Rust standard library (https://github.com/rust-lang/rust/blob/3f43b1a636738f41c48df073c5bcb97a97bf8459/library/core/src/str/traits.rs#L430) compiles to unnecessarily branchy code when LLVM inlines `is_char_boundary` into `get_range_to`:

; AArch64
example::get_range_to::hecca4baa46cebf55:
        cbz     x2, .LBB0_4
        cmp     x2, x1
        b.hs    .LBB0_5
        ldrsb   w8, [x0, x2]
        cmn     w8, #64
        b.ge    .LBB0_4
.LBB0_3:
        mov     x0, xzr
.LBB0_4:
        mov     x1, x2
        ret
.LBB0_5:
        b.ne    .LBB0_3
        b       .LBB0_4


; x86_64
example::get_range_to::haafeb3d39ebfa3da:
        mov     rax, rdi
        test    rdx, rdx
        je      .LBB0_5
        cmp     rdx, rsi
        jae     .LBB0_2
        cmp     byte ptr [rax + rdx], -64
        jge     .LBB0_5
.LBB0_4:
        xor     eax, eax
.LBB0_5:
        ret
.LBB0_2:
        jne     .LBB0_4
        jmp     .LBB0_5

By inlining `is_char_boundary` manually, we get much nicer code with fewer branches (the trailing conditions become `csel`/`cmov` instead of jumps):

/// Hand-inlined variant of `get_range_to`: the char-boundary check is written
/// out in place instead of calling `is_char_boundary`. Returns the prefix
/// `s[..range.end]`, or `None` when `range.end` is past the end of `s` or does
/// not fall on a UTF-8 start byte.
pub fn get_range_to_inlined(s: &str, range: RangeTo<usize>) -> Option<&str> {
    let end = range.end;
    let bytes = s.as_bytes();
    let len = bytes.len();

    if end == 0 {
        // SAFETY: `end == 0`, so `..end` is the empty prefix, which is always
        // in bounds and on a char boundary.
        unsafe { return Some(s.get_unchecked(..end)) }
    }

    if end >= len {
        if end == len {
            // SAFETY: `end == len`, so `..end` is the whole string.
            unsafe { return Some(s.get_unchecked(..end)) }
        }
        // `end > len`: out of bounds.
        return None;
    }

    // Here `end < len`, so the indexing cannot panic. Note that `last` is the
    // byte *at* `end` (the first byte after the prefix), not the prefix's
    // final byte.
    let last = bytes[end];
    if !is_utf8_start_byte(last) {
        return None;
    }

    // SAFETY: `end < len` and the byte at `end` is a UTF-8 start byte, so
    // `..end` is in bounds and ends on a char boundary.
    unsafe { Some(s.get_unchecked(..end)) }
}

; AArch64
example::get_range_to_inlined::hef86872e440018c7:
        cbz     x2, .LBB1_4
        cmp     x2, x1
        b.hs    .LBB1_3
        ldrsb   w8, [x0, x2]
        mov     x1, x2
        cmn     w8, #65
        csel    x0, x0, xzr, gt
        ret
.LBB1_3:
        csel    x0, x0, xzr, eq
.LBB1_4:
        mov     x1, x2
        ret

; x86_64
example::get_range_to_inlined::hf2fa833068e78843:
        mov     rax, rdi
        test    rdx, rdx
        je      .LBB1_3
        xor     ecx, ecx
        cmp     rdx, rsi
        jae     .LBB1_2
        cmp     byte ptr [rax + rdx], -64
        cmovl   rax, rcx
        ret
.LBB1_2:
        cmp     rdx, rsi
        cmovne  rax, rcx
.LBB1_3:
        ret

Metadata

Metadata

Assignees

No one assigned

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions