https://godbolt.org/z/jdzErPMn7
```rust
use std::ops::RangeTo;

pub fn get_range_to(s: &str, range: RangeTo<usize>) -> Option<&str> {
    if is_char_boundary(s, range.end) {
        unsafe { Some(s.get_unchecked(..range.end)) }
    } else {
        None
    }
}

fn is_char_boundary(s: &str, index: usize) -> bool {
    let bytes = s.as_bytes();
    let len = bytes.len();
    if index == 0 {
        return true;
    }
    if index >= len {
        index == len
    } else {
        is_utf8_start_byte(bytes[index])
    }
}

fn is_utf8_start_byte(b: u8) -> bool {
    // This is bit magic equivalent to: b < 128 || b >= 192
    (b as i8) >= -0x40
}
```
This function from the Rust standard library (https://github.com/rust-lang/rust/blob/3f43b1a636738f41c48df073c5bcb97a97bf8459/library/core/src/str/traits.rs#L430) produces unnecessarily branchy code:
```asm
; AArch64
example::get_range_to::hecca4baa46cebf55:
        cbz     x2, .LBB0_4
        cmp     x2, x1
        b.hs    .LBB0_5
        ldrsb   w8, [x0, x2]
        cmn     w8, #64
        b.ge    .LBB0_4
.LBB0_3:
        mov     x0, xzr
.LBB0_4:
        mov     x1, x2
        ret
.LBB0_5:
        b.ne    .LBB0_3
        b       .LBB0_4

; x86_64
example::get_range_to::haafeb3d39ebfa3da:
        mov     rax, rdi
        test    rdx, rdx
        je      .LBB0_5
        cmp     rdx, rsi
        jae     .LBB0_2
        cmp     byte ptr [rax + rdx], -64
        jge     .LBB0_5
.LBB0_4:
        xor     eax, eax
.LBB0_5:
        ret
.LBB0_2:
        jne     .LBB0_4
        jmp     .LBB0_5
```
By inlining `is_char_boundary` manually, we can get much nicer code with fewer branches:
```rust
pub fn get_range_to_inlined(s: &str, range: RangeTo<usize>) -> Option<&str> {
    let end = range.end;
    let bytes = s.as_bytes();
    let len = bytes.len();
    if end == 0 {
        unsafe { return Some(s.get_unchecked(..end)) }
    }
    if end >= len {
        if end == len {
            unsafe { return Some(s.get_unchecked(..end)) }
        }
        return None;
    }
    let last = bytes[end];
    if !is_utf8_start_byte(last) {
        return None;
    }
    unsafe { Some(s.get_unchecked(..end)) }
}
```
```asm
; AArch64
example::get_range_to_inlined::hef86872e440018c7:
        cbz     x2, .LBB1_4
        cmp     x2, x1
        b.hs    .LBB1_3
        ldrsb   w8, [x0, x2]
        mov     x1, x2
        cmn     w8, #65
        csel    x0, x0, xzr, gt
        ret
.LBB1_3:
        csel    x0, x0, xzr, eq
.LBB1_4:
        mov     x1, x2
        ret

; x86_64
example::get_range_to_inlined::hf2fa833068e78843:
        mov     rax, rdi
        test    rdx, rdx
        je      .LBB1_3
        xor     ecx, ecx
        cmp     rdx, rsi
        jae     .LBB1_2
        cmp     byte ptr [rax + rdx], -64
        cmovl   rax, rcx
        ret
.LBB1_2:
        cmp     rdx, rsi
        cmovne  rax, rcx
.LBB1_3:
        ret
```
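For completeness, a minimal sketch of a test that the rewrite is behaviourally identical to the original (the multi-byte test string and the test name are just arbitrary choices, not from the report above):

```rust
#[test]
fn inlined_version_matches_original() {
    // Compare both versions for every end index, including out-of-bounds
    // ones, on a string that contains multi-byte characters.
    let s = "abc\u{e9}\u{1f600}xyz";
    for end in 0..=s.len() + 2 {
        assert_eq!(get_range_to(s, ..end), get_range_to_inlined(s, ..end));
    }
}
```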