Closed
Description
Consider the following constructor:
// Newtype around a 512-byte array; large enough that filling it shows up
// as a recognizable memset call in the generated code.
struct Foo([u8; 512]);
impl Foo {
// Constructor: returns a Foo with all 512 bytes set to 42.
fn new() -> Foo {
Foo([42u8; 512])
}
}
(stupid newtype with large and stupid content to trigger a recognizable memset
call)
Now, let's say we use the constructor in some way:
// Uses the constructor: wraps a freshly built Foo in Some.
// The Option<Foo> result is large, so the caller passes a pointer to the
// storage for it (the pointer arrives in rdi in the listings of this report).
pub fn foo() -> Option<Foo> {
Some(Foo::new())
}
Typical Rust code contains lots of constructs like this, in more elaborate forms.
The code above compiles to the following straightforward code:
// foo() with Foo::new inlined: the payload is written directly into the caller-provided result.
push rbx // save rbx; it holds the result pointer across the memset call.
mov rbx, rdi // rdi is the pointer the caller gave us for storing the (large) Option result.
lea rdi, [rbx + 1] // \  destination = ptr + 1 (the payload follows the 1-byte discriminant at ptr)
mov esi, 42 // | memset(ptr + 1, 42, 512)
mov edx, 512 // |
call memset@PLT // /
mov byte ptr [rbx], 1 // set the Option discriminant to Some.
mov rax, rbx // return the pointer to the Option.
pop rbx // restore the saved rbx.
ret
Now, if for some reason the constructor is not inlined (and that can happen), here is what this becomes:
// foo() when Foo::new is NOT inlined: Foo is built in a stack temporary, then copied into the result.
push r14 // save r14; it holds the address of the stack temporary across both calls.
push rbx // save rbx; it holds the result pointer across both calls.
sub rsp, 520 // reserve stack space; the 512-byte temporary Foo lives at [rsp + 8].
mov rbx, rdi // rdi is the pointer the caller gave us for storing the (large) Option result.
lea r14, [rsp + 8] // \  r14 = &buffer_on_the_stack
mov rdi, r14 // | Foo::new(&buffer_on_the_stack)
call Foo::new // / meaning Foo::new will call memset(&buffer_on_the_stack, 42, 512)
lea rdi, [rbx + 1] // \  destination = ptr + 1 (the payload follows the 1-byte discriminant at ptr)
mov edx, 512 // | memcpy(ptr + 1, &buffer_on_the_stack, 512)
mov rsi, r14 // |
call memcpy@PLT // /  this is the extra copy that the inlined version does not have
mov byte ptr [rbx], 1 // set the Option discriminant to Some.
mov rax, rbx // return the pointer to the Option.
add rsp, 520 // release the stack temporary.
pop rbx // restore saved registers in reverse order of the pushes.
pop r14
ret
I don't see a reason why this couldn't be the following instead:
// Proposed code for the non-inlined case: hand Foo::new the final destination so no temporary or memcpy is needed.
push rbx // save rbx; it holds the result pointer across the call.
mov rbx, rdi // rdi is the pointer the caller gave us for storing the (large) Option result.
lea rdi, [rbx + 1] // \  destination = ptr + 1 (the payload follows the 1-byte discriminant at ptr)
call Foo::new // / Foo::new(ptr + 1): the constructor writes straight into the Option payload
mov byte ptr [rbx], 1 // set the Option discriminant to Some.
mov rax, rbx // return the pointer to the Option.
pop rbx // restore the saved rbx.
ret
This would avoid the useless copy, just as inlining does.