Skip to content

Extra level of indirection with explicit self #4402

Closed
@bstrie

Description

@bstrie

I found a multi-language benchmark online (https://gist.github.com/1170424) whose Rust version was outdated. I decided to update it.

Here's the original code:

/// Two-component vector of `f32` values.
struct Vec2 {
    x: f32,
    y: f32,
}

/// Linear interpolation between `a` and `b` by weight `v`:
/// returns `a` when `v` is 0 and `b` when `v` is 1.
fn lerp(a: f32, b: f32, v: f32) -> f32 {
    a * (1f32 - v) + b * v
}

/// Cubic easing polynomial `3v^2 - 2v^3`; evaluates to 0 at `v = 0` and 1 at `v = 1`.
fn smooth(v: f32) -> f32 {
    v * v * (3f32 - 2f32 * v)
}

/// Builds a `Vec2` from the cosine and sine of a random angle drawn from `r`.
fn random_gradient(r: rand::Rng) -> Vec2 {
    // Scale the random float by 2*pi to get an angle.
    // NOTE(review): presumably `gen_float()` yields a value in [0, 1), making
    // this a full-circle angle in radians — confirm against the rand API.
    let v = r.gen_float() * float::consts::pi * 2.0;
    Vec2{
        x: float::cos(v) as f32,
        y: float::sin(v) as f32,
    }
}

/// Combines the gradient `grad` with the offset of point `p` from `orig`.
fn gradient(orig: Vec2, grad: Vec2, p: Vec2) -> f32 {
    // Offset of the sample point from the corner.
    let sp = Vec2{x: p.x - orig.x, y: p.y - orig.y};
    // NOTE(review): a dot product would be `grad.x * sp.x + grad.y * sp.y`;
    // the second term here uses `+` instead of `*`, which looks like a typo.
    // Left as-is so this listing stays the code that was actually measured.
    grad.x * sp.x + grad.y + sp.y
}

/// Lookup tables used by the 2D noise functions.
struct Noise2DContext {
    // Table of gradient vectors, indexed via `permutations`.
    rgradients: ~[Vec2],
    // Table of integers used to derive an index into `rgradients`.
    permutations: ~[int],
}

/// Constructor: builds a heap-allocated `Noise2DContext` with 256 random
/// gradients and a shuffled 256-entry permutation table.
fn Noise2DContext() -> ~Noise2DContext {
    let r = rand::Rng();
    // One random gradient per table slot.
    let rgradients = do vec::from_fn(256) |_i| { random_gradient(r) };
    // Start from the values 0..255 in order, then shuffle them in place.
    let mut permutations = do vec::from_fn(256) |i| { i as int };
    r.shuffle_mut(permutations);

    // Both tables are moved into the new context rather than copied.
    ~Noise2DContext{
        rgradients: move rgradients,
        permutations: move permutations,
    }
}

/// Noise sampling methods (written with implicit `self`).
impl Noise2DContext {
    /// Returns the table gradient selected by the integer coordinates `(x, y)`.
    fn get_gradient(x: int, y: int) -> Vec2 {
        // `& 255` keeps each index in 0..255, matching the 256-entry tables
        // built by the `Noise2DContext()` constructor.
        let idx = self.permutations[x & 255] + self.permutations[y & 255];
        self.rgradients[idx & 255]
    }

    /// Fills `gradients` and `origins` with the gradient and position of the
    /// four integer grid corners surrounding `(x, y)`.
    ///
    /// Slot order in both arrays: (x0, y0), (x1, y0), (x0, y1), (x1, y1).
    fn get_gradients(gradients: &[mut Vec2 * 4], origins: &[mut Vec2 * 4], x: f32, y: f32) {
        // Lower-left corner of the containing cell, as float and as int.
        let x0f = float::floor(x as libc::c_double) as f32;
        let y0f = float::floor(y as libc::c_double) as f32;
        let x0 = x0f as int;
        let y0 = y0f as int;
        let x1 = x0 + 1;
        let y1 = y0 + 1;

        gradients[0] = self.get_gradient(x0, y0);
        gradients[1] = self.get_gradient(x1, y0);
        gradients[2] = self.get_gradient(x0, y1);
        gradients[3] = self.get_gradient(x1, y1);

        origins[0] = Vec2{x: x0f + 0f32, y: y0f + 0f32};
        origins[1] = Vec2{x: x0f + 1f32, y: y0f + 0f32};
        origins[2] = Vec2{x: x0f + 0f32, y: y0f + 1f32};
        origins[3] = Vec2{x: x0f + 1f32, y: y0f + 1f32};
    }

    /// Returns the noise value at `(x, y)`: the four corner contributions
    /// blended with `smooth`-eased weights, first along x and then along y.
    fn get(x: f32, y: f32) -> f32 {
        let p = Vec2{x: x, y: y};
        // Zero-initialised scratch arrays, filled in by `get_gradients` below.
        let gradients: [mut Vec2 * 4] = [mut
            Vec2{x:0f32, y:0f32},
            Vec2{x:0f32, y:0f32},
            Vec2{x:0f32, y:0f32},
            Vec2{x:0f32, y:0f32},
        ];
        let origins: [mut Vec2 * 4] = [mut
            Vec2{x:0f32, y:0f32},
            Vec2{x:0f32, y:0f32},
            Vec2{x:0f32, y:0f32},
            Vec2{x:0f32, y:0f32},
        ];
        self.get_gradients(&gradients, &origins, x, y);
        // Contribution of each corner at the sample point.
        let v0 = gradient(origins[0], gradients[0], p);
        let v1 = gradient(origins[1], gradients[1], p);
        let v2 = gradient(origins[2], gradients[2], p);
        let v3 = gradient(origins[3], gradients[3], p);
        // Eased offset from the (x0, y0) corner along each axis.
        let fx = smooth(x - origins[0].x);
        let vx0 = lerp(v0, v1, fx);
        let vx1 = lerp(v2, v3, fx);
        let fy = smooth(y - origins[0].y);
        lerp(vx0, vx1, fy)
    }
}

/// Benchmark driver: fills a 256x256 buffer with noise values 100 times over,
/// then prints the buffer as rows of block characters.
fn main() {
    // Output glyphs, indexed by the scaled pixel value.
    let symbols = [" ", "░", "▒", "▓", "█", "█"];
    let pixels = vec::to_mut(vec::from_elem(256*256, 0f32));
    let n2d = Noise2DContext();
    // The pass counter `_i` is unused, so every pass recomputes the same
    // values; presumably the repetition only lengthens the run for timing.
    for int::range(0, 100) |_i| {
        for int::range(0, 256) |y| {
            for int::range(0, 256) |x| {
                // Sample at 0.1 grid units per pixel, then scale by 0.5 and
                // shift by 0.5.
                let v = n2d.get(
                    x as f32 * 0.1f32,
                    y as f32 * 0.1f32
                ) * 0.5f32 + 0.5f32;
                pixels[y*256+x] = v;
            };
        };
    };

    for int::range(0, 256) |y| {
        for int::range(0, 256) |x| {
            // NOTE(review): presumably intended as `(pixel / 0.2) as int` to
            // pick a glyph; this relies on how this compiler version ranks
            // `as` against `/` — confirm.
            io::print(symbols[pixels[y*256+x] / 0.2f32 as int]);
        }
        io::println("");
    }
}

I noticed that updating this code to use explicit self causes a noticeable perf hit, even though nmatsakis assures me that the semantics should be the same. For posterity, here's the updated version (the only difference is that the three methods are using explicit &self parameters):

/// Two-component vector of `f32` values.
struct Vec2 {
    x: f32,
    y: f32,
}

/// Linear interpolation between `a` and `b` by weight `v`:
/// returns `a` when `v` is 0 and `b` when `v` is 1.
fn lerp(a: f32, b: f32, v: f32) -> f32 {
    a * (1f32 - v) + b * v
}

/// Cubic easing polynomial `3v^2 - 2v^3`; evaluates to 0 at `v = 0` and 1 at `v = 1`.
fn smooth(v: f32) -> f32 {
    v * v * (3f32 - 2f32 * v)
}

/// Builds a `Vec2` from the cosine and sine of a random angle drawn from `r`.
fn random_gradient(r: rand::Rng) -> Vec2 {
    // Scale the random float by 2*pi to get an angle.
    // NOTE(review): presumably `gen_float()` yields a value in [0, 1), making
    // this a full-circle angle in radians — confirm against the rand API.
    let v = r.gen_float() * float::consts::pi * 2.0;
    Vec2{
        x: float::cos(v) as f32,
        y: float::sin(v) as f32,
    }
}

/// Combines the gradient `grad` with the offset of point `p` from `orig`.
fn gradient(orig: Vec2, grad: Vec2, p: Vec2) -> f32 {
    // Offset of the sample point from the corner.
    let sp = Vec2{x: p.x - orig.x, y: p.y - orig.y};
    // NOTE(review): a dot product would be `grad.x * sp.x + grad.y * sp.y`;
    // the second term here uses `+` instead of `*`, which looks like a typo.
    // Left as-is so this listing stays the code that was actually measured.
    grad.x * sp.x + grad.y + sp.y
}

/// Lookup tables used by the 2D noise functions.
struct Noise2DContext {
    // Table of gradient vectors, indexed via `permutations`.
    rgradients: ~[Vec2],
    // Table of integers used to derive an index into `rgradients`.
    permutations: ~[int],
}

/// Constructor: builds a heap-allocated `Noise2DContext` with 256 random
/// gradients and a shuffled 256-entry permutation table.
fn Noise2DContext() -> ~Noise2DContext {
    let r = rand::Rng();
    // One random gradient per table slot.
    let rgradients = do vec::from_fn(256) |_i| { random_gradient(r) };
    // Start from the values 0..255 in order, then shuffle them in place.
    let mut permutations = do vec::from_fn(256) |i| { i as int };
    r.shuffle_mut(permutations);

    // Both tables are moved into the new context rather than copied.
    ~Noise2DContext{
        rgradients: move rgradients,
        permutations: move permutations,
    }
}

/// Noise sampling methods; each one takes an explicit `&self` receiver.
impl Noise2DContext {
    /// Returns the table gradient selected by the integer coordinates `(x, y)`.
    fn get_gradient(&self, x: int, y: int) -> Vec2 {
        // `& 255` keeps each index in 0..255, matching the 256-entry tables
        // built by the `Noise2DContext()` constructor.
        let idx = self.permutations[x & 255] + self.permutations[y & 255];
        self.rgradients[idx & 255]
    }

    /// Fills `gradients` and `origins` with the gradient and position of the
    /// four integer grid corners surrounding `(x, y)`.
    ///
    /// Slot order in both arrays: (x0, y0), (x1, y0), (x0, y1), (x1, y1).
    fn get_gradients(&self, gradients: &[mut Vec2 * 4], origins: &[mut Vec2 * 4], x: f32, y: f32) {
        // Lower-left corner of the containing cell, as float and as int.
        let x0f = float::floor(x as libc::c_double) as f32;
        let y0f = float::floor(y as libc::c_double) as f32;
        let x0 = x0f as int;
        let y0 = y0f as int;
        let x1 = x0 + 1;
        let y1 = y0 + 1;

        gradients[0] = self.get_gradient(x0, y0);
        gradients[1] = self.get_gradient(x1, y0);
        gradients[2] = self.get_gradient(x0, y1);
        gradients[3] = self.get_gradient(x1, y1);

        origins[0] = Vec2{x: x0f + 0f32, y: y0f + 0f32};
        origins[1] = Vec2{x: x0f + 1f32, y: y0f + 0f32};
        origins[2] = Vec2{x: x0f + 0f32, y: y0f + 1f32};
        origins[3] = Vec2{x: x0f + 1f32, y: y0f + 1f32};
    }

    /// Returns the noise value at `(x, y)`: the four corner contributions
    /// blended with `smooth`-eased weights, first along x and then along y.
    fn get(&self, x: f32, y: f32) -> f32 {
        let p = Vec2{x: x, y: y};
        // Zero-initialised scratch arrays, filled in by `get_gradients` below.
        let gradients: [mut Vec2 * 4] = [mut
            Vec2{x:0f32, y:0f32},
            Vec2{x:0f32, y:0f32},
            Vec2{x:0f32, y:0f32},
            Vec2{x:0f32, y:0f32},
        ];
        let origins: [mut Vec2 * 4] = [mut
            Vec2{x:0f32, y:0f32},
            Vec2{x:0f32, y:0f32},
            Vec2{x:0f32, y:0f32},
            Vec2{x:0f32, y:0f32},
        ];
        self.get_gradients(&gradients, &origins, x, y);
        // Contribution of each corner at the sample point.
        let v0 = gradient(origins[0], gradients[0], p);
        let v1 = gradient(origins[1], gradients[1], p);
        let v2 = gradient(origins[2], gradients[2], p);
        let v3 = gradient(origins[3], gradients[3], p);
        // Eased offset from the (x0, y0) corner along each axis.
        let fx = smooth(x - origins[0].x);
        let vx0 = lerp(v0, v1, fx);
        let vx1 = lerp(v2, v3, fx);
        let fy = smooth(y - origins[0].y);
        lerp(vx0, vx1, fy)
    }
}

/// Benchmark driver: fills a 256x256 buffer with noise values 100 times over,
/// then prints the buffer as rows of block characters.
fn main() {
    // Output glyphs, indexed by the scaled pixel value.
    let symbols = [" ", "░", "▒", "▓", "█", "█"];
    let pixels = vec::to_mut(vec::from_elem(256*256, 0f32));
    let n2d = Noise2DContext();
    // The pass counter `_i` is unused, so every pass recomputes the same
    // values; presumably the repetition only lengthens the run for timing.
    for int::range(0, 100) |_i| {
        for int::range(0, 256) |y| {
            for int::range(0, 256) |x| {
                // Sample at 0.1 grid units per pixel, then scale by 0.5 and
                // shift by 0.5.
                let v = n2d.get(
                    x as f32 * 0.1f32,
                    y as f32 * 0.1f32
                ) * 0.5f32 + 0.5f32;
                pixels[y*256+x] = v;
            };
        };
    };

    for int::range(0, 256) |y| {
        for int::range(0, 256) |x| {
            // NOTE(review): presumably intended as `(pixel / 0.2) as int` to
            // pick a glyph; this relies on how this compiler version ranks
            // `as` against `/` — confirm.
            io::print(symbols[pixels[y*256+x] / 0.2f32 as int]);
        }
        io::println("");
    }
}

Both versions were compiled with rustc --opt-level=3 and profiled as follows:

$ (perf stat -r 10 perlin-orig) 2> orig.txt
$ (perf stat -r 10 perlin-expself) 2> expself.txt

orig.txt:

 Performance counter stats for 'perlin-orig' (10 runs):

    1352.872451  task-clock-msecs         #      0.983 CPUs    ( +-   0.177% )
             97  context-switches         #      0.000 M/sec   ( +-   5.580% )
              1  CPU-migrations           #      0.000 M/sec   ( +-   9.091% )
            738  page-faults              #      0.001 M/sec   ( +-   0.036% )
   209379655582  cycles                   # 154766.738 M/sec   ( +-   5.099% )
   209379655582  instructions             #      1.000 IPC     ( +-   5.099% )
   209379655582  branches                 # 154766.738 M/sec   ( +-   5.099% )
   209379655582  branch-misses            #    100.000 %       ( +-   5.099% )
   209379655582  cache-references         # 154766.738 M/sec   ( +-   5.099% )
   209379655582  cache-misses             # 154766.738 M/sec   ( +-   5.099% )

    1.376399979  seconds time elapsed   ( +-   0.213% )

expself.txt:

 Performance counter stats for 'perlin-expself' (10 runs):

    1389.736506  task-clock-msecs         #      0.983 CPUs    ( +-   0.096% )
            103  context-switches         #      0.000 M/sec   ( +-   7.792% )
              1  CPU-migrations           #      0.000 M/sec   ( +-  11.111% )
            738  page-faults              #      0.001 M/sec   ( +-   0.049% )
   224626789476  cycles                   # 161632.646 M/sec   ( +-   6.994% )
   224626789476  instructions             #      1.000 IPC     ( +-   6.994% )
   224626789476  branches                 # 161632.646 M/sec   ( +-   6.994% )
   224626789476  branch-misses            #    100.000 %       ( +-   6.994% )
   224626789476  cache-references         # 161632.646 M/sec   ( +-   6.994% )
   224626789476  cache-misses             # 161632.646 M/sec   ( +-   6.994% )

    1.413482586  seconds time elapsed   ( +-   0.240% )

@nikomatsakis has a theory:

< bstrie> why is explicit self slower than implicit self :(
<@nmatsakis> bstrie: the trans for that is kind of bad... I wonder if it's
             introducing extra indirections or something
<@nmatsakis> the trans of self in general needs to be reworked

Metadata

Metadata

Assignees

No one assigned

    Labels

    A-codegen — Area: Code generation
    A-trait-system — Area: Trait system
    I-slow — Issue: Problems and improvements with respect to performance of generated code.

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions