Closed
Description
I found a multi-language benchmark online (https://gist.github.com/1170424) whose Rust version was outdated. I decided to update it.
Here's the original code:
// A pair of `f32` components. Used in this program both for positions
// (cell corners, sample points) and for gradient directions.
struct Vec2 {
x: f32,
y: f32,
}
// Linear interpolation between `a` and `b`: `a` is weighted by `1 - v`
// and `b` by `v`, so `v = 0` yields `a` and `v = 1` yields `b`.
fn lerp(a: f32, b: f32, v: f32) -> f32 {
    let from_a = a * (1f32 - v);
    let from_b = b * v;
    from_a + from_b
}
// Cubic easing polynomial `3v^2 - 2v^3`, evaluated as `v^2 * (3 - 2v)`.
fn smooth(v: f32) -> f32 {
    let squared = v * v;
    let factor = 3f32 - 2f32 * v;
    squared * factor
}
// Draws one float from `r`, scales it by 2*pi to obtain an angle, and
// returns the (cos, sin) pair of that angle narrowed to `f32`.
fn random_gradient(r: rand::Rng) -> Vec2 {
    let angle = r.gen_float() * float::consts::pi * 2.0;
    let gx = float::cos(angle) as f32;
    let gy = float::sin(angle) as f32;
    Vec2{x: gx, y: gy}
}
// Dot product of the gradient `grad` with the offset from the cell corner
// `orig` to the sample point `p`.
fn gradient(orig: Vec2, grad: Vec2, p: Vec2) -> f32 {
    let sp = Vec2{x: p.x - orig.x, y: p.y - orig.y};
    // Both terms must be products. The earlier `grad.y + sp.y` added the
    // y components instead of multiplying them, which is not a dot product.
    grad.x * sp.x + grad.y * sp.y
}
// Lookup tables used to evaluate the noise function.
struct Noise2DContext {
// Gradient vectors; `get_gradient` indexes this with `idx & 255`.
rgradients: ~[Vec2],
// Integer table; `get_gradient` indexes this with `x & 255` and `y & 255`.
permutations: ~[int],
}
// Constructor: builds a `~Noise2DContext` holding 256 random gradients and
// the integers 0..255 after being passed through the generator's shuffle.
fn Noise2DContext() -> ~Noise2DContext {
let r = rand::Rng();
// One gradient per table slot, each drawn from the same generator `r`.
let rgradients = do vec::from_fn(256) |_i| { random_gradient(r) };
// Start from the sequence 0, 1, ..., 255 ...
let mut permutations = do vec::from_fn(256) |i| { i as int };
// ... and shuffle it (presumably in place, given the `_mut` suffix — confirm).
r.shuffle_mut(permutations);
~Noise2DContext{
rgradients: move rgradients,
permutations: move permutations,
}
}
// Noise evaluation methods (implicit-self form).
impl Noise2DContext {
// Returns the gradient for integer lattice point (x, y): the permutation
// entries selected by the low 8 bits of `x` and of `y` are summed, and the
// low 8 bits of that sum select the entry of `rgradients`.
fn get_gradient(x: int, y: int) -> Vec2 {
let idx = self.permutations[x & 255] + self.permutations[y & 255];
self.rgradients[idx & 255]
}
// Fills `gradients` and `origins` for the four corners of the unit cell
// containing (x, y), in the order (x0,y0), (x1,y0), (x0,y1), (x1,y1).
fn get_gradients(gradients: &[mut Vec2 * 4], origins: &[mut Vec2 * 4], x: f32, y: f32) {
// Lower corner of the cell, kept both as float and as integer.
let x0f = float::floor(x as libc::c_double) as f32;
let y0f = float::floor(y as libc::c_double) as f32;
let x0 = x0f as int;
let y0 = y0f as int;
let x1 = x0 + 1;
let y1 = y0 + 1;
gradients[0] = self.get_gradient(x0, y0);
gradients[1] = self.get_gradient(x1, y0);
gradients[2] = self.get_gradient(x0, y1);
gradients[3] = self.get_gradient(x1, y1);
origins[0] = Vec2{x: x0f + 0f32, y: y0f + 0f32};
origins[1] = Vec2{x: x0f + 1f32, y: y0f + 0f32};
origins[2] = Vec2{x: x0f + 0f32, y: y0f + 1f32};
origins[3] = Vec2{x: x0f + 1f32, y: y0f + 1f32};
}
// Evaluates the noise at (x, y): computes one `gradient` value per cell
// corner, then blends them with `lerp` along x and then along y, using
// `smooth` of the fractional position inside the cell as the weight.
fn get(x: f32, y: f32) -> f32 {
let p = Vec2{x: x, y: y};
// Zero-initialised scratch arrays that `get_gradients` overwrites.
let gradients: [mut Vec2 * 4] = [mut
Vec2{x:0f32, y:0f32},
Vec2{x:0f32, y:0f32},
Vec2{x:0f32, y:0f32},
Vec2{x:0f32, y:0f32},
];
let origins: [mut Vec2 * 4] = [mut
Vec2{x:0f32, y:0f32},
Vec2{x:0f32, y:0f32},
Vec2{x:0f32, y:0f32},
Vec2{x:0f32, y:0f32},
];
self.get_gradients(&gradients, &origins, x, y);
let v0 = gradient(origins[0], gradients[0], p);
let v1 = gradient(origins[1], gradients[1], p);
let v2 = gradient(origins[2], gradients[2], p);
let v3 = gradient(origins[3], gradients[3], p);
// origins[0] is the floor corner, so these differences are the
// fractional offsets of the sample point inside its cell.
let fx = smooth(x - origins[0].x);
let vx0 = lerp(v0, v1, fx);
let vx1 = lerp(v2, v3, fx);
let fy = smooth(y - origins[0].y);
lerp(vx0, vx1, fy)
}
}
// Benchmark driver: fills a 256x256 buffer with noise samples 100 times,
// then prints the buffer as block characters, one row per output line.
fn main() {
    let symbols = [" ", "░", "▒", "▓", "█", "█"];
    let pixels = vec::to_mut(vec::from_elem(256*256, 0f32));
    let n2d = Noise2DContext();
    // The outer loop repeats the same fill 100 times to lengthen the run.
    for int::range(0, 100) |_i| {
        for int::range(0, 256) |y| {
            for int::range(0, 256) |x| {
                // Sample every 0.1 units, then rescale with * 0.5 + 0.5.
                let v = n2d.get(
                    x as f32 * 0.1f32,
                    y as f32 * 0.1f32
                ) * 0.5f32 + 0.5f32;
                pixels[y*256+x] = v;
            };
        };
    };
    for int::range(0, 256) |y| {
        for int::range(0, 256) |x| {
            // Parenthesised so the division is performed before the cast
            // no matter how tightly the compiler binds `as`.
            io::print(symbols[(pixels[y*256+x] / 0.2f32) as int]);
        }
        io::println("");
    }
}
I noticed that updating this code to use explicit self causes a noticeable perf hit, even though nmatsakis assures me that the semantics should be the same. For posterity, here's the updated version (the only difference is that the three methods take explicit `&self` parameters):
// A pair of `f32` components. Used in this program both for positions
// (cell corners, sample points) and for gradient directions.
struct Vec2 {
x: f32,
y: f32,
}
// Linear interpolation between `a` and `b`: `a` is weighted by `1 - v`
// and `b` by `v`, so `v = 0` yields `a` and `v = 1` yields `b`.
fn lerp(a: f32, b: f32, v: f32) -> f32 {
    let from_a = a * (1f32 - v);
    let from_b = b * v;
    from_a + from_b
}
// Cubic easing polynomial `3v^2 - 2v^3`, evaluated as `v^2 * (3 - 2v)`.
fn smooth(v: f32) -> f32 {
    let squared = v * v;
    let factor = 3f32 - 2f32 * v;
    squared * factor
}
// Draws one float from `r`, scales it by 2*pi to obtain an angle, and
// returns the (cos, sin) pair of that angle narrowed to `f32`.
fn random_gradient(r: rand::Rng) -> Vec2 {
    let angle = r.gen_float() * float::consts::pi * 2.0;
    let gx = float::cos(angle) as f32;
    let gy = float::sin(angle) as f32;
    Vec2{x: gx, y: gy}
}
// Dot product of the gradient `grad` with the offset from the cell corner
// `orig` to the sample point `p`.
fn gradient(orig: Vec2, grad: Vec2, p: Vec2) -> f32 {
    let sp = Vec2{x: p.x - orig.x, y: p.y - orig.y};
    // Both terms must be products. The earlier `grad.y + sp.y` added the
    // y components instead of multiplying them, which is not a dot product.
    grad.x * sp.x + grad.y * sp.y
}
// Lookup tables used to evaluate the noise function.
struct Noise2DContext {
// Gradient vectors; `get_gradient` indexes this with `idx & 255`.
rgradients: ~[Vec2],
// Integer table; `get_gradient` indexes this with `x & 255` and `y & 255`.
permutations: ~[int],
}
// Constructor: builds a `~Noise2DContext` holding 256 random gradients and
// the integers 0..255 after being passed through the generator's shuffle.
fn Noise2DContext() -> ~Noise2DContext {
let r = rand::Rng();
// One gradient per table slot, each drawn from the same generator `r`.
let rgradients = do vec::from_fn(256) |_i| { random_gradient(r) };
// Start from the sequence 0, 1, ..., 255 ...
let mut permutations = do vec::from_fn(256) |i| { i as int };
// ... and shuffle it (presumably in place, given the `_mut` suffix — confirm).
r.shuffle_mut(permutations);
~Noise2DContext{
rgradients: move rgradients,
permutations: move permutations,
}
}
// Noise evaluation methods (explicit `&self` form).
impl Noise2DContext {
// Returns the gradient for integer lattice point (x, y): the permutation
// entries selected by the low 8 bits of `x` and of `y` are summed, and the
// low 8 bits of that sum select the entry of `rgradients`.
fn get_gradient(&self, x: int, y: int) -> Vec2 {
let idx = self.permutations[x & 255] + self.permutations[y & 255];
self.rgradients[idx & 255]
}
// Fills `gradients` and `origins` for the four corners of the unit cell
// containing (x, y), in the order (x0,y0), (x1,y0), (x0,y1), (x1,y1).
fn get_gradients(&self, gradients: &[mut Vec2 * 4], origins: &[mut Vec2 * 4], x: f32, y: f32) {
// Lower corner of the cell, kept both as float and as integer.
let x0f = float::floor(x as libc::c_double) as f32;
let y0f = float::floor(y as libc::c_double) as f32;
let x0 = x0f as int;
let y0 = y0f as int;
let x1 = x0 + 1;
let y1 = y0 + 1;
gradients[0] = self.get_gradient(x0, y0);
gradients[1] = self.get_gradient(x1, y0);
gradients[2] = self.get_gradient(x0, y1);
gradients[3] = self.get_gradient(x1, y1);
origins[0] = Vec2{x: x0f + 0f32, y: y0f + 0f32};
origins[1] = Vec2{x: x0f + 1f32, y: y0f + 0f32};
origins[2] = Vec2{x: x0f + 0f32, y: y0f + 1f32};
origins[3] = Vec2{x: x0f + 1f32, y: y0f + 1f32};
}
// Evaluates the noise at (x, y): computes one `gradient` value per cell
// corner, then blends them with `lerp` along x and then along y, using
// `smooth` of the fractional position inside the cell as the weight.
fn get(&self, x: f32, y: f32) -> f32 {
let p = Vec2{x: x, y: y};
// Zero-initialised scratch arrays that `get_gradients` overwrites.
let gradients: [mut Vec2 * 4] = [mut
Vec2{x:0f32, y:0f32},
Vec2{x:0f32, y:0f32},
Vec2{x:0f32, y:0f32},
Vec2{x:0f32, y:0f32},
];
let origins: [mut Vec2 * 4] = [mut
Vec2{x:0f32, y:0f32},
Vec2{x:0f32, y:0f32},
Vec2{x:0f32, y:0f32},
Vec2{x:0f32, y:0f32},
];
self.get_gradients(&gradients, &origins, x, y);
let v0 = gradient(origins[0], gradients[0], p);
let v1 = gradient(origins[1], gradients[1], p);
let v2 = gradient(origins[2], gradients[2], p);
let v3 = gradient(origins[3], gradients[3], p);
// origins[0] is the floor corner, so these differences are the
// fractional offsets of the sample point inside its cell.
let fx = smooth(x - origins[0].x);
let vx0 = lerp(v0, v1, fx);
let vx1 = lerp(v2, v3, fx);
let fy = smooth(y - origins[0].y);
lerp(vx0, vx1, fy)
}
}
// Benchmark driver: fills a 256x256 buffer with noise samples 100 times,
// then prints the buffer as block characters, one row per output line.
fn main() {
    let symbols = [" ", "░", "▒", "▓", "█", "█"];
    let pixels = vec::to_mut(vec::from_elem(256*256, 0f32));
    let n2d = Noise2DContext();
    // The outer loop repeats the same fill 100 times to lengthen the run.
    for int::range(0, 100) |_i| {
        for int::range(0, 256) |y| {
            for int::range(0, 256) |x| {
                // Sample every 0.1 units, then rescale with * 0.5 + 0.5.
                let v = n2d.get(
                    x as f32 * 0.1f32,
                    y as f32 * 0.1f32
                ) * 0.5f32 + 0.5f32;
                pixels[y*256+x] = v;
            };
        };
    };
    for int::range(0, 256) |y| {
        for int::range(0, 256) |x| {
            // Parenthesised so the division is performed before the cast
            // no matter how tightly the compiler binds `as`.
            io::print(symbols[(pixels[y*256+x] / 0.2f32) as int]);
        }
        io::println("");
    }
}
Both versions were compiled with `rustc --opt-level=3` and profiled as follows:
$ (perf stat -r 10 perlin-orig) 2> orig.txt
$ (perf stat -r 10 perlin-expself) 2> expself.txt
orig.txt:
Performance counter stats for 'perlin-orig' (10 runs):
1352.872451 task-clock-msecs # 0.983 CPUs ( +- 0.177% )
97 context-switches # 0.000 M/sec ( +- 5.580% )
1 CPU-migrations # 0.000 M/sec ( +- 9.091% )
738 page-faults # 0.001 M/sec ( +- 0.036% )
209379655582 cycles # 154766.738 M/sec ( +- 5.099% )
209379655582 instructions # 1.000 IPC ( +- 5.099% )
209379655582 branches # 154766.738 M/sec ( +- 5.099% )
209379655582 branch-misses # 100.000 % ( +- 5.099% )
209379655582 cache-references # 154766.738 M/sec ( +- 5.099% )
209379655582 cache-misses # 154766.738 M/sec ( +- 5.099% )
1.376399979 seconds time elapsed ( +- 0.213% )
expself.txt:
Performance counter stats for 'perlin-expself' (10 runs):
1389.736506 task-clock-msecs # 0.983 CPUs ( +- 0.096% )
103 context-switches # 0.000 M/sec ( +- 7.792% )
1 CPU-migrations # 0.000 M/sec ( +- 11.111% )
738 page-faults # 0.001 M/sec ( +- 0.049% )
224626789476 cycles # 161632.646 M/sec ( +- 6.994% )
224626789476 instructions # 1.000 IPC ( +- 6.994% )
224626789476 branches # 161632.646 M/sec ( +- 6.994% )
224626789476 branch-misses # 100.000 % ( +- 6.994% )
224626789476 cache-references # 161632.646 M/sec ( +- 6.994% )
224626789476 cache-misses # 161632.646 M/sec ( +- 6.994% )
1.413482586 seconds time elapsed ( +- 0.240% )
@nikomatsakis has a theory:
< bstrie> why is explicit self slower than implicit self :(
<@nmatsakis> bstrie: the trans for that is kind of bad... I wonder if it's
introducing extra indirections or something
<@nmatsakis> the trans of self in general needs to be reworked