Boost logo

Ublas :

From: Andreas Dolfen (andy_at_[hidden])
Date: 2006-04-25 15:46:13


Hi,

> The timing resolution is very low with most times around 0.01s. You can run
> bench1 with an argument to increase the number of times everything is
> executed.
>
Many thanks for this tip. I reran bench1 with the argument 100.
Boost 1_33_1 is still slower than 1_30_2 (both outputs follow below). Any further ideas?

Thanks again
Andreas

DOUBLE
peak
plus
elapsed: 0.36 s, 264.91 Mflops
multiplies
elapsed: 0 s, INF Mflops
DOUBLE, 3
bench_1
inner_prod
C array
elapsed: 1.06 s, 449.846 Mflops
c_vector
elapsed: 4.63 s, 102.989 Mflops
vector<unbounded_array>
elapsed: 2.28 s, 209.139 Mflops
vector + vector
C array
elapsed: 0 s, INF Mflops
c_vector safe
elapsed: 11.17 s, 51.2269 Mflops
c_vector fast
elapsed: 6.02 s, 95.0506 Mflops
vector<unbounded_array> safe
elapsed: 36.71 s, 15.5872 Mflops
vector<unbounded_array> fast
elapsed: 3.89 s, 147.096 Mflops
bench_2
outer_prod
C array
elapsed: 0 s, INF Mflops
c_matrix, c_vector safe
elapsed: 7.88 s, 65.3533 Mflops
c_matrix, c_vector fast
elapsed: 5.15 s, 99.9969 Mflops
matrix<unbounded_array>, vector<unbounded_array> safe
elapsed: 13.95 s, 36.9164 Mflops
matrix<unbounded_array>, vector<unbounded_array> fast
elapsed: 3.78 s, 136.239 Mflops
prod (matrix, vector)
C array
elapsed: 0.03 s, 14305.1 Mflops
c_matrix, c_vector safe
elapsed: 8.75 s, 49.0461 Mflops
c_matrix, c_vector fast
elapsed: 3.53 s, 121.573 Mflops
matrix<unbounded_array>, vector<unbounded_array> safe
elapsed: 14.78 s, 29.0361 Mflops
matrix<unbounded_array>, vector<unbounded_array> fast
elapsed: 3.09 s, 138.885 Mflops
matrix + matrix
C array
elapsed: 0 s, INF Mflops
c_matrix safe
elapsed: 6.28 s, 82.0038 Mflops
c_matrix fast
elapsed: 4.07 s, 126.532 Mflops
matrix<unbounded_array> safe
elapsed: 13.4 s, 38.4317 Mflops
matrix<unbounded_array> fast
elapsed: 3.47 s, 148.41 Mflops
bench_3
prod (matrix, matrix)
C array
elapsed: 0.05 s, 8583.07 Mflops
c_matrix safe
elapsed: 3.02 s, 142.104 Mflops
c_matrix fast
elapsed: 2.52 s, 170.299 Mflops
matrix<unbounded_array> safe
elapsed: 5.8 s, 73.992 Mflops
matrix<unbounded_array> fast
elapsed: 2.48 s, 173.046 Mflops
DOUBLE, 10
bench_1
inner_prod
C array
elapsed: 1.06 s, 512.825 Mflops
c_vector
elapsed: 3.46 s, 157.108 Mflops
vector<unbounded_array>
elapsed: 1.37 s, 396.784 Mflops
vector + vector
C array
elapsed: 0 s, INF Mflops
c_vector safe
elapsed: 5.97 s, 95.8467 Mflops
c_vector fast
elapsed: 2.98 s, 192.015 Mflops
vector<unbounded_array> safe
elapsed: 11.44 s, 50.0179 Mflops
vector<unbounded_array> fast
elapsed: 1.21 s, 472.896 Mflops
bench_2
outer_prod
C array
elapsed: 0 s, INF Mflops
c_matrix, c_vector safe
elapsed: 4.46 s, 128.297 Mflops
c_matrix, c_vector fast
elapsed: 2.48 s, 230.728 Mflops
matrix<unbounded_array>, vector<unbounded_array> safe
elapsed: 2.17 s, 263.689 Mflops
matrix<unbounded_array>, vector<unbounded_array> fast
elapsed: 0.91 s, 628.796 Mflops
prod (matrix, vector)
C array
elapsed: 0.27 s, 2013.31 Mflops
c_matrix, c_vector safe
elapsed: 2.15 s, 252.835 Mflops
c_matrix, c_vector fast
elapsed: 1.86 s, 292.255 Mflops
matrix<unbounded_array>, vector<unbounded_array> safe
elapsed: 2.65 s, 205.13 Mflops
matrix<unbounded_array>, vector<unbounded_array> fast
elapsed: 1.42 s, 382.813 Mflops
matrix + matrix
C array
elapsed: 0 s, INF Mflops
c_matrix safe
elapsed: 1.69 s, 338.583 Mflops
c_matrix fast
elapsed: 0.96 s, 596.046 Mflops
matrix<unbounded_array> safe
elapsed: 3.39 s, 168.792 Mflops
matrix<unbounded_array> fast
elapsed: 2.12 s, 269.908 Mflops
bench_3
prod (matrix, matrix)
C array
elapsed: 0.32 s, 1698.73 Mflops
c_matrix safe
elapsed: 1.02 s, 532.936 Mflops
c_matrix fast
elapsed: 0.94 s, 578.292 Mflops
matrix<unbounded_array> safe
elapsed: 1.41 s, 385.528 Mflops
matrix<unbounded_array> fast
elapsed: 1.3 s, 418.15 Mflops
DOUBLE, 30
bench_1
inner_prod
C array
elapsed: 1.06 s, 530.819 Mflops
c_vector
elapsed: 2.62 s, 214.759 Mflops
vector<unbounded_array>
elapsed: 1.13 s, 497.936 Mflops
vector + vector
C array
elapsed: 0 s, INF Mflops
c_vector safe
elapsed: 5.6 s, 102.179 Mflops
c_vector fast
elapsed: 2.69 s, 212.715 Mflops
vector<unbounded_array> safe
elapsed: 4.07 s, 140.591 Mflops
vector<unbounded_array> fast
elapsed: 0.96 s, 596.046 Mflops
bench_2
outer_prod
C array
elapsed: 0.01 s, 51498.4 Mflops
c_matrix, c_vector safe
elapsed: 2.93 s, 175.763 Mflops
c_matrix, c_vector fast
elapsed: 2.19 s, 235.153 Mflops
matrix<unbounded_array>, vector<unbounded_array> safe
elapsed: 0.97 s, 530.911 Mflops
matrix<unbounded_array>, vector<unbounded_array> fast
elapsed: 0.61 s, 844.236 Mflops
prod (matrix, vector)
C array
elapsed: 0.27 s, 1875.56 Mflops
c_matrix, c_vector safe
elapsed: 1.42 s, 356.62 Mflops
c_matrix, c_vector fast
elapsed: 1.54 s, 328.832 Mflops
matrix<unbounded_array>, vector<unbounded_array> safe
elapsed: 1.23 s, 411.708 Mflops
matrix<unbounded_array>, vector<unbounded_array> fast
elapsed: 1.11 s, 456.217 Mflops
matrix + matrix
C array
elapsed: 0 s, INF Mflops
c_matrix safe
elapsed: 1.36 s, 378.665 Mflops
c_matrix fast
elapsed: 0.65 s, 792.283 Mflops
matrix<unbounded_array> safe
elapsed: 2.1 s, 245.231 Mflops
matrix<unbounded_array> fast
elapsed: 1.71 s, 301.16 Mflops
bench_3
prod (matrix, matrix)
C array
elapsed: 0.28 s, 1808.58 Mflops
c_matrix safe
elapsed: 0.82 s, 617.562 Mflops
c_matrix fast
elapsed: 0.8 s, 633.001 Mflops
matrix<unbounded_array> safe
elapsed: 1.03 s, 491.652 Mflops
matrix<unbounded_array> fast
elapsed: 1.02 s, 496.472 Mflops
DOUBLE, 100
bench_1
inner_prod
C array
elapsed: 1.06 s, 537.117 Mflops
c_vector
elapsed: 2.41 s, 236.242 Mflops
vector<unbounded_array>
elapsed: 1.08 s, 527.17 Mflops
vector + vector
C array
elapsed: 0 s, INF Mflops
c_vector safe
elapsed: 4.51 s, 126.875 Mflops
c_vector fast
elapsed: 2.44 s, 234.51 Mflops
vector<unbounded_array> safe
elapsed: 1.92 s, 298.023 Mflops
vector<unbounded_array> fast
elapsed: 0.76 s, 752.901 Mflops
bench_2
outer_prod
C array
elapsed: 0 s, INF Mflops
c_matrix, c_vector safe
elapsed: 2.83 s, 202.192 Mflops
c_matrix, c_vector fast
elapsed: 2.4 s, 238.419 Mflops
matrix<unbounded_array>, vector<unbounded_array> safe
elapsed: 0.96 s, 596.046 Mflops
matrix<unbounded_array>, vector<unbounded_array> fast
elapsed: 0.65 s, 880.315 Mflops
prod (matrix, vector)
C array
elapsed: 1 s, 569.344 Mflops
c_matrix, c_vector safe
elapsed: 1.45 s, 392.651 Mflops
c_matrix, c_vector fast
elapsed: 1.34 s, 424.883 Mflops
matrix<unbounded_array>, vector<unbounded_array> safe
elapsed: 1.22 s, 466.675 Mflops
matrix<unbounded_array>, vector<unbounded_array> fast
elapsed: 1.19 s, 478.44 Mflops
matrix + matrix
C array
elapsed: 0 s, INF Mflops
c_matrix safe
elapsed: 1.46 s, 391.921 Mflops
c_matrix fast
elapsed: 0.72 s, 794.729 Mflops
matrix<unbounded_array> safe
elapsed: 2.12 s, 269.908 Mflops
matrix<unbounded_array> fast
elapsed: 1.8 s, 317.891 Mflops
bench_3
prod (matrix, matrix)
C array
elapsed: 1.02 s, 558.18 Mflops
c_matrix safe
elapsed: 1.05 s, 542.232 Mflops
c_matrix fast
elapsed: 1.06 s, 537.117 Mflops
matrix<unbounded_array> safe
elapsed: 1.18 s, 482.495 Mflops
matrix<unbounded_array> fast
elapsed: 1.13 s, 503.844 Mflops

DOUBLE
peak
plus
elapsed: 0.35 s, 272.478 Mflops
multiplies
elapsed: 0 s, INF Mflops
DOUBLE, 3
bench_1
inner_prod
C array
elapsed: 1.06 s, 449.846 Mflops
c_vector
elapsed: 7.82 s, 60.9766 Mflops
vector<unbounded_array>
elapsed: 5.32 s, 89.631 Mflops
vector + vector
C array
elapsed: 0 s, INF Mflops
c_vector safe
elapsed: 21.62 s, 26.4664 Mflops
c_vector fast
elapsed: 12.86 s, 44.4949 Mflops
vector<unbounded_array> safe
elapsed: 56.49 s, 10.1293 Mflops
vector<unbounded_array> fast
elapsed: 11.75 s, 48.6983 Mflops
bench_2
outer_prod
C array
elapsed: 0 s, INF Mflops
c_matrix, c_vector safe
elapsed: 9.5 s, 54.2089 Mflops
c_matrix, c_vector fast
elapsed: 7.16 s, 71.9252 Mflops
matrix<unbounded_array>, vector<unbounded_array> safe
elapsed: 21.49 s, 23.9639 Mflops
matrix<unbounded_array>, vector<unbounded_array> fast
elapsed: 6.89 s, 74.7437 Mflops
prod (matrix, vector)
C array
elapsed: 0.02 s, 21457.7 Mflops
c_matrix, c_vector safe
elapsed: 6.28 s, 68.3365 Mflops
c_matrix, c_vector fast
elapsed: 4.86 s, 88.3032 Mflops
matrix<unbounded_array>, vector<unbounded_array> safe
elapsed: 17.82 s, 24.0827 Mflops
matrix<unbounded_array>, vector<unbounded_array> fast
elapsed: 4.49 s, 95.5798 Mflops
matrix + matrix
C array
elapsed: 0 s, INF Mflops
c_matrix safe
elapsed: 10.35 s, 49.7569 Mflops
c_matrix fast
elapsed: 8.37 s, 61.5274 Mflops
matrix<unbounded_array> safe
elapsed: 28.13 s, 18.3073 Mflops
matrix<unbounded_array> fast
elapsed: 10.18 s, 50.5878 Mflops
bench_3
prod (matrix, matrix)
C array
elapsed: 0.05 s, 8583.07 Mflops
c_matrix safe
elapsed: 3.73 s, 115.055 Mflops
c_matrix fast
elapsed: 2.83 s, 151.644 Mflops
matrix<unbounded_array> safe
elapsed: 7.93 s, 54.1177 Mflops
matrix<unbounded_array> fast
elapsed: 2.93 s, 146.469 Mflops
DOUBLE, 10
bench_1
inner_prod
C array
elapsed: 1.06 s, 512.825 Mflops
c_vector
elapsed: 4.23 s, 128.509 Mflops
vector<unbounded_array>
elapsed: 2.2 s, 247.088 Mflops
vector + vector
C array
elapsed: 0 s, INF Mflops
c_vector safe
elapsed: 11.69 s, 48.9482 Mflops
c_vector fast
elapsed: 5.77 s, 99.1689 Mflops
vector<unbounded_array> safe
elapsed: 19.07 s, 30.0055 Mflops
vector<unbounded_array> fast
elapsed: 4.53 s, 126.314 Mflops
bench_2
outer_prod
C array
elapsed: 0 s, INF Mflops
c_matrix, c_vector safe
elapsed: 5.09 s, 112.417 Mflops
c_matrix, c_vector fast
elapsed: 3.18 s, 179.939 Mflops
matrix<unbounded_array>, vector<unbounded_array> safe
elapsed: 6.17 s, 92.7398 Mflops
matrix<unbounded_array>, vector<unbounded_array> fast
elapsed: 2.09 s, 273.782 Mflops
prod (matrix, vector)
C array
elapsed: 0.27 s, 2013.31 Mflops
c_matrix, c_vector safe
elapsed: 2.45 s, 221.875 Mflops
c_matrix, c_vector fast
elapsed: 2.45 s, 221.875 Mflops
matrix<unbounded_array>, vector<unbounded_array> safe
elapsed: 3.16 s, 172.024 Mflops
matrix<unbounded_array>, vector<unbounded_array> fast
elapsed: 1.63 s, 333.493 Mflops
matrix + matrix
C array
elapsed: 0 s, INF Mflops
c_matrix safe
elapsed: 3.45 s, 165.856 Mflops
c_matrix fast
elapsed: 1.9 s, 301.16 Mflops
matrix<unbounded_array> safe
elapsed: 7.2 s, 79.4729 Mflops
matrix<unbounded_array> fast
elapsed: 2.95 s, 193.968 Mflops
bench_3
prod (matrix, matrix)
C array
elapsed: 0.32 s, 1698.73 Mflops
c_matrix safe
elapsed: 1.03 s, 527.762 Mflops
c_matrix fast
elapsed: 0.94 s, 578.292 Mflops
matrix<unbounded_array> safe
elapsed: 1.72 s, 316.043 Mflops
matrix<unbounded_array> fast
elapsed: 1.31 s, 414.958 Mflops
DOUBLE, 30
bench_1
inner_prod
C array
elapsed: 1.06 s, 530.819 Mflops
c_vector
elapsed: 2.78 s, 202.399 Mflops
vector<unbounded_array>
elapsed: 1.44 s, 390.742 Mflops
vector + vector
C array
elapsed: 0 s, INF Mflops
c_vector safe
elapsed: 7.21 s, 79.3626 Mflops
c_vector fast
elapsed: 4.02 s, 142.339 Mflops
vector<unbounded_array> safe
elapsed: 8.17 s, 70.0373 Mflops
vector<unbounded_array> fast
elapsed: 2.21 s, 258.916 Mflops
bench_2
outer_prod
C array
elapsed: 0 s, INF Mflops
c_matrix, c_vector safe
elapsed: 3.31 s, 155.584 Mflops
c_matrix, c_vector fast
elapsed: 2.15 s, 239.528 Mflops
matrix<unbounded_array>, vector<unbounded_array> safe
elapsed: 3.98 s, 129.393 Mflops
matrix<unbounded_array>, vector<unbounded_array> fast
elapsed: 1.28 s, 402.331 Mflops
prod (matrix, vector)
C array
elapsed: 0.27 s, 1875.56 Mflops
c_matrix, c_vector safe
elapsed: 1.64 s, 308.781 Mflops
c_matrix, c_vector fast
elapsed: 1.29 s, 392.559 Mflops
matrix<unbounded_array>, vector<unbounded_array> safe
elapsed: 1.28 s, 395.626 Mflops
matrix<unbounded_array>, vector<unbounded_array> fast
elapsed: 1.1 s, 460.365 Mflops
matrix + matrix
C array
elapsed: 0 s, INF Mflops
c_matrix safe
elapsed: 1.82 s, 282.958 Mflops
c_matrix fast
elapsed: 1.05 s, 490.461 Mflops
matrix<unbounded_array> safe
elapsed: 4.53 s, 113.683 Mflops
matrix<unbounded_array> fast
elapsed: 1.83 s, 281.412 Mflops
bench_3
prod (matrix, matrix)
C array
elapsed: 0.27 s, 1875.56 Mflops
c_matrix safe
elapsed: 0.83 s, 610.122 Mflops
c_matrix fast
elapsed: 0.8 s, 633.001 Mflops
matrix<unbounded_array> safe
elapsed: 1.1 s, 460.365 Mflops
matrix<unbounded_array> fast
elapsed: 1.02 s, 496.472 Mflops
DOUBLE, 100
bench_1
inner_prod
C array
elapsed: 1.06 s, 537.117 Mflops
c_vector
elapsed: 2.66 s, 214.039 Mflops
vector<unbounded_array>
elapsed: 1.17 s, 486.618 Mflops
vector + vector
C array
elapsed: 0 s, INF Mflops
c_vector safe
elapsed: 6.37 s, 89.828 Mflops
c_vector fast
elapsed: 2.35 s, 243.491 Mflops
vector<unbounded_array> safe
elapsed: 5.39 s, 106.16 Mflops
vector<unbounded_array> fast
elapsed: 1.53 s, 373.99 Mflops
bench_2
outer_prod
C array
elapsed: 0 s, INF Mflops
c_matrix, c_vector safe
elapsed: 3.39 s, 168.792 Mflops
c_matrix, c_vector fast
elapsed: 2.54 s, 225.277 Mflops
matrix<unbounded_array>, vector<unbounded_array> safe
elapsed: 4.26 s, 134.32 Mflops
matrix<unbounded_array>, vector<unbounded_array> fast
elapsed: 1.35 s, 423.855 Mflops
prod (matrix, vector)
C array
elapsed: 0.99 s, 575.095 Mflops
c_matrix, c_vector safe
elapsed: 1.52 s, 374.568 Mflops
c_matrix, c_vector fast
elapsed: 1.42 s, 400.946 Mflops
matrix<unbounded_array>, vector<unbounded_array> safe
elapsed: 1.13 s, 503.844 Mflops
matrix<unbounded_array>, vector<unbounded_array> fast
elapsed: 1.09 s, 522.334 Mflops
matrix + matrix
C array
elapsed: 0 s, INF Mflops
c_matrix safe
elapsed: 1.85 s, 309.3 Mflops
c_matrix fast
elapsed: 1.1 s, 520.186 Mflops
matrix<unbounded_array> safe
elapsed: 4.84 s, 118.224 Mflops
matrix<unbounded_array> fast
elapsed: 4.74 s, 120.718 Mflops
bench_3
prod (matrix, matrix)
C array
elapsed: 1.02 s, 558.18 Mflops
c_matrix safe
elapsed: 1.06 s, 537.117 Mflops
c_matrix fast
elapsed: 1.03 s, 552.761 Mflops
matrix<unbounded_array> safe
elapsed: 1.2 s, 474.453 Mflops
matrix<unbounded_array> fast
elapsed: 1.17 s, 486.618 Mflops