|
Ublas : |
From: Andreas Dolfen (a.dolfen_at_[hidden])
Date: 2006-04-20 11:20:26
> A couple of things you could look at. uBLAS has bench1-4. If you are only
> using Dense matrices then it would be sufficient to just compile up bench1
> for both versions and compare results. This may give us a better idea of
> which operations are suffering.
I just did that. The results are attached. It seems that the bench1
program shows the same performance degradation. I used:
export TOOLS=vacpp
bjam "-sBUILD=release"
for both Boost libraries, changing only the include directories.
Any ideas?
> Another thing you could try is compiling test/placement_new.cpp and seeing
> what the return value is. If this is -3 then you can enable
> BOOST_UBLAS_USEFUL_ARRAY_PLACEMENT_NEW
> which can make a major difference on some compilers.
Ok! I will try this next and send the result to you later/tomorrow.
Thank you all very much!
Andreas
DOUBLE
peak
plus
elapsed: 0.01 s, 95.3674 Mflops
multiplies
elapsed: 0 s, INF Mflops
DOUBLE, 3
bench_1
inner_prod
C array
elapsed: 0.01 s, 476.837 Mflops
c_vector
elapsed: 0.05 s, 95.3674 Mflops
vector<unbounded_array>
elapsed: 0.02 s, 238.419 Mflops
vector + vector
C array
elapsed: 0 s, INF Mflops
c_vector safe
elapsed: 0.09 s, 63.5783 Mflops
c_vector fast
elapsed: 0.06 s, 95.3674 Mflops
vector<unbounded_array> safe
elapsed: 0.39 s, 14.6719 Mflops
vector<unbounded_array> fast
elapsed: 0.04 s, 143.051 Mflops
bench_2
outer_prod
C array
elapsed: 0 s, INF Mflops
c_matrix, c_vector safe
elapsed: 0.08 s, 64.373 Mflops
c_matrix, c_vector fast
elapsed: 0.05 s, 102.997 Mflops
matrix<unbounded_array>, vector<unbounded_array> safe
elapsed: 0.15 s, 34.3323 Mflops
matrix<unbounded_array>, vector<unbounded_array> fast
elapsed: 0.04 s, 128.746 Mflops
prod (matrix, vector)
C array
elapsed: 0 s, INF Mflops
c_matrix, c_vector safe
elapsed: 0.08 s, 53.6442 Mflops
c_matrix, c_vector fast
elapsed: 0.04 s, 107.288 Mflops
matrix<unbounded_array>, vector<unbounded_array> safe
elapsed: 0.15 s, 28.6102 Mflops
matrix<unbounded_array>, vector<unbounded_array> fast
elapsed: 0.03 s, 143.051 Mflops
matrix + matrix
C array
elapsed: 0 s, INF Mflops
c_matrix safe
elapsed: 0.06 s, 85.8307 Mflops
c_matrix fast
elapsed: 0.04 s, 128.746 Mflops
matrix<unbounded_array> safe
elapsed: 0.14 s, 36.7846 Mflops
matrix<unbounded_array> fast
elapsed: 0.03 s, 171.661 Mflops
bench_3
prod (matrix, matrix)
C array
elapsed: 0 s, INF Mflops
c_matrix safe
elapsed: 0.03 s, 143.051 Mflops
c_matrix fast
elapsed: 0.02 s, 214.577 Mflops
matrix<unbounded_array> safe
elapsed: 0.06 s, 71.5256 Mflops
matrix<unbounded_array> fast
elapsed: 0.03 s, 143.051 Mflops
DOUBLE, 10
bench_1
inner_prod
C array
elapsed: 0.01 s, 543.594 Mflops
c_vector
elapsed: 0.03 s, 181.198 Mflops
vector<unbounded_array>
elapsed: 0.02 s, 271.797 Mflops
vector + vector
C array
elapsed: 0 s, INF Mflops
c_vector safe
elapsed: 0.06 s, 95.3674 Mflops
c_vector fast
elapsed: 0.03 s, 190.735 Mflops
vector<unbounded_array> safe
elapsed: 0.12 s, 47.6837 Mflops
vector<unbounded_array> fast
elapsed: 0.01 s, 572.205 Mflops
bench_2
outer_prod
C array
elapsed: 0 s, INF Mflops
c_matrix, c_vector safe
elapsed: 0.05 s, 114.441 Mflops
c_matrix, c_vector fast
elapsed: 0.02 s, 286.102 Mflops
matrix<unbounded_array>, vector<unbounded_array> safe
elapsed: 0.03 s, 190.735 Mflops
matrix<unbounded_array>, vector<unbounded_array> fast
elapsed: 0.01 s, 572.205 Mflops
prod (matrix, vector)
C array
elapsed: 0 s, INF Mflops
c_matrix, c_vector safe
elapsed: 0.02 s, 271.797 Mflops
c_matrix, c_vector fast
elapsed: 0.02 s, 271.797 Mflops
matrix<unbounded_array>, vector<unbounded_array> safe
elapsed: 0.03 s, 181.198 Mflops
matrix<unbounded_array>, vector<unbounded_array> fast
elapsed: 0.01 s, 543.594 Mflops
matrix + matrix
C array
elapsed: 0 s, INF Mflops
c_matrix safe
elapsed: 0.02 s, 286.102 Mflops
c_matrix fast
elapsed: 0.01 s, 572.205 Mflops
matrix<unbounded_array> safe
elapsed: 0.03 s, 190.735 Mflops
matrix<unbounded_array> fast
elapsed: 0.03 s, 190.735 Mflops
bench_3
prod (matrix, matrix)
C array
elapsed: 0 s, INF Mflops
c_matrix safe
elapsed: 0.01 s, 543.594 Mflops
c_matrix fast
elapsed: 0.01 s, 543.594 Mflops
matrix<unbounded_array> safe
elapsed: 0.01 s, 543.594 Mflops
matrix<unbounded_array> fast
elapsed: 0.02 s, 271.797 Mflops
DOUBLE, 30
bench_1
inner_prod
C array
elapsed: 0.01 s, 562.668 Mflops
c_vector
elapsed: 0.02 s, 281.334 Mflops
vector<unbounded_array>
elapsed: 0.01 s, 562.668 Mflops
vector + vector
C array
elapsed: 0 s, INF Mflops
c_vector safe
elapsed: 0.06 s, 95.3674 Mflops
c_vector fast
elapsed: 0.03 s, 190.735 Mflops
vector<unbounded_array> safe
elapsed: 0.04 s, 143.051 Mflops
vector<unbounded_array> fast
elapsed: 0.01 s, 572.205 Mflops
bench_2
outer_prod
C array
elapsed: 0 s, INF Mflops
c_matrix, c_vector safe
elapsed: 0.03 s, 171.661 Mflops
c_matrix, c_vector fast
elapsed: 0.02 s, 257.492 Mflops
matrix<unbounded_array>, vector<unbounded_array> safe
elapsed: 0.01 s, 514.984 Mflops
matrix<unbounded_array>, vector<unbounded_array> fast
elapsed: 0.01 s, 514.984 Mflops
prod (matrix, vector)
C array
elapsed: 0 s, INF Mflops
c_matrix, c_vector safe
elapsed: 0.02 s, 253.201 Mflops
c_matrix, c_vector fast
elapsed: 0.01 s, 506.401 Mflops
matrix<unbounded_array>, vector<unbounded_array> safe
elapsed: 0.01 s, 506.401 Mflops
matrix<unbounded_array>, vector<unbounded_array> fast
elapsed: 0.01 s, 506.401 Mflops
matrix + matrix
C array
elapsed: 0 s, INF Mflops
c_matrix safe
elapsed: 0.01 s, 514.984 Mflops
c_matrix fast
elapsed: 0.01 s, 514.984 Mflops
matrix<unbounded_array> safe
elapsed: 0.02 s, 257.492 Mflops
matrix<unbounded_array> fast
elapsed: 0.01 s, 514.984 Mflops
bench_3
prod (matrix, matrix)
C array
elapsed: 0.01 s, 506.401 Mflops
c_matrix safe
elapsed: 0.01 s, 506.401 Mflops
c_matrix fast
elapsed: 0 s, INF Mflops
matrix<unbounded_array> safe
elapsed: 0.01 s, 506.401 Mflops
matrix<unbounded_array> fast
elapsed: 0.01 s, 506.401 Mflops
DOUBLE, 100
bench_1
inner_prod
C array
elapsed: 0.01 s, 569.344 Mflops
c_vector
elapsed: 0.02 s, 284.672 Mflops
vector<unbounded_array>
elapsed: 0.01 s, 569.344 Mflops
vector + vector
C array
elapsed: 0 s, INF Mflops
c_vector safe
elapsed: 0.05 s, 114.441 Mflops
c_vector fast
elapsed: 0.03 s, 190.735 Mflops
vector<unbounded_array> safe
elapsed: 0.02 s, 286.102 Mflops
vector<unbounded_array> fast
elapsed: 0 s, INF Mflops
bench_2
outer_prod
C array
elapsed: 0 s, INF Mflops
c_matrix, c_vector safe
elapsed: 0.03 s, 190.735 Mflops
c_matrix, c_vector fast
elapsed: 0.03 s, 190.735 Mflops
matrix<unbounded_array>, vector<unbounded_array> safe
elapsed: 0.01 s, 572.205 Mflops
matrix<unbounded_array>, vector<unbounded_array> fast
elapsed: 0 s, INF Mflops
prod (matrix, vector)
C array
elapsed: 0.01 s, 569.344 Mflops
c_matrix, c_vector safe
elapsed: 0.01 s, 569.344 Mflops
c_matrix, c_vector fast
elapsed: 0.01 s, 569.344 Mflops
matrix<unbounded_array>, vector<unbounded_array> safe
elapsed: 0.01 s, 569.344 Mflops
matrix<unbounded_array>, vector<unbounded_array> fast
elapsed: 0.01 s, 569.344 Mflops
matrix + matrix
C array
elapsed: 0 s, INF Mflops
c_matrix safe
elapsed: 0.02 s, 286.102 Mflops
c_matrix fast
elapsed: 0.01 s, 572.205 Mflops
matrix<unbounded_array> safe
elapsed: 0.02 s, 286.102 Mflops
matrix<unbounded_array> fast
elapsed: 0.02 s, 286.102 Mflops
bench_3
prod (matrix, matrix)
C array
elapsed: 0.01 s, 569.344 Mflops
c_matrix safe
elapsed: 0.01 s, 569.344 Mflops
c_matrix fast
elapsed: 0.01 s, 569.344 Mflops
matrix<unbounded_array> safe
elapsed: 0.01 s, 569.344 Mflops
matrix<unbounded_array> fast
elapsed: 0.01 s, 569.344 Mflops
DOUBLE
peak
plus
elapsed: 0.01 s, 95.3674 Mflops
multiplies
elapsed: 0 s, INF Mflops
DOUBLE, 3
bench_1
inner_prod
C array
elapsed: 0.01 s, 476.837 Mflops
c_vector
elapsed: 0.06 s, 79.4729 Mflops
vector<unbounded_array>
elapsed: 0.06 s, 79.4729 Mflops
vector + vector
C array
elapsed: 0 s, INF Mflops
c_vector safe
elapsed: 0.21 s, 27.2478 Mflops
c_vector fast
elapsed: 0.13 s, 44.0157 Mflops
vector<unbounded_array> safe
elapsed: 0.59 s, 9.69838 Mflops
vector<unbounded_array> fast
elapsed: 0.11 s, 52.0186 Mflops
bench_2
outer_prod
C array
elapsed: 0 s, INF Mflops
c_matrix, c_vector safe
elapsed: 0.1 s, 51.4984 Mflops
c_matrix, c_vector fast
elapsed: 0.06 s, 85.8307 Mflops
matrix<unbounded_array>, vector<unbounded_array> safe
elapsed: 0.22 s, 23.4084 Mflops
matrix<unbounded_array>, vector<unbounded_array> fast
elapsed: 0.07 s, 73.5692 Mflops
prod (matrix, vector)
C array
elapsed: 0 s, INF Mflops
c_matrix, c_vector safe
elapsed: 0.07 s, 61.3076 Mflops
c_matrix, c_vector fast
elapsed: 0.07 s, 61.3076 Mflops
matrix<unbounded_array>, vector<unbounded_array> safe
elapsed: 0.18 s, 23.8419 Mflops
matrix<unbounded_array>, vector<unbounded_array> fast
elapsed: 0.05 s, 85.8307 Mflops
matrix + matrix
C array
elapsed: 0 s, INF Mflops
c_matrix safe
elapsed: 0.11 s, 46.8167 Mflops
c_matrix fast
elapsed: 0.08 s, 64.373 Mflops
matrix<unbounded_array> safe
elapsed: 0.26 s, 19.8071 Mflops
matrix<unbounded_array> fast
elapsed: 0.1 s, 51.4984 Mflops
bench_3
prod (matrix, matrix)
C array
elapsed: 0 s, INF Mflops
c_matrix safe
elapsed: 0.04 s, 107.288 Mflops
c_matrix fast
elapsed: 0.02 s, 214.577 Mflops
matrix<unbounded_array> safe
elapsed: 0.08 s, 53.6442 Mflops
matrix<unbounded_array> fast
elapsed: 0.03 s, 143.051 Mflops
DOUBLE, 10
bench_1
inner_prod
C array
elapsed: 0.01 s, 543.594 Mflops
c_vector
elapsed: 0.04 s, 135.899 Mflops
vector<unbounded_array>
elapsed: 0.02 s, 271.797 Mflops
vector + vector
C array
elapsed: 0 s, INF Mflops
c_vector safe
elapsed: 0.12 s, 47.6837 Mflops
c_vector fast
elapsed: 0.05 s, 114.441 Mflops
vector<unbounded_array> safe
elapsed: 0.19 s, 30.116 Mflops
vector<unbounded_array> fast
elapsed: 0.04 s, 143.051 Mflops
bench_2
outer_prod
C array
elapsed: 0 s, INF Mflops
c_matrix, c_vector safe
elapsed: 0.05 s, 114.441 Mflops
c_matrix, c_vector fast
elapsed: 0.03 s, 190.735 Mflops
matrix<unbounded_array>, vector<unbounded_array> safe
elapsed: 0.06 s, 95.3674 Mflops
matrix<unbounded_array>, vector<unbounded_array> fast
elapsed: 0.02 s, 286.102 Mflops
prod (matrix, vector)
C array
elapsed: 0 s, INF Mflops
c_matrix, c_vector safe
elapsed: 0.03 s, 181.198 Mflops
c_matrix, c_vector fast
elapsed: 0.02 s, 271.797 Mflops
matrix<unbounded_array>, vector<unbounded_array> safe
elapsed: 0.03 s, 181.198 Mflops
matrix<unbounded_array>, vector<unbounded_array> fast
elapsed: 0.02 s, 271.797 Mflops
matrix + matrix
C array
elapsed: 0 s, INF Mflops
c_matrix safe
elapsed: 0.03 s, 190.735 Mflops
c_matrix fast
elapsed: 0.02 s, 286.102 Mflops
matrix<unbounded_array> safe
elapsed: 0.07 s, 81.7435 Mflops
matrix<unbounded_array> fast
elapsed: 0.03 s, 190.735 Mflops
bench_3
prod (matrix, matrix)
C array
elapsed: 0.01 s, 543.594 Mflops
c_matrix safe
elapsed: 0.01 s, 543.594 Mflops
c_matrix fast
elapsed: 0.01 s, 543.594 Mflops
matrix<unbounded_array> safe
elapsed: 0.01 s, 543.594 Mflops
matrix<unbounded_array> fast
elapsed: 0.02 s, 271.797 Mflops
DOUBLE, 30
bench_1
inner_prod
C array
elapsed: 0.01 s, 562.668 Mflops
c_vector
elapsed: 0.03 s, 187.556 Mflops
vector<unbounded_array>
elapsed: 0.01 s, 562.668 Mflops
vector + vector
C array
elapsed: 0 s, INF Mflops
c_vector safe
elapsed: 0.07 s, 81.7435 Mflops
c_vector fast
elapsed: 0.04 s, 143.051 Mflops
vector<unbounded_array> safe
elapsed: 0.09 s, 63.5783 Mflops
vector<unbounded_array> fast
elapsed: 0.03 s, 190.735 Mflops
bench_2
outer_prod
C array
elapsed: 0 s, INF Mflops
c_matrix, c_vector safe
elapsed: 0.03 s, 171.661 Mflops
c_matrix, c_vector fast
elapsed: 0.02 s, 257.492 Mflops
matrix<unbounded_array>, vector<unbounded_array> safe
elapsed: 0.04 s, 128.746 Mflops
matrix<unbounded_array>, vector<unbounded_array> fast
elapsed: 0.02 s, 257.492 Mflops
prod (matrix, vector)
C array
elapsed: 0 s, INF Mflops
c_matrix, c_vector safe
elapsed: 0.01 s, 506.401 Mflops
c_matrix, c_vector fast
elapsed: 0.02 s, 253.201 Mflops
matrix<unbounded_array>, vector<unbounded_array> safe
elapsed: 0.01 s, 506.401 Mflops
matrix<unbounded_array>, vector<unbounded_array> fast
elapsed: 0.01 s, 506.401 Mflops
matrix + matrix
C array
elapsed: 0 s, INF Mflops
c_matrix safe
elapsed: 0.02 s, 257.492 Mflops
c_matrix fast
elapsed: 0.01 s, 514.984 Mflops
matrix<unbounded_array> safe
elapsed: 0.05 s, 102.997 Mflops
matrix<unbounded_array> fast
elapsed: 0.02 s, 257.492 Mflops
bench_3
prod (matrix, matrix)
C array
elapsed: 0 s, INF Mflops
c_matrix safe
elapsed: 0.01 s, 506.401 Mflops
c_matrix fast
elapsed: 0.01 s, 506.401 Mflops
matrix<unbounded_array> safe
elapsed: 0.01 s, 506.401 Mflops
matrix<unbounded_array> fast
elapsed: 0.01 s, 506.401 Mflops
DOUBLE, 100
bench_1
inner_prod
C array
elapsed: 0.01 s, 569.344 Mflops
c_vector
elapsed: 0.02 s, 284.672 Mflops
vector<unbounded_array>
elapsed: 0.02 s, 284.672 Mflops
vector + vector
C array
elapsed: 0 s, INF Mflops
c_vector safe
elapsed: 0.05 s, 114.441 Mflops
c_vector fast
elapsed: 0.03 s, 190.735 Mflops
vector<unbounded_array> safe
elapsed: 0.06 s, 95.3674 Mflops
vector<unbounded_array> fast
elapsed: 0.01 s, 572.205 Mflops
bench_2
outer_prod
C array
elapsed: 0 s, INF Mflops
c_matrix, c_vector safe
elapsed: 0.04 s, 143.051 Mflops
c_matrix, c_vector fast
elapsed: 0.02 s, 286.102 Mflops
matrix<unbounded_array>, vector<unbounded_array> safe
elapsed: 0.04 s, 143.051 Mflops
matrix<unbounded_array>, vector<unbounded_array> fast
elapsed: 0.01 s, 572.205 Mflops
prod (matrix, vector)
C array
elapsed: 0.01 s, 569.344 Mflops
c_matrix, c_vector safe
elapsed: 0.02 s, 284.672 Mflops
c_matrix, c_vector fast
elapsed: 0.01 s, 569.344 Mflops
matrix<unbounded_array>, vector<unbounded_array> safe
elapsed: 0.01 s, 569.344 Mflops
matrix<unbounded_array>, vector<unbounded_array> fast
elapsed: 0.02 s, 284.672 Mflops
matrix + matrix
C array
elapsed: 0 s, INF Mflops
c_matrix safe
elapsed: 0.02 s, 286.102 Mflops
c_matrix fast
elapsed: 0.01 s, 572.205 Mflops
matrix<unbounded_array> safe
elapsed: 0.05 s, 114.441 Mflops
matrix<unbounded_array> fast
elapsed: 0.02 s, 286.102 Mflops
bench_3
prod (matrix, matrix)
C array
elapsed: 0.01 s, 569.344 Mflops
c_matrix safe
elapsed: 0.01 s, 569.344 Mflops
c_matrix fast
elapsed: 0.01 s, 569.344 Mflops
matrix<unbounded_array> safe
elapsed: 0.01 s, 569.344 Mflops
matrix<unbounded_array> fast
elapsed: 0.02 s, 284.672 Mflops