# Boost :

From: Patrick Kowalzick (Patrick.Kowalzick_at_[hidden])
Date: 2003-05-15 06:19:06

Hello Zdenek,

hihi, I think I had the blunder in my code. I did not keep it, but it must
had been somthing stupid. If I calculate a "theoratical" time for my machine
it takes about 10sec for 2000x2000. My result was not possible at all! (for
your machine it should take 13.33sec, so what is your ATLAS doing). My used
"formula" is just time=2*size^3/clock_speed.

So I took your testsuite and added a lit more things inside, just to get a
better understanding. I made a matrix calculation with ublas, valarray and
double*, and each in different storage order (called "normal", "suboptimal"
and "optimal"). I did not wanted to think too much, so I am not sure which
order in which case and it seems to me that I have a bug inside (or do you
believe that in the "optimal" case uBlas is so much faster? ;-)).

I addded two other things, just to test which time is needed for the
mathematical operations, but in the second case where I included the
calculation for the position in the array it seems that the optimizer is
doing a good job..........

The results shows, that uBlas is quite close to the others. Now another
question appears: Am I too stupid to write a matrix multiplication? or in
other words: Why are matlab (2000x2000 - 17s - snief) and ATLAS so *******
fast? I will think about that this afternoon and will keep you informed. I
am pretty sure we will get it running like hell ;-).

Regards,
Patrick

******************** Results *************************

size of metrices - 500x500

ublas (normal) 2.437s
valarray (normal) 2.468s
double* (normal) 2.406s

ublas (suboptimal) 3.844s
valarray (suboptimal) 3.875s
double* (suboptimal) 3.875s

ublas (optimal) 1.063s
valarray (optimal) 2.407s
double* (optimal) 2.281s

matrix operations 0.391s
matrix operations + positioning 0.406s

size of metrices - 1000x1000

ublas (normal) 21.438s
valarray (normal) 20.219s
double* (normal) 19.937s

ublas (suboptimal) 35.156s
valarray (suboptimal) 34.984s
double* (suboptimal) 35.25s

ublas (optimal) 8.39s
valarray (optimal) 19.094s
double* (optimal) 18.344s

matrix operations 3.188s
matrix operations + positioning 3.172s

size of metrices - 1500x1500

ublas (normal) 73.907s
valarray (normal) 69.172s
double* (normal) 67.422s

ublas (suboptimal) 211.219s
valarray (suboptimal) 215.125s
double* (suboptimal) 214.811s

ublas (optimal) 28.625s
valarray (optimal) 64.5s
double* (optimal) 64.125s

matrix operations 11.094s
matrix operations + positioning 11.328s

size of metrices - 2000x2000

ublas (normal) 228.563s
valarray (normal) 171.078s
double* (normal) 166.719s

ublas (suboptimal) 1756.17s
valarray (suboptimal) 1753.25s
double* (suboptimal) 1649.56s

ublas (optimal) 69.578s
valarray (optimal) 158.36s
double* (optimal) 151.969s

matrix operations 26.578s
matrix operations + positioning 26.547s

******************** start of listing *********************
// please no comments about coding style - LOL
#include <iostream>
#include <valarray>

#include <boost/numeric/ublas/matrix.hpp>
#include <boost/numeric/ublas/io.hpp>
#include <boost/timer.hpp>

namespace ublas = boost::numeric::ublas;
using namespace std;

void initmatrix(ublas::matrix<double>& A) {
for(size_t i = 0; i<A.size1(); i++)
for(size_t j = 0; j<A.size2(); j++)
A(i,j) = 1.0*rand();
}
void initmatrix(ublas::matrix<double,ublas::column_major>& A) {
for(size_t i = 0; i<A.size1(); i++)
for(size_t j = 0; j<A.size2(); j++)
A(i,j) = 1.0*rand();
}

void test(const size_t size) {
boost::timer t;
double time;

cout << "size of metrices - " << size << "x" << size << endl << endl;

{
cout << "ublas (normal) ";

ublas::matrix<double> A(size,size), B(size,size), X(size,size);
initmatrix(A);
initmatrix(B);

t.restart();
X = ublas::prod< ublas::matrix<double> >(A,B);
time = t.elapsed();
cout << time << "s" << endl;
}

{
cout << "valarray (normal) ";

valarray<double> A(size*size),B(size*size),X(size*size);
for(size_t i = 0; i<A.size(); i++) {
A[i] = 1.0*rand();
B[i]=1.0*rand();
}

t.restart();
for (size_t r=0;r<size;r++)
for (size_t c=0;c<size;c++)
for (size_t k=0;k<size;k++)
X[r+c*size] += A[r+k*size]*B[k+c*size];
time = t.elapsed();
cout << time << "s" << endl;
}

{
cout << "double* (normal) ";

double *A,*B,*X;
A = new double[size*size];
B = new double[size*size];
X = new double[size*size];
for(size_t i = 0; i<size*size; i++) {
A[i] = 1.0*rand();
B[i]=1.0*rand();
}

t.restart();
for (size_t r=0;r<size;r++)
for (size_t c=0;c<size;c++)
for (size_t k=0;k<size;k++)
X[r+c*size] += A[r+k*size]*B[k+c*size];
time = t.elapsed();
cout << time << "s" << endl;

delete A;
delete B;
delete X;
}

cout << endl;

{
cout << "ublas (suboptimal) ";

ublas::matrix<double,ublas::column_major> A(size,size);
ublas::matrix<double> B(size,size);
ublas::matrix<double> X(size,size);
initmatrix(A);
initmatrix(B);

t.restart();
X = ublas::prod< ublas::matrix<double> >(A,B);
time = t.elapsed();
cout << time << "s" << endl;
}

{
cout << "valarray (suboptimal) ";

valarray<double> A(size*size),B(size*size),X(size*size);
for(size_t i = 0; i<A.size(); i++) {
A[i] = 1.0*rand();
B[i]=1.0*rand();
}

t.restart();
for (size_t r=0;r<size;r++)
for (size_t c=0;c<size;c++)
for (size_t k=0;k<size;k++)
X[r+c*size] += A[r+k*size]*B[c+k*size];
time = t.elapsed();
cout << time << "s" << endl;
}

{
cout << "double* (suboptimal) ";

double *A,*B,*X;
A = new double[size*size];
B = new double[size*size];
X = new double[size*size];
for(size_t i = 0; i<size*size; i++) {
A[i] = 1.0*rand();
B[i]=1.0*rand();
}
t.restart();
for (size_t r=0;r<size;r++)
for (size_t c=0;c<size;c++)
for (size_t k=0;k<size;k++)
X[r+c*size] += A[r+k*size]*B[c+k*size];
time = t.elapsed();
cout << time << "s" << endl;

delete A;
delete B;
delete X;
}

cout << endl;

{
cout << "ublas (optimal) ";

ublas::matrix<double> A(size,size);
ublas::matrix<double,ublas::column_major> B(size,size);
ublas::matrix<double> X(size,size);
initmatrix(A);
initmatrix(B);

t.restart();
X = ublas::prod< ublas::matrix<double> >(A,B);
time = t.elapsed();
cout << time << "s" << endl;
}

{
cout << "valarray (optimal) ";

valarray<double> A(size*size),B(size*size),X(size*size);
for(size_t i = 0; i<A.size(); i++) {
A[i] = 1.0*rand();
B[i]=1.0*rand();
}

t.restart();
for (size_t r=0;r<size;r++)
for (size_t c=0;c<size;c++)
for (size_t k=0;k<size;k++)
X[r+c*size] += A[k+r*size]*B[k+c*size];
time = t.elapsed();
cout << time << "s" << endl;
}

{
cout << "double* (optimal) ";

double *A,*B,*X;
A = new double[size*size];
B = new double[size*size];
X = new double[size*size];
for(size_t i = 0; i<size*size; i++) {
A[i] = 1.0*rand();
B[i]=1.0*rand();
}

t.restart();
for (size_t r=0;r<size;r++)
for (size_t c=0;c<size;c++)
for (size_t k=0;k<size;k++)
X[r+c*size] += A[k+r*size]*B[k+c*size];
time = t.elapsed();
cout << time << "s" << endl;

delete A;
delete B;
delete X;
}

cout << endl;

{
cout << "matrix operations ";

double A=1.0*rand(),B=1.0*rand(),X=1.0*rand();

t.restart();
for (size_t r=0;r<size;r++)
for (size_t c=0;c<size;c++)
for (size_t k=0;k<size;k++)
X += A*B;
time = t.elapsed();
cout << time << "s" << endl;
}
{

cout << "matrix operations + positioning ";

double A=1.0*rand(),B=1.0*rand(),X=1.0*rand();
size_t d1,d2,d3;

t.restart();
for (size_t r=0;r<size;r++)
for (size_t c=0;c<size;c++)
for (size_t k=0;k<size;k++) {
X += A*B;
d1=r+c*size;
d2=k+r*size;
d3=k+c*size;
}
time = t.elapsed();
cout << time << "s" << endl;
}

cout << endl << endl;
}

void main() {
test(500);
test(1000);
test(1500);
test(2000);
}