I wrote a serial code for a matrix-vector multiplication in a
Coordinate Storage Format(COO). COO method has already made a big
speed-up. My matrix is in a tridiagonal form. I want to parallelize
the code and see how the parallelizm improves the code. My matrix could
grow up to 2.000.000*2.000.000. Any efficient parallelization approach
doesn't come to my mind (I have written some basic mpi programs but not
much more. So my experience is limited.) Could you give me some
headstart?
Thanks.
Özhan Fenerci
#include <boost/numeric/ublas/banded.hpp>
#include <boost/numeric/ublas/io.hpp>
#include <vector>
#include <iostream>
#include "mpi.h"
#include <stdio.h>
int main(int argc, char *argv[] ) {
using namespace boost::numeric::ublas;
using std::cout;
using std::endl;
int numprocs, rank, chunk_size, i,j;
int max, mymax,rem;
int totalNonZeros; //totalNonZeros of TriDiagonal Matrix
MPI_Status status;
//b=A*x
banded_matrix<double> A (10, 10, 1, 1);
//vector<double> x(10);
//vector<double> b(10);
int* x = new int[10];
int* b = new int[10];
/* Initialize MPI */
MPI_Init( &argc,&argv);
MPI_Comm_rank( MPI_COMM_WORLD, &rank);
MPI_Comm_size( MPI_COMM_WORLD, &numprocs);
//MPI_Status status;
if (rank == 0) {
for (signed i = 0; i < signed (A.size1 ()); ++ i) {
for (signed j = std::max (i - 1, 0); j < std::min (i + 2, signed (A.size2 ())); ++j)
A (i, j) = i + j;
x[i]= 1;
}
}
if(rank == 0) {
cout << A <<endl<<endl;
totalNonZeros = 0;
for (signed i = 0; i < signed (A.size1 ()); ++ i) {
for (signed j = std::max (i - 1, 0); j < std::min (i + 2, signed (A.size2 ())); ++ j)
if( A(i,j) !=0)
totalNonZeros++;
}
}
cout<<"Number ot NonZeros "<<totalNonZeros<<endl;
if(rank == 0) {
int * row_idx = new int[totalNonZeros]; // row index array
int * col_idx = new int[totalNonZeros]; // col index array
int * val = new int[totalNonZeros]; //val array
int index=0;
for (signed i = 0; i < signed (A.size1 ()); ++ i) {
for (signed j = std::max (i - 1, 0); j < std::min (i + 2, signed (A.size2 ())); ++ j)
if( A(i,j) !=0) {
row_idx[index] = i;
col_idx[index]=j;
val[index] = A(i,j);
index++;
}
}
//Clear out result
for ( int k=0; k<A.size1(); ++k)
{
b[k]= 0;
}
//Matrix-vector Multiply
for (int index=0 ; index <totalNonZeros;++index){
b[row_idx[index]] = b[row_idx[index]]+ val[index]*x[col_idx[index]]; cout<<b[3]<<endl;
}
//delete row_idx;
// delete col_idx;
//delete val;
}
MPI_Finalize();
return 0;