#include #include #include #include #include #include #include #include "gemm.hpp" template void fill(MATRIX &A) { typedef typename MATRIX::size_type size_type; typedef typename MATRIX::value_type T; std::random_device random; std::default_random_engine mt(random()); std::uniform_real_distribution uniform(-100,100); for (size_type i=0; i typename MATRIX::value_type asum(const MATRIX &A) { typedef typename MATRIX::size_type size_type; typedef typename MATRIX::value_type T; T asum = 0; for (size_type i=0; i void axpy_prod(const MATRIXA &A, const MATRIXB &B, MATRIXC &C, bool update) { typedef typename MATRIXA::value_type TA; typedef typename MATRIXC::value_type TC; assert(A.size2()==B.size1()); const std::ptrdiff_t m = C.size1(); const std::ptrdiff_t n = C.size2(); const std::ptrdiff_t k = A.size2(); const std::ptrdiff_t incRowA = &A(1,0) - &A(0,0); const std::ptrdiff_t incColA = &A(0,1) - &A(0,0); const std::ptrdiff_t incRowB = &B(1,0) - &B(0,0); const std::ptrdiff_t incColB = &B(0,1) - &B(0,0); const std::ptrdiff_t incRowC = &C(1,0) - &C(0,0); const std::ptrdiff_t incColC = &C(0,1) - &C(0,0); gemm(m, n, k, TA(1), &A(0,0), incRowA, incColA, &B(0,0), incRowB, incColB, TC(update ? 0 : 1), &C(0,0), incRowC, incColC); } } // namespace foo int main() { namespace ublas = boost::numeric::ublas; const std::size_t m_min = 100; const std::size_t k_min = 100; const std::size_t n_min = 100; const std::size_t m_max = 4000; const std::size_t k_max = 4000; const std::size_t n_max = 4000; const std::size_t m_inc = 100; const std::size_t k_inc = 100; const std::size_t n_inc = 100; const bool matprodUpdate = true; typedef double T; typedef ublas::row_major SO; boost::timer t; for (std::size_t m=m_min, k=k_min, n=n_min; m<=m_max && k<=k_max && n<=n_max; m += m_inc, k += k_inc, n += n_inc) { ublas::matrix A(m, k); ublas::matrix B(k, n); ublas::matrix C1(m, n); ublas::matrix C2(m, n); fill(A); fill(B); fill(C1); C2 = C1; t.restart(); ublas::axpy_prod(A, B, C1, matprodUpdate); double t1 = t.elapsed(); t.restart(); foo::axpy_prod(A, B, C2, matprodUpdate); double t2 = t.elapsed(); C2 -= C1; std::cout.width(5); std::cout << m << " "; std::cout.width(10); std::cout << t1 << " "; std::cout.width(10); std::cout << t2 << " "; std::cout.width(15); std::cout << asum(C2); std::cout << std::endl; } }