00001 #include <iostream>
00002 #include <iomanip>
00003
00004 extern "C" {
00005 #include <atlas/cblas.h>
00006
00007 }
00008
00009 #include <boost/numeric/ublas/matrix.hpp>
00010 #include <boost/numeric/ublas/operation.hpp>
00011 #include <boost/numeric/ublas/operation_blocked.hpp>
00012 #include <boost/numeric/ublas/io.hpp>
00013 #include <boost/timer.hpp>
00014
00015 namespace boost { namespace numeric { namespace ublas {
00016
00017 template<class M, std::size_t IBS, std::size_t JBS, std::size_t KBS, class E1, class E2>
00018 BOOST_UBLAS_INLINE
00019 M
00020 block_prod_goto (const matrix_expression<E1> &e1,
00021 const matrix_expression<E2> &e2,
00022 row_major_tag) {
00023 typedef M matrix_type;
00024 const std::size_t i_block_size = IBS;
00025 const std::size_t j_block_size = JBS;
00026 const std::size_t k_block_size = KBS;
00027 typedef const E1 expression1_type;
00028 typedef const E2 expression2_type;
00029 typedef typename M::size_type size_type;
00030 typedef typename M::value_type value_type;
00031
00032 M m (e1 ().size1 (), e2 ().size2 ());
00033 #ifdef BOOST_UBLAS_TYPE_CHECK
00034 matrix<value_type, row_major> cm (m.size1 (), m.size2 ());
00035 indexing_matrix_assign (scalar_assign<value_type, value_type> (), cm, prod (e1, e2), row_major_tag ());
00036 disable_type_check = true;
00037 #endif
00038 size_type i_size = e1 ().size1 ();
00039 size_type j_size = e2 ().size2 ();
00040 size_type k_size = BOOST_UBLAS_SAME (e1 ().size2 (), e2 ().size1 ());
00041 m.assign (zero_matrix<value_type> (m.size1 (), m.size2 ()));
00042 for (size_type i_begin = 0; i_begin < i_size; i_begin += i_block_size) {
00043 size_type i_end = i_begin + std::min (i_size - i_begin, i_block_size);
00044 for (size_type k_begin = 0; k_begin < k_size; k_begin += k_block_size) {
00045 size_type k_end = k_begin + std::min (k_size - k_begin, k_block_size);
00046
00047 const matrix<value_type, row_major> e1_range (project (e1 (), range (i_begin, i_end), range (k_begin, k_end)));
00048 for (size_type j_begin = 0; j_begin < j_size; j_begin += j_block_size) {
00049 size_type j_end = j_begin + std::min (j_size - j_begin, j_block_size);
00050 matrix_range<matrix_type> m_range (m, range (i_begin, i_end), range (j_begin, j_end));
00051 if (boost::is_same<typename E2::orientation_category, column_major>::value) {
00052 const matrix_range<expression2_type> e2_range (e2 (), range (k_begin, k_end), range (j_begin, j_end));
00053 m_range.plus_assign (prod (e1_range, e2_range));
00054 } else {
00055
00056 const matrix<value_type, column_major> e2_range (project (e2 (), range (k_begin, k_end), range (j_begin, j_end)));
00057 m_range.plus_assign (prod (e1_range, e2_range));
00058 }
00059 }
00060 }
00061 }
00062 #ifdef BOOST_UBLAS_TYPE_CHECK
00063 disable_type_check = false;
00064 BOOST_UBLAS_CHECK (equals (m, cm), internal_logic ());
00065 #endif
00066 return m;
00067 }
00068
00069 template<class M, std::size_t IBS, std::size_t JBS, std::size_t KBS, class E1, class E2>
00070 BOOST_UBLAS_INLINE
00071 M
00072 block_prod_goto (const matrix_expression<E1> &e1,
00073 const matrix_expression<E2> &e2,
00074 column_major_tag) {
00075 typedef M matrix_type;
00076 const std::size_t i_block_size = IBS;
00077 const std::size_t j_block_size = JBS;
00078 const std::size_t k_block_size = KBS;
00079 typedef const E1 expression1_type;
00080 typedef const E2 expression2_type;
00081 typedef typename M::size_type size_type;
00082 typedef typename M::value_type value_type;
00083
00084 M m (e1 ().size1 (), e2 ().size2 ());
00085 #ifdef BOOST_UBLAS_TYPE_CHECK
00086 matrix<value_type, row_major> cm (m.size1 (), m.size2 ());
00087 indexing_matrix_assign (scalar_assign<value_type, value_type> (), cm, prod (e1, e2), row_major_tag ());
00088 disable_type_check = true;
00089 #endif
00090 size_type i_size = e1 ().size1 ();
00091 size_type j_size = e2 ().size2 ();
00092 size_type k_size = BOOST_UBLAS_SAME (e1 ().size2 (), e2 ().size1 ());
00093 m.assign (zero_matrix<value_type> (m.size1 (), m.size2 ()));
00094 for (size_type j_begin = 0; j_begin < j_size; j_begin += j_block_size) {
00095 size_type j_end = j_begin + std::min (j_size - j_begin, j_block_size);
00096 for (size_type k_begin = 0; k_begin < k_size; k_begin += k_block_size) {
00097 size_type k_end = k_begin + std::min (k_size - k_begin, k_block_size);
00098
00099 const matrix<value_type, column_major> e2_range (project (e2 (), range (k_begin, k_end), range (j_begin, j_end)));
00100 for (size_type i_begin = 0; i_begin < i_size; i_begin += i_block_size) {
00101 size_type i_end = i_begin + std::min (i_size - i_begin, i_block_size);
00102 matrix_range<matrix_type> m_range (m, range (i_begin, i_end), range (j_begin, j_end));
00103 if (boost::is_same<typename E1::orientation_category, row_major>::value) {
00104 const matrix_range<expression1_type> e1_range (e1 (), range (i_begin, i_end), range (k_begin, k_end));
00105 m_range.plus_assign (prod (e1_range, e2_range));
00106 } else {
00107
00108 const matrix<value_type, row_major> e1_range (project (e1 (), range (i_begin, i_end), range (k_begin, k_end)));
00109 m_range.plus_assign (prod (e1_range, e2_range));
00110 }
00111 }
00112 }
00113 }
00114 #ifdef BOOST_UBLAS_TYPE_CHECK
00115 disable_type_check = false;
00116 BOOST_UBLAS_CHECK (equals (m, cm), internal_logic ());
00117 #endif
00118 return m;
00119 }
00120
00121
00122 template<class M, std::size_t IBS, std::size_t JBS, std::size_t KBS, class E1, class E2>
00123 BOOST_UBLAS_INLINE
00124 M
00125 block_prod_goto (const matrix_expression<E1> &e1,
00126 const matrix_expression<E2> &e2) {
00127 typedef typename M::orientation_category orientation_category;
00128 return block_prod_goto<M, IBS, JBS, KBS> (e1, e2, orientation_category ());
00129 }
00130
00131 }}}
00132
00133 namespace ublas = boost::numeric::ublas;
00134 using namespace std;
00135
00136 BOOST_UBLAS_INLINE
00137 void
00138 cblas_prod (ublas::matrix<double, ublas::row_major, ublas::unbounded_array<double> >& C,
00139 const ublas::matrix<double, ublas::row_major, ublas::unbounded_array<double> >& A,
00140 const ublas::matrix<double, ublas::row_major, ublas::unbounded_array<double> >& B) {
00141
00142
00143 int m = C.size1();
00144 int n = C.size2();
00145 int k = A.size2();
00146 BOOST_UBLAS_CHECK (C.size1() == A.size1(), ublas::bad_size ());
00147 BOOST_UBLAS_CHECK (A.size2() == B.size1(), ublas::bad_size ());
00148 BOOST_UBLAS_CHECK (B.size2() == C.size2(), ublas::bad_size ());
00149 cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans,
00150 m, n, k, 1.0, &A.data()[0], A.size2(), &B.data()[0], B.size2(), 1.0, &C.data()[0], C.size2());
00151 }
00152
00153 BOOST_UBLAS_INLINE
00154 void
00155 cblas_prod (ublas::matrix<double, ublas::row_major, ublas::unbounded_array<double> >& C,
00156 const ublas::matrix<double, ublas::column_major, ublas::unbounded_array<double> >& A,
00157 const ublas::matrix<double, ublas::row_major, ublas::unbounded_array<double> >& B) {
00158
00159
00160 int m = C.size1();
00161 int n = C.size2();
00162 int k = A.size2();
00163 BOOST_UBLAS_CHECK (C.size1() == A.size1(), ublas::bad_size ());
00164 BOOST_UBLAS_CHECK (A.size2() == B.size1(), ublas::bad_size ());
00165 BOOST_UBLAS_CHECK (B.size2() == C.size2(), ublas::bad_size ());
00166 cblas_dgemm(CblasRowMajor, CblasTrans, CblasNoTrans,
00167 m, n, k, 1.0, &A.data()[0], A.size1(), &B.data()[0], B.size2(), 1.0, &C.data()[0], C.size2());
00168 }
00169
00170 BOOST_UBLAS_INLINE
00171 void
00172 cblas_prod (ublas::matrix<double, ublas::row_major, ublas::unbounded_array<double> >& C,
00173 const ublas::matrix<double, ublas::row_major, ublas::unbounded_array<double> >& A,
00174 const ublas::matrix<double, ublas::column_major, ublas::unbounded_array<double> >& B) {
00175
00176
00177 int m = C.size1();
00178 int n = C.size2();
00179 int k = A.size2();
00180 BOOST_UBLAS_CHECK (C.size1() == A.size1(), ublas::bad_size ());
00181 BOOST_UBLAS_CHECK (A.size2() == B.size1(), ublas::bad_size ());
00182 BOOST_UBLAS_CHECK (B.size2() == C.size2(), ublas::bad_size ());
00183 cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasTrans,
00184 m, n, k, 1.0, &A.data()[0], A.size2(), &B.data()[0], B.size1(), 1.0, &C.data()[0], C.size2());
00185 }
00186
00187 BOOST_UBLAS_INLINE
00188 void
00189 cblas_prod (ublas::matrix<double, ublas::row_major, ublas::unbounded_array<double> >& C,
00190 const ublas::matrix<double, ublas::column_major, ublas::unbounded_array<double> >& A,
00191 const ublas::matrix<double, ublas::column_major, ublas::unbounded_array<double> >& B) {
00192
00193
00194 int m = C.size1();
00195 int n = C.size2();
00196 int k = A.size2();
00197 BOOST_UBLAS_CHECK (C.size1() == A.size1(), ublas::bad_size ());
00198 BOOST_UBLAS_CHECK (A.size2() == B.size1(), ublas::bad_size ());
00199 BOOST_UBLAS_CHECK (B.size2() == C.size2(), ublas::bad_size ());
00200 cblas_dgemm(CblasRowMajor, CblasTrans, CblasTrans,
00201 m, n, k, 1.0, &A.data()[0], A.size1(), &B.data()[0], B.size1(), 1.0, &C.data()[0], C.size2());
00202 }
00203
00204 BOOST_UBLAS_INLINE
00205 void
00206 cblas_prod (ublas::matrix<double, ublas::column_major, ublas::unbounded_array<double> >& C,
00207 const ublas::matrix<double, ublas::row_major, ublas::unbounded_array<double> >& A,
00208 const ublas::matrix<double, ublas::row_major, ublas::unbounded_array<double> >& B) {
00209
00210
00211 int m = C.size1();
00212 int n = C.size2();
00213 int k = A.size2();
00214 BOOST_UBLAS_CHECK (C.size1() == A.size1(), ublas::bad_size ());
00215 BOOST_UBLAS_CHECK (A.size2() == B.size1(), ublas::bad_size ());
00216 BOOST_UBLAS_CHECK (B.size2() == C.size2(), ublas::bad_size ());
00217 cblas_dgemm(CblasColMajor, CblasTrans, CblasTrans,
00218 m, n, k, 1.0, &A.data()[0], A.size2(), &B.data()[0], B.size2(), 1.0, &C.data()[0], C.size1());
00219 }
00220
00221 BOOST_UBLAS_INLINE
00222 void
00223 cblas_prod (ublas::matrix<double, ublas::column_major, ublas::unbounded_array<double> >& C,
00224 const ublas::matrix<double, ublas::column_major, ublas::unbounded_array<double> >& A,
00225 const ublas::matrix<double, ublas::row_major, ublas::unbounded_array<double> >& B) {
00226
00227
00228 int m = C.size1();
00229 int n = C.size2();
00230 int k = A.size2();
00231 BOOST_UBLAS_CHECK (C.size1() == A.size1(), ublas::bad_size ());
00232 BOOST_UBLAS_CHECK (A.size2() == B.size1(), ublas::bad_size ());
00233 BOOST_UBLAS_CHECK (B.size2() == C.size2(), ublas::bad_size ());
00234 cblas_dgemm(CblasColMajor, CblasNoTrans, CblasTrans,
00235 m, n, k, 1.0, &A.data()[0], A.size1(), &B.data()[0], B.size2(), 1.0, &C.data()[0], C.size1());
00236 }
00237
00238 BOOST_UBLAS_INLINE
00239 void
00240 cblas_prod (ublas::matrix<double, ublas::column_major, ublas::unbounded_array<double> >& C,
00241 const ublas::matrix<double, ublas::row_major, ublas::unbounded_array<double> >& A,
00242 const ublas::matrix<double, ublas::column_major, ublas::unbounded_array<double> >& B) {
00243
00244
00245 int m = C.size1();
00246 int n = C.size2();
00247 int k = A.size2();
00248 BOOST_UBLAS_CHECK (C.size1() == A.size1(), ublas::bad_size ());
00249 BOOST_UBLAS_CHECK (A.size2() == B.size1(), ublas::bad_size ());
00250 BOOST_UBLAS_CHECK (B.size2() == C.size2(), ublas::bad_size ());
00251 cblas_dgemm(CblasColMajor, CblasTrans, CblasNoTrans,
00252 m, n, k, 1.0, &A.data()[0], A.size2(), &B.data()[0], B.size1(), 1.0, &C.data()[0], C.size1());
00253 }
00254
00255 BOOST_UBLAS_INLINE
00256 void
00257 cblas_prod (ublas::matrix<double, ublas::column_major, ublas::unbounded_array<double> >& C,
00258 const ublas::matrix<double, ublas::column_major, ublas::unbounded_array<double> >& A,
00259 const ublas::matrix<double, ublas::column_major, ublas::unbounded_array<double> >& B) {
00260
00261
00262 int m = C.size1();
00263 int n = C.size2();
00264 int k = A.size2();
00265 BOOST_UBLAS_CHECK (C.size1() == A.size1(), ublas::bad_size ());
00266 BOOST_UBLAS_CHECK (A.size2() == B.size1(), ublas::bad_size ());
00267 BOOST_UBLAS_CHECK (B.size2() == C.size2(), ublas::bad_size ());
00268 cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans,
00269 m, n, k, 1.0, &A.data()[0], A.size1(), &B.data()[0], B.size1(), 1.0, &C.data()[0], C.size1());
00270 }
00271
00272 void zeromatrix(ublas::matrix<double,ublas::row_major>& A) {
00273 for(size_t i = 0; i<A.size1(); i++)
00274 for(size_t j = 0; j<A.size2(); j++)
00275 A(i,j) = 0.0;
00276 }
00277 void zeromatrix(ublas::matrix<double,ublas::column_major>& A) {
00278 for(size_t i = 0; i<A.size1(); i++)
00279 for(size_t j = 0; j<A.size2(); j++)
00280 A(i,j) = 0.0;
00281 }
00282 void initmatrix(ublas::matrix<double,ublas::row_major>& A) {
00283 for(size_t i = 0; i<A.size1(); i++)
00284 for(size_t j = 0; j<A.size2(); j++)
00285 A(i,j) = 1.0*rand();
00286 }
00287 void initmatrix(ublas::matrix<double,ublas::column_major>& A) {
00288 for(size_t i = 0; i<A.size1(); i++)
00289 for(size_t j = 0; j<A.size2(); j++)
00290 A(i,j) = 1.0*rand();
00291 }
00292
00293 #define NOCHECK
00294 #ifdef NOCHECK
00295 template <class M, class E>
00296 int equals(const M& lhs, const E& rhs)
00297 {
00298 return 2;
00299 }
00300 #endif
00301
00302 typedef ublas::matrix<double,ublas::row_major> RT;
00303 typedef ublas::matrix<double,ublas::column_major> CT;
00304
00305 #define mul(a,b,d) \
00306 { \
00307 zeromatrix(X); \
00308 initmatrix(A); \
00309 initmatrix(B); \
00310 t.restart(); \
00311 for (size_t a=0;a<size;a++) \
00312 for (size_t b=0;b<size;b++) \
00313 for (size_t d=0;d<size;d++) \
00314 X(r,c) += A(r,k)*B(k,c); \
00315 time = t.elapsed(); \
00316 cout << setw(8) << time << flush; \
00317 cerr << equals(X,prod(A,B))<< endl; \
00318 }
00319
00320 #define mul2(xtype) \
00321 { \
00322 zeromatrix(X); \
00323 initmatrix(A); \
00324 initmatrix(B); \
00325 t.restart(); \
00326 X.plus_assign(prod(A,B)); \
00327 time = t.elapsed(); \
00328 cout << setw(8) << time << flush; \
00329 cerr << equals(X,prod(A,B)) << endl; \
00330 }
00331
00332 #define mul3(xtype) \
00333 { \
00334 zeromatrix(X); \
00335 initmatrix(A); \
00336 initmatrix(B); \
00337 t.restart(); \
00338 ublas::axpy_prod(A,B,X,false); \
00339 time = t.elapsed(); \
00340 cout << setw(8) << time << flush; \
00341 cerr << equals(X,prod(A,B)) << endl; \
00342 }
00343
00344 #define mul4(xtype) \
00345 { \
00346 zeromatrix(X); \
00347 initmatrix(A); \
00348 initmatrix(B); \
00349 t.restart(); \
00350 ublas::opb_prod(A,B,X,false); \
00351 time = t.elapsed(); \
00352 cout << setw(8) << time << flush; \
00353 cerr << equals(X,prod(A,B)) << endl; \
00354 }
00355
00356 #define mul5(xtype) \
00357 { \
00358 zeromatrix(X); \
00359 initmatrix(A); \
00360 initmatrix(B); \
00361 t.restart(); \
00362 X.plus_assign(ublas::block_prod<xtype, 64>(A,B)); \
00363 time = t.elapsed(); \
00364 cout << setw(8) << time << flush; \
00365 cerr << equals(X,prod(A,B)) << endl; \
00366 }
00367
00368 #define mul6(xtype) \
00369 { \
00370 zeromatrix(X); \
00371 initmatrix(A); \
00372 initmatrix(B); \
00373 t.restart(); \
00374 X.plus_assign(ublas::block_prod_goto<xtype, 64, 64, 64>(A,B)); \
00375 time = t.elapsed(); \
00376 cout << setw(8) << time << flush; \
00377 cerr << equals(X,prod(A,B)) << endl; \
00378 }
00379
00380 #define mul7 \
00381 { \
00382 zeromatrix(X); \
00383 initmatrix(A); \
00384 initmatrix(B); \
00385 t.restart(); \
00386 cblas_prod(X, A, B); \
00387 time = t.elapsed(); \
00388 \
00389 cout << setw(8) << time << flush; \
00390 cerr << equals(X,prod(A,B)) << endl; \
00391 }
00392
00393 #define mulset(xtype,atype,btype) \
00394 { \
00395 xtype X(size1,size3); \
00396 atype A(size1,size2); \
00397 btype B(size2,size3); \
00398 mul5(xtype); \
00399 mul6(xtype); \
00400 mul7 ; \
00401 cout << endl; \
00402 }
00403
00404
00405
00406
00407
00408
00409
00410
00411
00412
00413
00414
00415
00416 void test(const size_t size1, const size_t size2, const size_t size3) {
00417 boost::timer t;
00418 double time;
00419 cout << "size of metrices - " << size1 << "x" << size2 << "*" << size2 << "x" << size3 << endl << endl;
00420
00421 cout << " block goto atlas" <<
00422 endl;
00423 cout << "RRR";mulset(RT,RT,RT);
00424 cout << "RRC";mulset(RT,RT,CT);
00425 cout << "RCR";mulset(RT,CT,RT);
00426 cout << "RCC";mulset(RT,CT,CT);
00427
00428 cout << "CRR";mulset(CT,RT,RT);
00429 cout << "CRC";mulset(CT,RT,CT);
00430 cout << "CCR";mulset(CT,CT,RT);
00431 cout << "CCC";mulset(CT,CT,CT);
00432
00433 }
00434
00435 int main(int argc, char *argv[]) {
00436 int size1 = 100;
00437 if (argc > 1)
00438 size1 = ::atoi(argv [1]);
00439 int size2 = 100;
00440 if (argc > 2)
00441 size2 = ::atoi(argv [2]);
00442 int size3 = 100;
00443 if (argc > 3)
00444 size3 = ::atoi(argv [3]);
00445 test(size1, size2, size3);
00446 }
00447
00448
00449