From octave-maintainers-request at bevo dot che dot wisc dot edu Fri Jan 23 07:09:34 2004 Subject: eigenvalues 3 times speedup patch [Was: benchmarks] From: David Bateman To: Paul Kienzle Cc: octave-maintainers at bevo dot che dot wisc dot edu Date: Fri, 23 Jan 2004 14:05:04 +0100 --wac7ysb48OaltWcw Content-Type: text/plain; charset=us-ascii Content-Disposition: inline Paul, Ok, this e-mail of your has drawn a lot of replies from me. I think this is probably the last thread I'll draw from it for now. However, check > II.B Matlab 0.86 - Octave 2.30 - O-Matrix 0.44 > b = eig(a); Well, there is a simple reason why octave is slower here, for this benchmark. The benchmark asks for the eigenvalues only, but octave calculates the eignevectors as well... Dohhhh.... I've written a small patch to 2.1.53, that makes "eig(a)" only calculate the eigenvectors when asked for them. Here are some interesting tests Matlab R12 ---------- >> a = randn(300,300); tic; [b,bv] = eig(a); toc elapsed_time = 1.9291 >> a = randn(300,300); tic; b = eig(a); toc elapsed_time = 0.7860 Octave 2.1.50 ------------- octave:2> a = randn(300,300); tic; [b,bv] = eig(a); toc ans = 2.1188 octave:4> a = randn(300,300); tic; b = eig(a); toc ans = 2.0628 Octave 2.1.53 + patch --------------------- octave:2> a = randn(300,300); tic; [b,bv] = eig(a); toc ans = 2.1721 octave:4> a = randn(300,300); tic; b = eig(a); toc ans = 0.84389 This together with the sort and ziggurat codes I've been working on seem to address most of the issues in the sciviews benchmarks in sections I and II. Now there are only the problems with "for" loops as shown in section III, but these are probably beyond me to address. JWE, any chance of getting this patch in? Cheers David -- David Bateman David dot Bateman at motorola dot com Motorola CRM +33 1 69 35 48 04 (Ph) Parc Les Algorithmes, Commune de St Aubin +33 1 69 35 77 01 (Fax) 91193 Gif-Sur-Yvette FRANCE The information contained in this communication has been classified as: [x] General Business Information [ ] Motorola Internal Use Only [ ] Motorola Confidential Proprietary --wac7ysb48OaltWcw Content-Type: text/plain; charset=us-ascii Content-Disposition: attachment; filename="patch.eigen" *** ./liboctave/EIG.h.orig 2004-01-23 11:54:50.000000000 +0100 --- ./liboctave/EIG.h 2004-01-23 13:42:30.000000000 +0100 *************** *** 44,56 **** EIG (void) : lambda (), v () { } ! EIG (const Matrix& a) { init (a); } ! EIG (const Matrix& a, int& info) { info = init (a); } ! EIG (const ComplexMatrix& a) { init (a); } ! EIG (const ComplexMatrix& a, int& info) { info = init (a); } EIG (const EIG& a) : lambda (a.lambda), v (a.v) { } --- 44,68 ---- EIG (void) : lambda (), v () { } ! EIG (const Matrix& a) { init (a, true); } ! EIG (const Matrix& a, int& info) { info = init (a, true); } ! EIG (const Matrix& a, const bool& calc_eigenvectors) ! { init (a, calc_eigenvectors); } ! EIG (const Matrix& a, const bool& calc_eigenvectors, int& info) ! { info = init (a, calc_eigenvectors); } ! ! EIG (const ComplexMatrix& a) { init (a, true); } ! ! EIG (const ComplexMatrix& a, int& info) { info = init (a, true); } ! ! EIG (const ComplexMatrix& a, const bool& calc_eigenvectors) ! { init (a, calc_eigenvectors); } ! ! EIG (const ComplexMatrix& a, const bool& calc_eigenvectors, int& info) ! { info = init (a, calc_eigenvectors); } EIG (const EIG& a) : lambda (a.lambda), v (a.v) { } *************** *** 78,88 **** ComplexColumnVector lambda; ComplexMatrix v; ! int init (const Matrix& a); ! int init (const ComplexMatrix& a); ! int symmetric_init (const Matrix& a); ! int hermitian_init (const ComplexMatrix& a); }; #endif --- 90,100 ---- ComplexColumnVector lambda; ComplexMatrix v; ! int init (const Matrix& a, const bool& calc_eignvectors); ! int init (const ComplexMatrix& a, const bool& calc_eignvectors); ! int symmetric_init (const Matrix& a, const bool& calc_eigenvectors); ! int hermitian_init (const ComplexMatrix& a, const bool& calc_eignvectors); }; #endif *** ./liboctave/EIG.cc.orig 2004-01-23 11:54:56.000000000 +0100 --- ./liboctave/EIG.cc 2004-01-23 13:42:52.000000000 +0100 *************** *** 71,80 **** } int ! EIG::init (const Matrix& a) { if (a.is_symmetric ()) ! return symmetric_init (a); int n = a.rows (); --- 71,80 ---- } int ! EIG::init (const Matrix& a, const bool& calc_eigenvectors) { if (a.is_symmetric ()) ! return symmetric_init (a, calc_eigenvectors); int n = a.rows (); *************** *** 95,102 **** Array wi (n); double *pwi = wi.fortran_vec (); ! Matrix vr (n, n); ! double *pvr = vr.fortran_vec (); // XXX FIXME XXX -- it might be possible to choose a better value of // lwork that would result in more efficient computations. --- 95,101 ---- Array wi (n); double *pwi = wi.fortran_vec (); ! Matrix vr; // XXX FIXME XXX -- it might be possible to choose a better value of // lwork that would result in more efficient computations. *************** *** 108,119 **** double *dummy = 0; int idummy = 1; ! F77_XFCN (dgeev, DGEEV, (F77_CONST_CHAR_ARG2 ("N", 1), ! F77_CONST_CHAR_ARG2 ("V", 1), ! n, tmp_data, n, pwr, pwi, dummy, ! idummy, pvr, n, pwork, lwork, info ! F77_CHAR_ARG_LEN (1) ! F77_CHAR_ARG_LEN (1))); if (f77_exception_encountered || info < 0) (*current_liboctave_error_handler) ("unrecoverable error in dgeev"); --- 107,132 ---- double *dummy = 0; int idummy = 1; ! if (calc_eigenvectors) ! { ! vr.resize (n, n); ! double *pvr = vr.fortran_vec (); ! F77_XFCN (dgeev, DGEEV, (F77_CONST_CHAR_ARG2 ("N", 1), ! F77_CONST_CHAR_ARG2 ("V", 1), ! n, tmp_data, n, pwr, pwi, dummy, ! idummy, pvr, n, pwork, lwork, info ! F77_CHAR_ARG_LEN (1) ! F77_CHAR_ARG_LEN (1))); ! } ! else ! F77_XFCN (dgeev, DGEEV, (F77_CONST_CHAR_ARG2 ("N", 1), ! F77_CONST_CHAR_ARG2 ("N", 1), ! n, tmp_data, n, pwr, pwi, dummy, ! idummy, dummy, idummy, pwork, lwork, info ! F77_CHAR_ARG_LEN (1) ! F77_CHAR_ARG_LEN (1))); ! ! if (f77_exception_encountered || info < 0) (*current_liboctave_error_handler) ("unrecoverable error in dgeev"); *************** *** 124,138 **** else { lambda.resize (n); ! v.resize (n, n); for (int j = 0; j < n; j++) { if (wi.elem (j) == 0.0) { lambda.elem (j) = Complex (wr.elem (j)); ! for (int i = 0; i < n; i++) ! v.elem (i, j) = vr.elem (i, j); } else { --- 137,155 ---- else { lambda.resize (n); ! if (calc_eigenvectors) ! v.resize (n, n); ! else ! v.resize (0, 0); for (int j = 0; j < n; j++) { if (wi.elem (j) == 0.0) { lambda.elem (j) = Complex (wr.elem (j)); ! if (calc_eigenvectors) ! for (int i = 0; i < n; i++) ! v.elem (i, j) = vr.elem (i, j); } else { *************** *** 146,158 **** lambda.elem(j) = Complex (wr.elem(j), wi.elem(j)); lambda.elem(j+1) = Complex (wr.elem(j+1), wi.elem(j+1)); ! for (int i = 0; i < n; i++) ! { ! double real_part = vr.elem (i, j); ! double imag_part = vr.elem (i, j+1); ! v.elem (i, j) = Complex (real_part, imag_part); ! v.elem (i, j+1) = Complex (real_part, -imag_part); ! } j++; } } --- 163,176 ---- lambda.elem(j) = Complex (wr.elem(j), wi.elem(j)); lambda.elem(j+1) = Complex (wr.elem(j+1), wi.elem(j+1)); ! if (calc_eigenvectors) ! for (int i = 0; i < n; i++) ! { ! double real_part = vr.elem (i, j); ! double imag_part = vr.elem (i, j+1); ! v.elem (i, j) = Complex (real_part, imag_part); ! v.elem (i, j+1) = Complex (real_part, -imag_part); ! } j++; } } *************** *** 163,169 **** } int ! EIG::symmetric_init (const Matrix& a) { int n = a.rows (); --- 181,187 ---- } int ! EIG::symmetric_init (const Matrix& a, const bool& calc_eigenvectors) { int n = a.rows (); *************** *** 188,198 **** Array work (lwork); double *pwork = work.fortran_vec (); ! F77_XFCN (dsyev, DSYEV, (F77_CONST_CHAR_ARG2 ("V", 1), ! F77_CONST_CHAR_ARG2 ("U", 1), ! n, tmp_data, n, pwr, pwork, lwork, info ! F77_CHAR_ARG_LEN (1) ! F77_CHAR_ARG_LEN (1))); if (f77_exception_encountered || info < 0) (*current_liboctave_error_handler) ("unrecoverable error in dsyev"); --- 206,223 ---- Array work (lwork); double *pwork = work.fortran_vec (); ! if (calc_eigenvectors) ! F77_XFCN (dsyev, DSYEV, (F77_CONST_CHAR_ARG2 ("V", 1), ! F77_CONST_CHAR_ARG2 ("U", 1), ! n, tmp_data, n, pwr, pwork, lwork, info ! F77_CHAR_ARG_LEN (1) ! F77_CHAR_ARG_LEN (1))); ! else ! F77_XFCN (dsyev, DSYEV, (F77_CONST_CHAR_ARG2 ("N", 1), ! F77_CONST_CHAR_ARG2 ("U", 1), ! n, tmp_data, n, pwr, pwork, lwork, info ! F77_CHAR_ARG_LEN (1) ! F77_CHAR_ARG_LEN (1))); if (f77_exception_encountered || info < 0) (*current_liboctave_error_handler) ("unrecoverable error in dsyev"); *************** *** 201,217 **** else { lambda = ComplexColumnVector (wr); ! v = ComplexMatrix (atmp); } return info; } int ! EIG::init (const ComplexMatrix& a) { if (a.is_hermitian ()) ! return hermitian_init (a); int n = a.rows (); --- 226,245 ---- else { lambda = ComplexColumnVector (wr); ! if (calc_eigenvectors) ! v = ComplexMatrix (atmp); ! else ! v.resize(0,0); } return info; } int ! EIG::init (const ComplexMatrix& a, const bool& calc_eigenvectors) { if (a.is_hermitian ()) ! return hermitian_init (a, calc_eigenvectors); int n = a.rows (); *************** *** 229,236 **** ComplexColumnVector w (n); Complex *pw = w.fortran_vec (); ! ComplexMatrix vtmp (n, n); ! Complex *pv = vtmp.fortran_vec (); // XXX FIXME XXX -- it might be possible to choose a better value of // lwork that would result in more efficient computations. --- 257,263 ---- ComplexColumnVector w (n); Complex *pw = w.fortran_vec (); ! ComplexMatrix vtmp; // XXX FIXME XXX -- it might be possible to choose a better value of // lwork that would result in more efficient computations. *************** *** 246,257 **** Complex *dummy = 0; int idummy = 1; ! F77_XFCN (zgeev, ZGEEV, (F77_CONST_CHAR_ARG2 ("N", 1), ! F77_CONST_CHAR_ARG2 ("V", 1), ! n, tmp_data, n, pw, dummy, idummy, ! pv, n, pwork, lwork, prwork, info ! F77_CHAR_ARG_LEN (1) ! F77_CHAR_ARG_LEN (1))); if (f77_exception_encountered || info < 0) (*current_liboctave_error_handler) ("unrecoverable error in zgeev"); --- 273,297 ---- Complex *dummy = 0; int idummy = 1; ! if (calc_eigenvectors) ! { ! vtmp.resize (n, n); ! Complex *pv = vtmp.fortran_vec (); ! ! F77_XFCN (zgeev, ZGEEV, (F77_CONST_CHAR_ARG2 ("N", 1), ! F77_CONST_CHAR_ARG2 ("V", 1), ! n, tmp_data, n, pw, dummy, idummy, ! pv, n, pwork, lwork, prwork, info ! F77_CHAR_ARG_LEN (1) ! F77_CHAR_ARG_LEN (1))); ! } ! else ! F77_XFCN (zgeev, ZGEEV, (F77_CONST_CHAR_ARG2 ("N", 1), ! F77_CONST_CHAR_ARG2 ("N", 1), ! n, tmp_data, n, pw, dummy, idummy, ! dummy, idummy, pwork, lwork, prwork, info ! F77_CHAR_ARG_LEN (1) ! F77_CHAR_ARG_LEN (1))); if (f77_exception_encountered || info < 0) (*current_liboctave_error_handler) ("unrecoverable error in zgeev"); *************** *** 260,273 **** else { lambda = w; ! v = vtmp; } return info; } int ! EIG::hermitian_init (const ComplexMatrix& a) { int n = a.rows (); --- 300,316 ---- else { lambda = w; ! if (calc_eigenvectors) ! v = vtmp; ! else ! v.resize(0,0); } return info; } int ! EIG::hermitian_init (const ComplexMatrix& a, const bool& calc_eigenvectors) { int n = a.rows (); *************** *** 296,306 **** Array rwork (lrwork); double *prwork = rwork.fortran_vec (); ! F77_XFCN (zheev, ZHEEV, (F77_CONST_CHAR_ARG2 ("V", 1), ! F77_CONST_CHAR_ARG2 ("U", 1), ! n, tmp_data, n, pwr, pwork, lwork, prwork, info ! F77_CHAR_ARG_LEN (1) ! F77_CHAR_ARG_LEN (1))); if (f77_exception_encountered || info < 0) (*current_liboctave_error_handler) ("unrecoverable error in zheev"); --- 339,357 ---- Array rwork (lrwork); double *prwork = rwork.fortran_vec (); ! if (calc_eigenvectors) ! F77_XFCN (zheev, ZHEEV, (F77_CONST_CHAR_ARG2 ("V", 1), ! F77_CONST_CHAR_ARG2 ("U", 1), ! n, tmp_data, n, pwr, pwork, lwork, prwork, info ! F77_CHAR_ARG_LEN (1) ! F77_CHAR_ARG_LEN (1))); ! else ! F77_XFCN (zheev, ZHEEV, (F77_CONST_CHAR_ARG2 ("N", 1), ! F77_CONST_CHAR_ARG2 ("U", 1), ! n, tmp_data, n, pwr, pwork, lwork, prwork, info ! F77_CHAR_ARG_LEN (1) ! F77_CHAR_ARG_LEN (1))); ! if (f77_exception_encountered || info < 0) (*current_liboctave_error_handler) ("unrecoverable error in zheev"); *************** *** 309,315 **** else { lambda = ComplexColumnVector (wr); ! v = ComplexMatrix (atmp); } return info; --- 360,369 ---- else { lambda = ComplexColumnVector (wr); ! if (calc_eigenvectors) ! v = ComplexMatrix (atmp); ! else ! v.resize(0,0); } return info; *** ./src/DLD-FUNCTIONS/eig.cc.orig 2004-01-23 13:43:22.000000000 +0100 --- ./src/DLD-FUNCTIONS/eig.cc 2004-01-23 13:40:01.000000000 +0100 *************** *** 81,87 **** if (error_state) return retval; else ! result = EIG (tmp); } else if (arg.is_complex_type ()) { --- 81,90 ---- if (error_state) return retval; else ! if (nargout == 0 || nargout == 1) ! result = EIG (tmp, false); ! else ! result = EIG (tmp); } else if (arg.is_complex_type ()) { *************** *** 90,96 **** if (error_state) return retval; else ! result = EIG (ctmp); } else { --- 93,102 ---- if (error_state) return retval; else ! if (nargout == 0 || nargout == 1) ! result = EIG (ctmp, false); ! else ! result = EIG (ctmp); } else { --wac7ysb48OaltWcw--