From octave-sources-request at bevo dot che dot wisc dot edu Wed Oct 4 18:20:44 2000 Subject: no cvs web interface From: Paul Kienzle To: octave-sources at bevo dot che dot wisc dot edu Date: Thu, 5 Oct 2000 00:14:32 +0100 Hi! I've been fiddling around in liboctave and made some changes that others may be interested in. Unfortunately, I didn't keep the original files, and the web gateway to CVS at bevo seems to be down. My web connection is way to slow and flakey to use anonymous CVS properly. So, until the web gateway is back or until I get an reasonable ISP, I'll give you the functions which are changed. Can somebody do the diffs against 2.1.31 and submit them as patches? Thanks! First, since I use x(:)' a lot, I have made it significantly faster by reusing the same data instead of copying. It will still be copy on modify, but for the most part the value is used in an intermediate expression for reference only, so it is a big win. E.g., with change: octave:6> x=rand(5000,2); t=time(); x(:)'; time()-t ans = 0.00029099 without change: octave2.1:7> x=rand(5000,2); t=time(); x(:)'; time()-t ans = 0.015498 The same technique should be applied to the function reshape, though most uses of reshape can be replaced with x(:) or x(:)' directly. A similar technique should work for the generic transpose, keeping the original data and swapping the indices on calls to elem or xelem, but building the real transpose matrix if there is a call to fortran_vec. At the moment, though, there are too many references to data() for this to be done safely. Second, I took up the challenge of supporting M .* v where M is a matrix and v is a row or column vector. If v is a row vector with the same number of columns as M, this multiplies each column of M by v. Similarly for column vectors. This supports element-wise +, -, *, /, \, including assignment forms. To support ^, you need to do a similar modification to src/xpow.cc. To support comparisons and boolean operators, you need to do similar modifications to different macros in mx-op-defs.h. Not only is M .* v more convenient than M .* v(:,ones(1,columns(M))) but it is also faster and uses half the memory: octave:34> nr=2; nc=3; M=rand(nr,nc); v=rand(nr,1); octave:35> t=time(); M .* v; time()-t ans = 0.00026202 octave:36> t=time(); M .* v(:,ones(1,nc)); time()-t ans = 0.00053406 The speedup is 3:1 for M=200x300 Paul Kienzle pkienzle at kienzle dot powernet dot co dot uk In Array2-idx.h, line 57, avoid copying array for magic colon x(:) template Array2 Array2::index (idx_vector& idx_arg) const { Array2 retval; int nr = d1; int nc = d2; int idx_orig_rows = idx_arg.orig_rows (); int idx_orig_columns = idx_arg.orig_columns (); if (idx_arg.is_colon ()) { // Fast magic colon processing (copy on modify) int result_nr = nr * nc; int result_nc = result_nr ? 1 : 0; retval = Array2 (*this, result_nr, result_nc); } else if (nr == 1 && nc == 1) { Array tmp = Array::index (idx_arg); int len = tmp.length (); if (len == 0) retval = Array2 (0, 0); else { if (liboctave_pcv_flag) retval = Array2 (tmp, len, 1); else retval = Array2 (tmp, 1, len); } } else if (nr == 1 || nc == 1) { Array tmp = Array::index (idx_arg); int len = tmp.length (); if (len == 0) retval = Array2 (0, 0); else { if (nc == 1) retval = Array2 (tmp, len, 1); else retval = Array2 (tmp, 1, len); } } else if (liboctave_dfi_flag || (idx_arg.one_zero_only () && idx_orig_rows == nr && idx_orig_columns == nc)) { // This code is only for indexing matrices. The vector // cases are handled above. idx_arg.freeze (nr * nc, "matrix"); if (idx_arg) { int result_nr = idx_orig_rows; int result_nc = idx_orig_columns; if (idx_arg.one_zero_only ()) { result_nr = idx_arg.ones_count (); result_nc = (result_nr > 0 ? 1 : 0); } retval.resize (result_nr, result_nc); int k = 0; for (int j = 0; j < result_nc; j++) { for (int i = 0; i < result_nr; i++) { int ii = idx_arg.elem (k++); int fr = ii % nr; int fc = (ii - fr) / nr; retval.elem (i, j) = elem (fr, fc); } } } // idx_vector::freeze() printed an error message for us. } else (*current_liboctave_error_handler) ("single index only valid for row or column vector"); return retval; } In Array2.cc, line 219, avoid copying array on transpose nx1 or 1xn arrays template Array2 Array2::transpose (void) const { if (d1 > 1 && d2 > 1) { Array2 result (d2, d1); for (int j = 0; j < d2; j++) for (int i = 0; i < d1; i++) result.xelem (j, i) = xelem (i, j); return result; } else { // Fast transpose for vectors and empty matrices return Array2 (*this, d2, d1); } } In MArray2.cc, line 142, support M .* v and v .* M, where M is a matrix and v is a vector which matches the number of rows or number of columns in the matrix #define MARRAY_A2A2_OP(FCN, OP) \ template \ MArray2 \ FCN (const MArray2& a, const MArray2& b) \ { \ int a_nr = a.rows (); \ int a_nc = a.cols (); \ int b_nr = b.rows (); \ int b_nc = b.cols (); \ if (a_nr != b_nr || a_nc != b_nc) \ if (a_nr == b_nr && a_nc == 1) \ { \ MArray2 result (a_nr, b_nc); \ T *r = result.fortran_vec (); \ const T *x = a.data (); \ const T *y = b.data (); \ int k=0; \ for (int j=0; j < b_nc; j++) \ for (int i=0; i < a_nr; i++, k++) \ r[k] = x[i] OP y[k]; \ return result; \ } \ else if (a_nr == b_nr && b_nc == 1) \ { \ MArray2 result (a_nr, a_nc); \ T *r = result.fortran_vec (); \ const T *x = a.data (); \ const T *y = b.data (); \ int k=0; \ for (int j=0; j < a_nc; j++) \ for (int i=0; i < a_nr; i++, k++) \ r[k] = x[k] OP y[i]; \ return result; \ } \ else if (a_nc == b_nc && b_nr == 1) \ { \ MArray2 result (a_nr, a_nc); \ T *r = result.fortran_vec (); \ const T *x = a.data (); \ const T *y = b.data (); \ for (int i=0; i < a_nr; i++) \ for (int j=0, k=i; j < a_nc; j++, k+=a_nr) \ r[k] = x[k] OP y[j]; \ return result; \ } \ else if (a_nc == b_nc && a_nr == 1) \ { \ MArray2 result (b_nr, a_nc); \ T *r = result.fortran_vec (); \ const T *x = a.data (); \ const T *y = b.data (); \ for (int i=0; i < b_nr; i++) \ for (int j=0, k=i; j < a_nc; j++, k+=b_nr) \ r[k] = x[j] OP y[k]; \ return result; \ } \ else \ { \ gripe_nonconformant (#FCN, a_nr, a_nc, b_nr, b_nc); \ return MArray2 (); \ } \ if (a_nr == 0 || a_nc == 0) \ return MArray2 (a_nr, a_nc); \ int l = a.length (); \ MArray2 result (a_nr, a_nc); \ T *r = result.fortran_vec (); \ const T *x = a.data (); \ const T *y = b.data (); \ DO_VV_OP (r, l, x, OP, y); \ return result; \ } In mx-op-defs.h, line 357, M .* v and v .* M for differing types. #define MM_BIN_OP(R, OP, M1, M2, F, OPSYM) \ R \ OP (const M1& m1, const M2& m2) \ { \ R r; \ \ int m1_nr = m1.rows (); \ int m1_nc = m1.cols (); \ \ int m2_nr = m2.rows (); \ int m2_nc = m2.cols (); \ \ if (m1_nr != m2_nr || m1_nc != m2_nc) \ if (m1_nr == m2_nr && m1_nc == 1) \ { \ r.resize (m1_nr, m2_nc); \ for (int j = 0; j < m2_nc; j++) \ for (int i = 0; i < m1_nr; i++) \ r.elem(i, j) = m1.elem(i, 0) OPSYM m2.elem(i, j); \ } \ else if (m1_nr == m2_nr && m2_nc == 1) \ { \ r.resize (m1_nr, m1_nc); \ for (int j = 0; j < m1_nc; j++) \ for (int i = 0; i < m1_nr; i++) \ r.elem(i, j) = m1.elem(i, j) OPSYM m2.elem(i, 0); \ } \ else if (m1_nc == m2_nc && m2_nr == 1) \ { \ r.resize (m1_nr, m1_nc); \ for (int j = 0; j < m1_nc; j++) \ for (int i = 0; i < m1_nr; i++) \ r.elem(i, j) = m1.elem(i, j) OPSYM m2.elem(0, j); \ } \ else if (m1_nc == m2_nc && m1_nr == 1) \ { \ r.resize (m2_nr, m1_nc); \ for (int j = 0; j < m1_nc; j++) \ for (int i = 0; i < m2_nr; i++) \ r.elem(i, j) = m1.elem(0, j) OPSYM m2.elem(i, j); \ } \ else \ gripe_nonconformant (#OP, m1_nr, m1_nc, m2_nr, m2_nc); \ else \ { \ r.resize (m1_nr, m1_nc); \ \ if (m1_nr > 0 && m1_nc > 0) \ F ## _vv (r.fortran_vec (), m1.data (), m2.data (), m1_nr * m1_nc); \ } \ \ return r; \ } ----------------------------------------------------------------------- Octave is freely available under the terms of the GNU GPL. Octave's home on the web: http://www.che.wisc.edu/octave/octave.html How to fund new projects: http://www.che.wisc.edu/octave/funding.html Subscription information: http://www.che.wisc.edu/octave/archive.html -----------------------------------------------------------------------