From maintainers-request at octave dot org Wed Apr 12 15:41:57 2006 Subject: Patch for sparse indexed assignment From: David Bateman To: octave maintainers mailing list Date: Wed, 12 Apr 2006 22:37:00 +0200 This is a multi-part message in MIME format. --------------000207020309060301020001 Content-Type: text/plain; charset=ISO-8859-1 Content-Transfer-Encoding: 7bit Here is an optimization of sparse indexed assignments like s(p,:)=1 or s(p,1)=sprandn( length(p),1) which makes them significantly faster... D. * Sparse.cc (assign (Sparse&, const Sparse&)): Optimize assignment. -- David Bateman David dot Bateman at motorola dot com Motorola Labs - Paris +33 1 69 35 48 04 (Ph) Parc Les Algorithmes, Commune de St Aubin +33 6 72 01 06 33 (Mob) 91193 Gif-Sur-Yvette FRANCE +33 1 69 35 77 01 (Fax) The information contained in this communication has been classified as: [x] General Business Information [ ] Motorola Internal Use Only [ ] Motorola Confidential Proprietary --------------000207020309060301020001 Content-Type: text/plain; name="patch" Content-Transfer-Encoding: 7bit Content-Disposition: inline; filename="patch" *** liboctave/Sparse.cc.orig 2006-04-12 14:19:49.949293513 +0200 --- liboctave/Sparse.cc 2006-04-12 21:56:16.706420952 +0200 *************** *** 2492,2524 **** { octave_idx_type iii = 0; octave_idx_type ii = idx_i.elem (iii); ! for (octave_idx_type i = 0; i < new_nr; i++) { ! OCTAVE_QUIT; ! ! if (iii < n && ii == i) { if (scalar != RT ()) { stmp.data(kk) = scalar; ! stmp.ridx(kk++) = i; } if (++iii < n) ii = idx_i.elem(iii); } ! else if (j < lhs.cols()) { ! for (octave_idx_type k = lhs.cidx(j); ! k < lhs.cidx(j+1); k++) ! { ! if (lhs.ridx(k) == i) ! { ! stmp.data(kk) = lhs.data(k); ! stmp.ridx(kk++) = i; ! } ! if (lhs.ridx(k) >= i) ! break; ! } } } if (++jji < m) --- 2492,2523 ---- { octave_idx_type iii = 0; octave_idx_type ii = idx_i.elem (iii); ! octave_idx_type ppp = 0; ! octave_idx_type ppi = lhs.cidx(j+1) - ! lhs.cidx(j); ! octave_idx_type pp = (ppp < ppi ? ! lhs.ridx(lhs.cidx(j)+ppp) : ! new_nr); ! while (ppp < ppi || iii < n) { ! if (iii < n && ii <= pp) { if (scalar != RT ()) { stmp.data(kk) = scalar; ! stmp.ridx(kk++) = ii; } + if (ii == pp) + pp = (++ppp < ppi ? lhs.ridx(lhs.cidx(j)+ppp) : new_nr); if (++iii < n) ii = idx_i.elem(iii); } ! else { ! stmp.data(kk) = ! lhs.data(lhs.cidx(j)+ppp); ! stmp.ridx(kk++) = pp; ! pp = (++ppp < ppi ? lhs.ridx(lhs.cidx(j)+ppp) : new_nr); } } if (++jji < m) *************** *** 2630,2665 **** for (octave_idx_type i = 0; i < m; i++) rhs_idx_j[i] = i; ! // Count the number of non-zero terms ! octave_idx_type new_nzmx = lhs.nnz (); ! for (octave_idx_type j = 0; j < m; j++) ! { ! octave_idx_type jj = idx_j.elem (j); ! for (octave_idx_type i = 0; i < n; i++) ! { ! OCTAVE_QUIT; ! ! if (jj < lhs_nc) ! { ! octave_idx_type ii = idx_i.elem (i); ! ! if (ii < lhs_nr) ! { ! for (octave_idx_type k = lhs.cidx(jj); ! k < lhs.cidx(jj+1); k++) ! { ! if (lhs.ridx(k) == ii) ! new_nzmx--; ! if (lhs.ridx(k) >= ii) ! break; ! } ! } ! } ! ! if (rhs.elem(rhs_idx_i[i],rhs_idx_j[j]) != RT ()) ! new_nzmx++; ! } ! } Sparse stmp (new_nr, new_nc, new_nzmx); --- 2629,2636 ---- for (octave_idx_type i = 0; i < m; i++) rhs_idx_j[i] = i; ! // Maximum number of non-zero elements ! octave_idx_type new_nzmx = lhs.nnz() + rhs.nnz(); Sparse stmp (new_nr, new_nc, new_nzmx); *************** *** 2673,2707 **** { octave_idx_type iii = 0; octave_idx_type ii = idx_i.elem (iii); ! for (octave_idx_type i = 0; i < new_nr; i++) { ! OCTAVE_QUIT; ! ! if (iii < n && ii == i) { RT rtmp = rhs.elem (rhs_idx_i[iii], rhs_idx_j[jji]); if (rtmp != RT ()) { stmp.data(kk) = rtmp; ! stmp.ridx(kk++) = i; } if (++iii < n) ii = idx_i.elem(iii); } ! else if (j < lhs.cols()) { ! for (octave_idx_type k = lhs.cidx(j); ! k < lhs.cidx(j+1); k++) ! { ! if (lhs.ridx(k) == i) ! { ! stmp.data(kk) = lhs.data(k); ! stmp.ridx(kk++) = i; ! } ! if (lhs.ridx(k) >= i) ! break; ! } } } if (++jji < m) --- 2644,2677 ---- { octave_idx_type iii = 0; octave_idx_type ii = idx_i.elem (iii); ! octave_idx_type ppp = 0; ! octave_idx_type ppi = lhs.cidx(j+1) - ! lhs.cidx(j); ! octave_idx_type pp = (ppp < ppi ? ! lhs.ridx(lhs.cidx(j)+ppp) : ! new_nr); ! while (ppp < ppi || iii < n) { ! if (iii < n && ii <= pp) { RT rtmp = rhs.elem (rhs_idx_i[iii], rhs_idx_j[jji]); if (rtmp != RT ()) { stmp.data(kk) = rtmp; ! stmp.ridx(kk++) = ii; } + if (ii == pp) + pp = (++ppp < ppi ? lhs.ridx(lhs.cidx(j)+ppp) : new_nr); if (++iii < n) ii = idx_i.elem(iii); } ! else { ! stmp.data(kk) = ! lhs.data(lhs.cidx(j)+ppp); ! stmp.ridx(kk++) = pp; ! pp = (++ppp < ppi ? lhs.ridx(lhs.cidx(j)+ppp) : new_nr); } } if (++jji < m) *************** *** 2719,2724 **** --- 2689,2695 ---- stmp.cidx(j+1) = kk; } + stmp.maybe_compress(); lhs = stmp; } } --------------000207020309060301020001--