From bug-request at octave dot org Wed Mar 16 14:11:57 2005 Subject: split.m From: =?ISO-8859-1?Q?S=F8ren_Hauberg?= To: bugs at octave dot org Date: Wed, 16 Mar 2005 14:09:40 -0600 This is a multi-part message in MIME format. --------------090609060104020400060207 Content-Type: text/plain; charset=ISO-8859-1; format=flowed Content-Transfer-Encoding: 8bit Hi, The current version of split.m is very slow on large strings (it takes more than 3 hours to process a 3MB string on my old laptop). The attached version is many times faster (the same string takes about 15 seconds to process on the same machine) because it only allocates memory once. This is my first patch, so please tell me if I've done something wrong. Thanks, Søren --------------090609060104020400060207 Content-Type: text/x-patch; name="split.m.diff" Content-Transfer-Encoding: 8bit Content-Disposition: inline; filename="split.m.diff" *** /usr/share/octave/2.1.64/m/strings/split.m 2004-12-05 05:17:25.000000000 +0100 --- split.m 2005-03-16 21:08:06.980198640 +0100 *************** *** 33,38 **** --- 33,39 ---- ## Author: Kurt Hornik ## Adapted-By: jwe + ## Minor changes by Søren Hauberg function m = split (s, t) *************** function m = split (s, t) *** 40,46 **** usage ("split (s, t)"); endif ! if (isstr (s) && isstr (t)) l_s = length (s); l_t = length (t); --- 41,50 ---- usage ("split (s, t)"); endif ! if not(ischar (s) && ischar (t)) ! error ("split: both s and t must be strings"); ! endif ! l_s = length (s); l_t = length (t); *************** function m = split (s, t) *** 48,88 **** if (l_s == 0) m = ""; return; elseif (l_s < l_t) error ("split: s must not be shorter than t"); endif ! ! if (l_t == 0) ! ind = 1 : (l_s + 1); ! else ! ind = findstr (s, t, 0); ! if (length (ind) == 0) ! m = s; ! return; ! endif ! ind = [1 - l_t, ind, l_s + 1]; endif ! cmd = ""; ! ! limit = length (ind) - 1; ! ! for k = 1 : limit ! ! range = (ind (k) + l_t) : ind (k + 1) - 1; ! ! if (k != limit) ! 
cmd = sprintf ("%s\"%s\", ", cmd, undo_string_escapes (s (range))); ! else ! cmd = sprintf ("%s\"%s\"", cmd, undo_string_escapes (s (range))); ! endif ! ! endfor ! ! m = eval (sprintf ("str2mat (%s);", cmd)); ! ! else ! error ("split: both s and t must be strings"); endif endfunction --- 52,86 ---- if (l_s == 0) m = ""; return; + elseif (l_t == 0) + m = s'; + return; elseif (l_s < l_t) error ("split: s must not be shorter than t"); endif ! ! if (min(size(s)) ~= 1 | min(size(t)) ~= 1) ! error("split: multible strings are not supported"); endif ! ind = findstr (s, t, 0); ! if (length (ind) == 0) ! m = s; ! return; endif + ind2 = [1, ind+l_t]; + ind = [ind, l_s+1]; + + ind_diff = ind-ind2; + % Create a matrix of the correct size that's filled with spaces + m_rows = length(ind); + m_cols = max(ind_diff); + m = char( zeros(m_rows, m_cols) + ' ' ); + + % Copy the strings to the matrix + for i = 1:length(ind) + tmp = ind2(i):(ind(i)-1); + m(i, 1:length(tmp)) = s(tmp); + end endfunction --------------090609060104020400060207-- ------------------------------------------------------------- Octave is freely available under the terms of the GNU GPL. Octave's home on the web: http://www.octave.org How to fund new projects: http://www.octave.org/funding.html Subscription information: http://www.octave.org/archive.html -------------------------------------------------------------