From octave-sources-request at bevo dot che dot wisc dot edu Thu May 30 13:43:39 2002 Subject: Replacing LINPACK with LAPACK From: David Bateman To: Cc: , Date: Thu, 30 May 2002 09:35:23 -0500 --7JfCtLOvnd9MIVvH Content-Type: text/plain; charset=us-ascii Content-Disposition: inline Dear Octave Maintainers, [ I'm not a member of this list, so please cc me in your replies to this message. ] I notice that many operations within Octave still rely on the obsolete LINPACK libraries. Significantly better performance can be obtained if these operations were converted to use LAPACK. As a proof of concept I converted the inverse, determinant, and solve functions to use LAPACK. The attached patch is against 2.1.36 and does add libcruft/lapack/dgetri.f add libcruft/lapack/zgetri.f add libcruft/lapack/dgecon.f add libcruft/lapack/zgecon.f change liboctave/dMatrix.cc change liboctave/CMatrix.cc change configure.in The additional LAPACK functions add the ability to get the condition number and form the inverse from the LU decomposition and weren't present in 2.1.36. The changes to ?Matrix.cc adds the flag PREFER_LAPACK that allows newly added code to use the LAPACK functions in preference to the LINPACK ones in this code. The change to configure.in adds the flag "-without-linpack" that forces the use of the code in my patch. The code in my patch is entirely inactive without this compile flag, though I imagine that this behaviour should eventually be changed. Unfortunately there exists many other dependencies on LINPACK in octave (ode libraries) and thus the building of LINPACK can not be enirely disabled at this point. To prove that there is a significant gain to be had with the use of LAPACK over LINPACK, I attach a benchmark program with several example runs of the benchmark on various versions of octave and matlab. This benchmark was based on the benchmark code of Francesco Potorti, however his code is very particular in that it uses hadamard matrices for the benchmarks. The particular properties of these matrices bias his tests, and thus I replaced his matrices with randomly constructed matrices. You might claim that this is still a bias since the ocnstructed matrices are still positive definite, but they aren't symmetric. As you can see by the benchmark results in the attached file, there are significant gains to be had in speed matrix inversion, solution of linear equations (ie y = A \ x) and the calculation of the determinant. Note that I also replaced the BLAS library with a PPro optimized version from http://www.cs.utk.edu/~ghenry/distrib/. Tests with ATLAS and Intels math kernel libraries showed similar performance. An interesting thing to note about the attached benchmark is the degradation in the performance of the for loops in 2.1.36 relative to 2.0.16. Is someone aware of this issue? I make no claims that this is a perfect patch. Changes that might be useful are to only calculate the condition number when it is actually requested as a return value, and to use the function IALENV to get the optimum block size for the LAPACK functions when initializing the work arrays. I release this patch under which ever license you care to use with it. Regards David Bateman --7JfCtLOvnd9MIVvH Content-Type: text/plain; charset=us-ascii Content-Disposition: attachment; filename="bm_test.m" % % Do benchmark % function [name, time] = bm_test(f,rep) % Actual test functions global t; start = cputime; for i = 1:rep if (f==1) name='Matrix inversion (LINPACK/LAPACK)'; rand('seed', 0); bm_x=inv(rand(256)); elseif (f==2) name='Schur decomposition (LAPACK)'; rand('seed', 0); bm_x=schur(rand(128)); elseif (f==3) name='Linear equation (LINPACK/LAPACK)'; rand('seed', 0); bm_A=rand(256); bm_y=ones(256,1); bm_x=bm_A\bm_y; elseif (f==4) name='Linear equation N RHS (LINPACK/LAPACK)'; rand('seed', 0); bm_A=rand(256); bm_y=ones(256,4); bm_x=bm_A\bm_y; elseif (f==5) name='Matrix determinant (LINPACK/LAPACK)'; rand('seed', 0); bm_x=det(rand(256)); elseif (f==6) name='Matrix Multiply (BLAS)'; rand('seed', 0); bm_x=rand(256)*rand(256); elseif (f==7) name='Fourier transforms (FFTPACK)'; rand('seed', 0); bm_x=ifft2(fft2(rand(256))); elseif (f==8) name='for loop'; for i=1:6000;bm_x=i^2;end end end time = (cputime - start)/rep; return endfunction --7JfCtLOvnd9MIVvH Content-Type: text/plain; charset=us-ascii Content-Disposition: attachment; filename="mybench.m" function out = mybench () bm_version = ['bm ', '0.1']; % Benchmark for matlab/octave. if (exist('OCTAVE_VERSION')) fprintf (' OCTAVE/Matlab benchmark version %s\n', bm_version); else fprintf (' Octave/MATLAB benchmark version %s\n', bm_version); end bm_numtests = 8; bm_targetaccuracy = 0.025; % target accuracy of mean of times bm_minrepetitions = 7; % min number of repetitions per test bm_maxtime = 60; % max runtime per test [seconds] bm_mintime = 0.3; % min runtime per test [seconds] bm_runtime = 3; % target runtime per test [seconds] % Reference times for vanilla Octave 2.0.16 on a PII 300MHz bm_reftime = [ 1.04 0.73 0.49 0.51 0.47 1.49 0.40 0.21]; bm_refname = 'Octave 2.0.16 on PII 300MHz'; if (exist('OCTAVE_VERSION')) fprintf (' Speed of octave %s on %s \n relative to %s\n', ... version, computer, bm_refname); fflush(stdout); else fprintf (' Speed of matlab %s on %s \n relative to %s\n', ... version, computer, bm_refname); end fprintf ('\n'); bm_mytime = zeros(1,bm_numtests); for f = 1:bm_numtests res = []; bm_test(f,1); % increase the RSS, load things rep = 1; % number of repetitions per run while (1) % we would need a do..while really [name,time] = bm_test(f,rep); % evaluate name and time if (time*rep > bm_mintime) % run for at least bm_mintime break; % found approximate time end rep = 2*rep; % approaching min run time end fprintf('%-40s', name); % print name if (exist('OCTAVE_VERSION')) fflush(stdout); end rep = round(bm_runtime/time); % no. of repetitions per run rep = max(1,rep); % slow machines need this for runs = 1:bm_maxtime/bm_runtime % do runs [name,time] = bm_test(f,rep); % run res(runs) = time; % store performance if (runs < bm_minrepetitions) % jump rest of for loop continue end res = sort(res); bm_mean = mean(res(2:runs-1)); % remove min and max results if (std(res)/bm_mean < bm_targetaccuracy) break end end % end of repetitions loop bm_mytime(f) = bm_mean; % print 95% confidence interval fprintf('%5.2f (%5.2f sec) +/- %.1f%% (%d runs)\n', ... bm_reftime(f)/bm_mean, bm_mean, 200*std(res)/bm_mean, runs*rep); if (exist('OCTAVE_VERSION')) fflush(stdout); end end clear bm_x % Display the geometric mean of the results fprintf('-- Performance index (%s): %.3g\n\n', bm_version, ... prod(bm_reftime./bm_mytime)^(1/length(bm_reftime))); endfunction --7JfCtLOvnd9MIVvH Content-Type: text/plain; charset=us-ascii Content-Disposition: attachment; filename="mybench.txt" ########################################################################### Octave 2.0.16 on PII 300MHz from mandrake 8.2 RPM Stupid test since this is the calibration data set, so we expect 1.00 everywhere. OCTAVE/Matlab benchmark version bm 0.1 Speed of octave 2.0.16 on i586-mandrake-linux-gnu relative to Octave 2.0.16 on PII 300MHz Matrix inversion (LINPACK/LAPACK) 1.06 ( 0.98 sec) +/- 3.1% (21 runs) Schur decomposition (LAPACK) 0.99 ( 0.74 sec) +/- 1.1% (28 runs) Linear equation (LINPACK/LAPACK) 1.01 ( 0.49 sec) +/- 0.7% (42 runs) Linear equation N RHS (LINPACK/LAPACK) 1.01 ( 0.50 sec) +/- 0.9% (42 runs) Matrix determinant (LINPACK/LAPACK) 1.00 ( 0.47 sec) +/- 4.9% (42 runs) Matrix Multiply (BLAS) 1.01 ( 1.47 sec) +/- 4.1% (14 runs) Fourier transforms (FFTPACK) 1.02 ( 0.39 sec) +/- 0.3% (56 runs) for loop 0.99 ( 0.21 sec) +/- 1.0% (98 runs) -- Performance index (bm 0.1): 1.01 ########################################################################### Octave 2.1.36 on PII 300MHz with ascsi_red BLAS + fftw OCTAVE/Matlab benchmark version bm 0.1 Speed of octave 2.1.36 on i686-pc-linux-gnu relative to Octave 2.0.16 on PII 300MHz Matrix inversion (LINPACK/LAPACK) 1.24 ( 0.84 sec) +/- 5.9% (80 runs) Schur decomposition (LAPACK) 1.16 ( 0.63 sec) +/- 1.5% (35 runs) Linear equation (LINPACK/LAPACK) 1.09 ( 0.45 sec) +/- 4.0% (49 runs) Linear equation N RHS (LINPACK/LAPACK) 1.13 ( 0.45 sec) +/- 1.5% (49 runs) Matrix determinant (LINPACK/LAPACK) 1.03 ( 0.46 sec) +/- 1.4% (49 runs) Matrix Multiply (BLAS) 3.37 ( 0.44 sec) +/- 1.4% (49 runs) Fourier transforms (FFTPACK) 1.09 ( 0.37 sec) +/- 4.8% (56 runs) for loop 0.66 ( 0.32 sec) +/- 0.4% (63 runs) -- Performance index (bm 0.1): 1.23 ########################################################################### Octave 2.1.36 on PII 300MHz bragon with ascsi_red BLAS + fftw + lapack_patch OCTAVE/Matlab benchmark version bm 0.1 Speed of octave 2.1.36 on i686-pc-linux-gnu relative to Octave 2.0.16 on PII 300MHz Matrix inversion (LINPACK/LAPACK) 2.34 ( 0.44 sec) +/- 0.3% (49 runs) Schur decomposition (LAPACK) 1.18 ( 0.62 sec) +/- 3.4% (35 runs) Linear equation (LINPACK/LAPACK) 1.71 ( 0.29 sec) +/- 1.2% (77 runs) Linear equation N RHS (LINPACK/LAPACK) 1.76 ( 0.29 sec) +/- 2.2% (70 runs) Matrix determinant (LINPACK/LAPACK) 1.65 ( 0.28 sec) +/- 1.1% (77 runs) Matrix Multiply (BLAS) 3.38 ( 0.44 sec) +/- 0.5% (49 runs) Fourier transforms (FFTPACK) 1.10 ( 0.36 sec) +/- 0.9% (56 runs) for loop 0.69 ( 0.30 sec) +/- 1.9% (70 runs) -- Performance index (bm 0.1): 1.55 ########################################################################### MATLAB 6.1 on PII 300Hz Octave/MATLAB benchmark version bm 0.1 Speed of matlab 6.1.0.450 (R12.1) on GLNX86 relative to Octave 2.0.16 on PII 300MHz Matrix inversion (LINPACK/LAPACK) 3.22 ( 0.32 sec) +/- 4.9% (63 runs) Schur decomposition (LAPACK) 1.86 ( 0.39 sec) +/- 0.7% (49 runs) Linear equation (LINPACK/LAPACK) 2.96 ( 0.17 sec) +/- 2.1% (126 runs) Linear equation N RHS (LINPACK/LAPACK) 3.00 ( 0.17 sec) +/- 1.0% (126 runs) Matrix determinant (LINPACK/LAPACK) 3.75 ( 0.13 sec) +/- 2.9% (168 runs) Matrix Multiply (BLAS) 7.25 ( 0.21 sec) +/- 2.2% (98 runs) Fourier transforms (FFTPACK) 1.66 ( 0.24 sec) +/- 4.3% (84 runs) for loop 2.98 ( 0.07 sec) +/- 1.6% (308 runs) -- Performance index (bm 0.1): 3.03 --7JfCtLOvnd9MIVvH Content-Type: text/plain; charset=us-ascii Content-Disposition: attachment; filename="patch-2.1.36" --- octave-2.1.36/libcruft/lapack/dgetri.f.orig Tue May 28 15:02:20 2002 +++ octave-2.1.36/libcruft/lapack/dgetri.f Tue May 28 14:53:47 2002 at @ -0,0 +1,193 @@ + SUBROUTINE DGETRI( N, A, LDA, IPIV, WORK, LWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LWORK, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + DOUBLE PRECISION A( LDA, * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* DGETRI computes the inverse of a matrix using the LU factorization +* computed by DGETRF. +* +* This method inverts U and then computes inv(A) by solving the system +* inv(A)*L = inv(U) for inv(A). +* +* Arguments +* ========= +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) +* On entry, the factors L and U from the factorization +* A = P*L*U as computed by DGETRF. +* On exit, if INFO = 0, the inverse of the original matrix A. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* IPIV (input) INTEGER array, dimension (N) +* The pivot indices from DGETRF; for 1<=i<=N, row i of the +* matrix was interchanged with row IPIV(i). +* +* WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) +* On exit, if INFO=0, then WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK >= max(1,N). +* For optimal performance LWORK >= N*NB, where NB is +* the optimal blocksize returned by ILAENV. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, U(i,i) is exactly zero; the matrix is +* singular and its inverse could not be computed. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER I, IWS, J, JB, JJ, JP, LDWORK, LWKOPT, NB, + $ NBMIN, NN +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. External Subroutines .. + EXTERNAL DGEMM, DGEMV, DSWAP, DTRSM, DTRTRI, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + NB = ILAENV( 1, 'DGETRI', ' ', N, -1, -1, -1 ) + LWKOPT = N*NB + WORK( 1 ) = LWKOPT + LQUERY = ( LWORK.EQ.-1 ) + IF( N.LT.0 ) THEN + INFO = -1 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -3 + ELSE IF( LWORK.LT.MAX( 1, N ) .AND. .NOT.LQUERY ) THEN + INFO = -6 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DGETRI', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Form inv(U). If INFO > 0 from DTRTRI, then U is singular, +* and the inverse is not computed. +* + CALL DTRTRI( 'Upper', 'Non-unit', N, A, LDA, INFO ) + IF( INFO.GT.0 ) + $ RETURN +* + NBMIN = 2 + LDWORK = N + IF( NB.GT.1 .AND. NB.LT.N ) THEN + IWS = MAX( LDWORK*NB, 1 ) + IF( LWORK.LT.IWS ) THEN + NB = LWORK / LDWORK + NBMIN = MAX( 2, ILAENV( 2, 'DGETRI', ' ', N, -1, -1, -1 ) ) + END IF + ELSE + IWS = N + END IF +* +* Solve the equation inv(A)*L = inv(U) for inv(A). +* + IF( NB.LT.NBMIN .OR. NB.GE.N ) THEN +* +* Use unblocked code. +* + DO 20 J = N, 1, -1 +* +* Copy current column of L to WORK and replace with zeros. +* + DO 10 I = J + 1, N + WORK( I ) = A( I, J ) + A( I, J ) = ZERO + 10 CONTINUE +* +* Compute current column of inv(A). +* + IF( J.LT.N ) + $ CALL DGEMV( 'No transpose', N, N-J, -ONE, A( 1, J+1 ), + $ LDA, WORK( J+1 ), 1, ONE, A( 1, J ), 1 ) + 20 CONTINUE + ELSE +* +* Use blocked code. +* + NN = ( ( N-1 ) / NB )*NB + 1 + DO 50 J = NN, 1, -NB + JB = MIN( NB, N-J+1 ) +* +* Copy current block column of L to WORK and replace with +* zeros. +* + DO 40 JJ = J, J + JB - 1 + DO 30 I = JJ + 1, N + WORK( I+( JJ-J )*LDWORK ) = A( I, JJ ) + A( I, JJ ) = ZERO + 30 CONTINUE + 40 CONTINUE +* +* Compute current block column of inv(A). +* + IF( J+JB.LE.N ) + $ CALL DGEMM( 'No transpose', 'No transpose', N, JB, + $ N-J-JB+1, -ONE, A( 1, J+JB ), LDA, + $ WORK( J+JB ), LDWORK, ONE, A( 1, J ), LDA ) + CALL DTRSM( 'Right', 'Lower', 'No transpose', 'Unit', N, JB, + $ ONE, WORK( J ), LDWORK, A( 1, J ), LDA ) + 50 CONTINUE + END IF +* +* Apply column interchanges. +* + DO 60 J = N - 1, 1, -1 + JP = IPIV( J ) + IF( JP.NE.J ) + $ CALL DSWAP( N, A( 1, J ), 1, A( 1, JP ), 1 ) + 60 CONTINUE +* + WORK( 1 ) = IWS + RETURN +* +* End of DGETRI +* + END --- octave-2.1.36/libcruft/lapack/zgetri.f.orig Tue May 28 15:02:23 2002 +++ octave-2.1.36/libcruft/lapack/zgetri.f Tue May 28 14:53:47 2002 at @ -0,0 +1,194 @@ + SUBROUTINE ZGETRI( N, A, LDA, IPIV, WORK, LWORK, INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LWORK, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX*16 A( LDA, * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* ZGETRI computes the inverse of a matrix using the LU factorization +* computed by ZGETRF. +* +* This method inverts U and then computes inv(A) by solving the system +* inv(A)*L = inv(U) for inv(A). +* +* Arguments +* ========= +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* A (input/output) COMPLEX*16 array, dimension (LDA,N) +* On entry, the factors L and U from the factorization +* A = P*L*U as computed by ZGETRF. +* On exit, if INFO = 0, the inverse of the original matrix A. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* IPIV (input) INTEGER array, dimension (N) +* The pivot indices from ZGETRF; for 1<=i<=N, row i of the +* matrix was interchanged with row IPIV(i). +* +* WORK (workspace/output) COMPLEX*16 array, dimension (LWORK) +* On exit, if INFO=0, then WORK(1) returns the optimal LWORK. +* +* LWORK (input) INTEGER +* The dimension of the array WORK. LWORK >= max(1,N). +* For optimal performance LWORK >= N*NB, where NB is +* the optimal blocksize returned by ILAENV. +* +* If LWORK = -1, then a workspace query is assumed; the routine +* only calculates the optimal size of the WORK array, returns +* this value as the first entry of the WORK array, and no error +* message related to LWORK is issued by XERBLA. +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* > 0: if INFO = i, U(i,i) is exactly zero; the matrix is +* singular and its inverse could not be computed. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 ZERO, ONE + PARAMETER ( ZERO = ( 0.0D+0, 0.0D+0 ), + $ ONE = ( 1.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER I, IWS, J, JB, JJ, JP, LDWORK, LWKOPT, NB, + $ NBMIN, NN +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZGEMM, ZGEMV, ZSWAP, ZTRSM, ZTRTRI +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + NB = ILAENV( 1, 'ZGETRI', ' ', N, -1, -1, -1 ) + LWKOPT = N*NB + WORK( 1 ) = LWKOPT + LQUERY = ( LWORK.EQ.-1 ) + IF( N.LT.0 ) THEN + INFO = -1 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -3 + ELSE IF( LWORK.LT.MAX( 1, N ) .AND. .NOT.LQUERY ) THEN + INFO = -6 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZGETRI', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Form inv(U). If INFO > 0 from ZTRTRI, then U is singular, +* and the inverse is not computed. +* + CALL ZTRTRI( 'Upper', 'Non-unit', N, A, LDA, INFO ) + IF( INFO.GT.0 ) + $ RETURN +* + NBMIN = 2 + LDWORK = N + IF( NB.GT.1 .AND. NB.LT.N ) THEN + IWS = MAX( LDWORK*NB, 1 ) + IF( LWORK.LT.IWS ) THEN + NB = LWORK / LDWORK + NBMIN = MAX( 2, ILAENV( 2, 'ZGETRI', ' ', N, -1, -1, -1 ) ) + END IF + ELSE + IWS = N + END IF +* +* Solve the equation inv(A)*L = inv(U) for inv(A). +* + IF( NB.LT.NBMIN .OR. NB.GE.N ) THEN +* +* Use unblocked code. +* + DO 20 J = N, 1, -1 +* +* Copy current column of L to WORK and replace with zeros. +* + DO 10 I = J + 1, N + WORK( I ) = A( I, J ) + A( I, J ) = ZERO + 10 CONTINUE +* +* Compute current column of inv(A). +* + IF( J.LT.N ) + $ CALL ZGEMV( 'No transpose', N, N-J, -ONE, A( 1, J+1 ), + $ LDA, WORK( J+1 ), 1, ONE, A( 1, J ), 1 ) + 20 CONTINUE + ELSE +* +* Use blocked code. +* + NN = ( ( N-1 ) / NB )*NB + 1 + DO 50 J = NN, 1, -NB + JB = MIN( NB, N-J+1 ) +* +* Copy current block column of L to WORK and replace with +* zeros. +* + DO 40 JJ = J, J + JB - 1 + DO 30 I = JJ + 1, N + WORK( I+( JJ-J )*LDWORK ) = A( I, JJ ) + A( I, JJ ) = ZERO + 30 CONTINUE + 40 CONTINUE +* +* Compute current block column of inv(A). +* + IF( J+JB.LE.N ) + $ CALL ZGEMM( 'No transpose', 'No transpose', N, JB, + $ N-J-JB+1, -ONE, A( 1, J+JB ), LDA, + $ WORK( J+JB ), LDWORK, ONE, A( 1, J ), LDA ) + CALL ZTRSM( 'Right', 'Lower', 'No transpose', 'Unit', N, JB, + $ ONE, WORK( J ), LDWORK, A( 1, J ), LDA ) + 50 CONTINUE + END IF +* +* Apply column interchanges. +* + DO 60 J = N - 1, 1, -1 + JP = IPIV( J ) + IF( JP.NE.J ) + $ CALL ZSWAP( N, A( 1, J ), 1, A( 1, JP ), 1 ) + 60 CONTINUE +* + WORK( 1 ) = IWS + RETURN +* +* End of ZGETRI +* + END --- octave-2.1.36/libcruft/lapack/dgecon.f.orig Tue May 28 15:02:26 2002 +++ octave-2.1.36/libcruft/lapack/dgecon.f Tue May 28 14:53:47 2002 at @ -0,0 +1,181 @@ + SUBROUTINE DGECON( NORM, N, A, LDA, ANORM, RCOND, WORK, IWORK, + $ INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* February 29, 1992 +* +* .. Scalar Arguments .. + CHARACTER NORM + INTEGER INFO, LDA, N + DOUBLE PRECISION ANORM, RCOND +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + DOUBLE PRECISION A( LDA, * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* DGECON estimates the reciprocal of the condition number of a general +* real matrix A, in either the 1-norm or the infinity-norm, using +* the LU factorization computed by DGETRF. +* +* An estimate is obtained for norm(inv(A)), and the reciprocal of the +* condition number is computed as +* RCOND = 1 / ( norm(A) * norm(inv(A)) ). +* +* Arguments +* ========= +* +* NORM (input) CHARACTER*1 +* Specifies whether the 1-norm condition number or the +* infinity-norm condition number is required: +* = '1' or 'O': 1-norm; +* = 'I': Infinity-norm. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* A (input) DOUBLE PRECISION array, dimension (LDA,N) +* The factors L and U from the factorization A = P*L*U +* as computed by DGETRF. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* ANORM (input) DOUBLE PRECISION +* If NORM = '1' or 'O', the 1-norm of the original matrix A. +* If NORM = 'I', the infinity-norm of the original matrix A. +* +* RCOND (output) DOUBLE PRECISION +* The reciprocal of the condition number of the matrix A, +* computed as RCOND = 1/(norm(A) * norm(inv(A))). +* +* WORK (workspace) DOUBLE PRECISION array, dimension (4*N) +* +* IWORK (workspace) INTEGER array, dimension (N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL ONENRM + CHARACTER NORMIN + INTEGER IX, KASE, KASE1 + DOUBLE PRECISION AINVNM, SCALE, SL, SMLNUM, SU +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER IDAMAX + DOUBLE PRECISION DLAMCH + EXTERNAL LSAME, IDAMAX, DLAMCH +* .. +* .. External Subroutines .. + EXTERNAL DLACON, DLATRS, DRSCL, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + ONENRM = NORM.EQ.'1' .OR. LSAME( NORM, 'O' ) + IF( .NOT.ONENRM .AND. .NOT.LSAME( NORM, 'I' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF( ANORM.LT.ZERO ) THEN + INFO = -5 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DGECON', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + RCOND = ZERO + IF( N.EQ.0 ) THEN + RCOND = ONE + RETURN + ELSE IF( ANORM.EQ.ZERO ) THEN + RETURN + END IF +* + SMLNUM = DLAMCH( 'Safe minimum' ) +* +* Estimate the norm of inv(A). +* + AINVNM = ZERO + NORMIN = 'N' + IF( ONENRM ) THEN + KASE1 = 1 + ELSE + KASE1 = 2 + END IF + KASE = 0 + 10 CONTINUE + CALL DLACON( N, WORK( N+1 ), WORK, IWORK, AINVNM, KASE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.KASE1 ) THEN +* +* Multiply by inv(L). +* + CALL DLATRS( 'Lower', 'No transpose', 'Unit', NORMIN, N, A, + $ LDA, WORK, SL, WORK( 2*N+1 ), INFO ) +* +* Multiply by inv(U). +* + CALL DLATRS( 'Upper', 'No transpose', 'Non-unit', NORMIN, N, + $ A, LDA, WORK, SU, WORK( 3*N+1 ), INFO ) + ELSE +* +* Multiply by inv(U'). +* + CALL DLATRS( 'Upper', 'Transpose', 'Non-unit', NORMIN, N, A, + $ LDA, WORK, SU, WORK( 3*N+1 ), INFO ) +* +* Multiply by inv(L'). +* + CALL DLATRS( 'Lower', 'Transpose', 'Unit', NORMIN, N, A, + $ LDA, WORK, SL, WORK( 2*N+1 ), INFO ) + END IF +* +* Divide X by 1/(SL*SU) if doing so will not cause overflow. +* + SCALE = SL*SU + NORMIN = 'Y' + IF( SCALE.NE.ONE ) THEN + IX = IDAMAX( N, WORK, 1 ) + IF( SCALE.LT.ABS( WORK( IX ) )*SMLNUM .OR. SCALE.EQ.ZERO ) + $ GO TO 20 + CALL DRSCL( N, SCALE, WORK, 1 ) + END IF + GO TO 10 + END IF +* +* Compute the estimate of the reciprocal condition number. +* + IF( AINVNM.NE.ZERO ) + $ RCOND = ( ONE / AINVNM ) / ANORM +* + 20 CONTINUE + RETURN +* +* End of DGECON +* + END --- octave-2.1.36/libcruft/lapack/zgecon.f.orig Tue May 28 15:02:32 2002 +++ octave-2.1.36/libcruft/lapack/zgecon.f Tue May 28 14:53:47 2002 at @ -0,0 +1,189 @@ + SUBROUTINE ZGECON( NORM, N, A, LDA, ANORM, RCOND, WORK, RWORK, + $ INFO ) +* +* -- LAPACK routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* March 31, 1993 +* +* .. Scalar Arguments .. + CHARACTER NORM + INTEGER INFO, LDA, N + DOUBLE PRECISION ANORM, RCOND +* .. +* .. Array Arguments .. + DOUBLE PRECISION RWORK( * ) + COMPLEX*16 A( LDA, * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +* ZGECON estimates the reciprocal of the condition number of a general +* complex matrix A, in either the 1-norm or the infinity-norm, using +* the LU factorization computed by ZGETRF. +* +* An estimate is obtained for norm(inv(A)), and the reciprocal of the +* condition number is computed as +* RCOND = 1 / ( norm(A) * norm(inv(A)) ). +* +* Arguments +* ========= +* +* NORM (input) CHARACTER*1 +* Specifies whether the 1-norm condition number or the +* infinity-norm condition number is required: +* = '1' or 'O': 1-norm; +* = 'I': Infinity-norm. +* +* N (input) INTEGER +* The order of the matrix A. N >= 0. +* +* A (input) COMPLEX*16 array, dimension (LDA,N) +* The factors L and U from the factorization A = P*L*U +* as computed by ZGETRF. +* +* LDA (input) INTEGER +* The leading dimension of the array A. LDA >= max(1,N). +* +* ANORM (input) DOUBLE PRECISION +* If NORM = '1' or 'O', the 1-norm of the original matrix A. +* If NORM = 'I', the infinity-norm of the original matrix A. +* +* RCOND (output) DOUBLE PRECISION +* The reciprocal of the condition number of the matrix A, +* computed as RCOND = 1/(norm(A) * norm(inv(A))). +* +* WORK (workspace) COMPLEX*16 array, dimension (2*N) +* +* RWORK (workspace) DOUBLE PRECISION array, dimension (2*N) +* +* INFO (output) INTEGER +* = 0: successful exit +* < 0: if INFO = -i, the i-th argument had an illegal value +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL ONENRM + CHARACTER NORMIN + INTEGER IX, KASE, KASE1 + DOUBLE PRECISION AINVNM, SCALE, SL, SMLNUM, SU + COMPLEX*16 ZDUM +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER IZAMAX + DOUBLE PRECISION DLAMCH + EXTERNAL LSAME, IZAMAX, DLAMCH +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZDRSCL, ZLACON, ZLATRS +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, DIMAG, MAX +* .. +* .. Statement Functions .. + DOUBLE PRECISION CABS1 +* .. +* .. Statement Function definitions .. + CABS1( ZDUM ) = ABS( DBLE( ZDUM ) ) + ABS( DIMAG( ZDUM ) ) +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + ONENRM = NORM.EQ.'1' .OR. LSAME( NORM, 'O' ) + IF( .NOT.ONENRM .AND. .NOT.LSAME( NORM, 'I' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF( ANORM.LT.ZERO ) THEN + INFO = -5 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZGECON', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + RCOND = ZERO + IF( N.EQ.0 ) THEN + RCOND = ONE + RETURN + ELSE IF( ANORM.EQ.ZERO ) THEN + RETURN + END IF +* + SMLNUM = DLAMCH( 'Safe minimum' ) +* +* Estimate the norm of inv(A). +* + AINVNM = ZERO + NORMIN = 'N' + IF( ONENRM ) THEN + KASE1 = 1 + ELSE + KASE1 = 2 + END IF + KASE = 0 + 10 CONTINUE + CALL ZLACON( N, WORK( N+1 ), WORK, AINVNM, KASE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.KASE1 ) THEN +* +* Multiply by inv(L). +* + CALL ZLATRS( 'Lower', 'No transpose', 'Unit', NORMIN, N, A, + $ LDA, WORK, SL, RWORK, INFO ) +* +* Multiply by inv(U). +* + CALL ZLATRS( 'Upper', 'No transpose', 'Non-unit', NORMIN, N, + $ A, LDA, WORK, SU, RWORK( N+1 ), INFO ) + ELSE +* +* Multiply by inv(U'). +* + CALL ZLATRS( 'Upper', 'Conjugate transpose', 'Non-unit', + $ NORMIN, N, A, LDA, WORK, SU, RWORK( N+1 ), + $ INFO ) +* +* Multiply by inv(L'). +* + CALL ZLATRS( 'Lower', 'Conjugate transpose', 'Unit', NORMIN, + $ N, A, LDA, WORK, SL, RWORK, INFO ) + END IF +* +* Divide X by 1/(SL*SU) if doing so will not cause overflow. +* + SCALE = SL*SU + NORMIN = 'Y' + IF( SCALE.NE.ONE ) THEN + IX = IZAMAX( N, WORK, 1 ) + IF( SCALE.LT.CABS1( WORK( IX ) )*SMLNUM .OR. SCALE.EQ.ZERO ) + $ GO TO 20 + CALL ZDRSCL( N, SCALE, WORK, 1 ) + END IF + GO TO 10 + END IF +* +* Compute the estimate of the reciprocal condition number. +* + IF( AINVNM.NE.ZERO ) + $ RCOND = ( ONE / AINVNM ) / ANORM +* + 20 CONTINUE + RETURN +* +* End of ZGECON +* + END --- octave-2.1.36/liboctave/dMatrix.cc.orig Tue May 28 14:49:03 2002 +++ octave-2.1.36/liboctave/dMatrix.cc Thu May 30 11:15:43 2002 at @ -74,6 +74,21 @@ const double&, double*, const int&, long, long); +#ifdef PREFER_LAPACK + int F77_FUNC (dgetrf, DGETRF) (const int&, const int&, double*, const int&, + int*, int&); + + int F77_FUNC (dgetrs, DGETRS) (const char*, const int&, const int&, + const double*, const int&, + const int*, double*, const int&, int&); + + int F77_FUNC (dgetri, DGETRI) (const int&, double*, const int&, const int*, + double*, const int&, int&); + + int F77_FUNC (dgecon, DGECON) (const char*, const int&, double*, + const int&, const double&, double&, + double*, int*, int&); +#else int F77_FUNC (dgeco, DGECO) (double*, const int&, const int&, int*, double&, double*); at @ -83,6 +98,7 @@ int F77_FUNC (dgedi, DGEDI) (double*, const int&, const int&, const int*, double*, double*, const int&); +#endif int F77_FUNC (dgelss, DGELSS) (const int&, const int&, const int&, double*, const int&, double*, at @ -579,35 +595,87 @@ Array ipvt (nr); int *pipvt = ipvt.fortran_vec (); +#ifdef PREFER_LAPACK + /* Can't be bothered calling ILAENV. Block size of 64 should be good */ + int lwork = 64 * nr; + Array z (lwork); + double *pz = z.fortran_vec (); + Array iz (lwork); + int *piz = iz.fortran_vec (); +#else Array z (nr); double *pz = z.fortran_vec (); +#endif retval = *this; double *tmp_data = retval.fortran_vec (); +#ifdef PREFER_LAPACK + /* Calculate the norm of the matrix, for later use */ + double anorm = retval.abs().sum().row(0).max(); + + F77_XFCN (dgetrf, DGETRF, (nc, nc, tmp_data, nr, pipvt, info)); + + if (f77_exception_encountered) + (*current_liboctave_error_handler) ("unrecoverable error in dgetrf"); +#else F77_XFCN (dgeco, DGECO, (tmp_data, nr, nc, pipvt, rcond, pz)); if (f77_exception_encountered) (*current_liboctave_error_handler) ("unrecoverable error in dgeco"); +#endif else { +#ifdef PREFER_LAPACK + /* Throw-away extra info LAPACK gives so as to not change output */ + rcond = 0.; + if ( info != 0) + info = -1; + else { + /* Now calculate the condition number for non-singular matrix */ + /* Can we get rid of this, or at least do it when needed ??? */ + char job = '1'; + F77_XFCN (dgecon, DGECON, ( &job, nc, tmp_data, nr, anorm, + rcond, pz, piz, info)); + + if (f77_exception_encountered) + (*current_liboctave_error_handler) + ("unrecoverable error in dgecon"); + + if ( info != 0) + info = -1; + } +#else volatile double rcond_plus_one = rcond + 1.0; if (rcond_plus_one == 1.0 || xisnan (rcond)) info = -1; +#endif if (info == -1 && ! force) retval = *this; // Restore matrix contents. else { - double *dummy = 0; +#ifdef PREFER_LAPACK + F77_XFCN (dgetri, DGETRI, (nc, tmp_data, nr, pipvt, + pz, lwork, info)); + + if (f77_exception_encountered) + (*current_liboctave_error_handler) + ("unrecoverable error in dgetri"); + + if ( info != 0) + info = -1; +#else + double *dummy = 0; F77_XFCN (dgedi, DGEDI, (tmp_data, nr, nc, pipvt, dummy, pz, 1)); if (f77_exception_encountered) (*current_liboctave_error_handler) ("unrecoverable error in dgedi"); +#endif } } } at @ -989,18 +1057,76 @@ Array ipvt (nr); int *pipvt = ipvt.fortran_vec (); +#ifdef PREFER_LAPACK + Array z (4*nr); + double *pz = z.fortran_vec (); + Array iz (nr); + int *piz = iz.fortran_vec (); +#else Array z (nr); double *pz = z.fortran_vec (); +#endif Matrix atmp = *this; double *tmp_data = atmp.fortran_vec (); +#ifdef PREFER_LAPACK + /* Calculate the norm of the matrix, for later use */ + double anorm = atmp.abs().sum().row(0).max(); + + F77_XFCN (dgetrf, DGETRF, (nr, nr, tmp_data, nr, pipvt, info)); + + if (f77_exception_encountered) + (*current_liboctave_error_handler) ("unrecoverable error in dgetrf"); +#else F77_XFCN (dgeco, DGECO, (tmp_data, nr, nr, pipvt, rcond, pz)); if (f77_exception_encountered) (*current_liboctave_error_handler) ("unrecoverable error in dgeco"); +#endif else { +#ifdef PREFER_LAPACK + /* Throw-away extra info LAPACK gives so as to not change output */ + rcond = 0.; + if ( info != 0) { + info = -1; + retval = DET (); + } else { + /* Now calculate the condition number for non-singular matrix */ + /* Can we get rid of this, or at least do it when needed ??? */ + char job = '1'; + F77_XFCN (dgecon, DGECON, ( &job, nc, tmp_data, nr, anorm, + rcond, pz, piz, info)); + + if (f77_exception_encountered) + (*current_liboctave_error_handler) + ("unrecoverable error in dgecon"); + else { + if ( info != 0) { + info = -1; + retval = DET (); + } else { + + double d[2] = { 1., 0.}; + for (int i=0; i= 10.) { + d[0] = 0.1 * d[0]; + d[1] = d[1] + 1.; + } + } + retval = DET (d); + } + } + } +#else volatile double rcond_plus_one = rcond + 1.0; if (rcond_plus_one == 1.0 || xisnan (rcond)) at @ -1020,6 +1146,7 @@ else retval = DET (d); } +#endif } } at @ -1066,18 +1193,64 @@ Array ipvt (nr); int *pipvt = ipvt.fortran_vec (); +#ifdef PREFER_LAPACK + /* Can't be bothered calling ILAENV. Block size of 64 should be good */ + int lwork = 64 * nr; + Array z (lwork); + double *pz = z.fortran_vec (); + Array iz (lwork); + int *piz = iz.fortran_vec (); +#else Array z (nr); double *pz = z.fortran_vec (); +#endif Matrix atmp = *this; double *tmp_data = atmp.fortran_vec (); +#ifdef PREFER_LAPACK + /* Calculate the norm of the matrix, for later use */ + double anorm = atmp.abs().sum().row(0).max(); + + F77_XFCN (dgetrf, DGETRF, (nr, nr, tmp_data, nr, pipvt, info)); + + if (f77_exception_encountered) + (*current_liboctave_error_handler) ("unrecoverable error in dgetrf"); +#else F77_XFCN (dgeco, DGECO, (tmp_data, nr, nr, pipvt, rcond, pz)); if (f77_exception_encountered) (*current_liboctave_error_handler) ("unrecoverable error in dgeco"); +#endif else { +#ifdef PREFER_LAPACK + /* Throw-away extra info LAPACK gives so as to not change output */ + rcond = 0.; + if ( info != 0) { + info = -2; + + if (sing_handler) + sing_handler (rcond); + else + (*current_liboctave_error_handler) + ("matrix singular to machine precision"); + + } else { + /* Now calculate the condition number for non-singular matrix */ + /* Can we get rid of this, or at least do it when needed ??? */ + char job = '1'; + F77_XFCN (dgecon, DGECON, ( &job, nc, tmp_data, nr, anorm, + rcond, pz, piz, info)); + + if (f77_exception_encountered) + (*current_liboctave_error_handler) + ("unrecoverable error in dgecon"); + + if ( info != 0) + info = -2; + +#endif volatile double rcond_plus_one = rcond + 1.0; if (rcond_plus_one == 1.0 || xisnan (rcond)) at @ -1098,6 +1271,16 @@ int b_nc = b.cols (); +#ifdef PREFER_LAPACK + char job = 'N'; + F77_XFCN (dgetrs, DGETRS, (&job, nr, b_nc, tmp_data, nr, pipvt, + result, b.rows(), info)); + + if (f77_exception_encountered) + (*current_liboctave_error_handler) + ("unrecoverable error in dgetrs"); + } +#else for (volatile int j = 0; j < b_nc; j++) { F77_XFCN (dgesl, DGESL, (tmp_data, nr, nr, pipvt, at @ -1111,6 +1294,7 @@ break; } } +#endif } } } at @ -1186,19 +1370,65 @@ Array ipvt (nr); int *pipvt = ipvt.fortran_vec (); +#ifdef PREFER_LAPACK + /* Can't be bothered calling ILAENV. Block size of 64 should be good */ + int lwork = 64 * nr; + Array z (lwork); + double *pz = z.fortran_vec (); + Array iz (lwork); + int *piz = iz.fortran_vec (); +#else Array z (nr); double *pz = z.fortran_vec (); +#endif Matrix atmp = *this; double *tmp_data = atmp.fortran_vec (); +#ifdef PREFER_LAPACK + /* Calculate the norm of the matrix, for later use */ + double anorm = atmp.abs().sum().row(0).max(); + + F77_XFCN (dgetrf, DGETRF, (nr, nr, tmp_data, nr, pipvt, info)); + + if (f77_exception_encountered) + (*current_liboctave_error_handler) + ("unrecoverable error in dgetrf"); +#else F77_XFCN (dgeco, DGECO, (tmp_data, nr, nr, pipvt, rcond, pz)); if (f77_exception_encountered) (*current_liboctave_error_handler) ("unrecoverable error in dgeco"); +#endif else { +#ifdef PREFER_LAPACK + /* Throw-away extra info LAPACK gives so as to not change output */ + rcond = 0.; + if ( info != 0) { + info = -2; + + if (sing_handler) + sing_handler (rcond); + else + (*current_liboctave_error_handler) + ("matrix singular to machine precision, rcond = %g", + rcond); + } else { + /* Now calculate the condition number for non-singular matrix */ + /* Can we get rid of this, or at least do it when needed ??? */ + char job = '1'; + F77_XFCN (dgecon, DGECON, ( &job, nc, tmp_data, nr, anorm, + rcond, pz, piz, info)); + + if (f77_exception_encountered) + (*current_liboctave_error_handler) + ("unrecoverable error in dgecon"); + + if ( info != 0) + info = -2; +#endif volatile double rcond_plus_one = rcond + 1.0; if (rcond_plus_one == 1.0 || xisnan (rcond)) at @ -1217,11 +1447,22 @@ retval = b; double *result = retval.fortran_vec (); +#ifdef PREFER_LAPACK + char job = 'N'; + F77_XFCN (dgetrs, DGETRS, (&job, nr, 1, tmp_data, nr, pipvt, + result, b.length(), info)); + + if (f77_exception_encountered) + (*current_liboctave_error_handler) + ("unrecoverable error in dgetrs"); + } +#else F77_XFCN (dgesl, DGESL, (tmp_data, nr, nr, pipvt, result, 0)); if (f77_exception_encountered) (*current_liboctave_error_handler) ("unrecoverable error in dgesl"); +#endif } } } --- octave-2.1.36/liboctave/CMatrix.cc.orig Tue May 28 15:17:32 2002 +++ octave-2.1.36/liboctave/CMatrix.cc Thu May 30 11:12:54 2002 at @ -59,6 +59,11 @@ #include "oct-fftw.h" #endif + +#ifdef PREFER_LAPACK +Matrix abs (ComplexMatrix x); +#endif + // Fortran functions we call. extern "C" at @ -79,6 +84,21 @@ const Complex&, Complex*, const int&, long, long); +#ifdef PREFER_LAPACK + int F77_FUNC (zgetrf, ZGETRF) (const int&, const int&, Complex*, const int&, + int*, int&); + + int F77_FUNC (zgetrs, ZGETRS) (const char*, const int&, const int&, + Complex*, const int&, + const int*, Complex*, const int&, int&); + + int F77_FUNC (zgetri, ZGETRI) (const int&, Complex*, const int&, const int*, + Complex*, const int&, int&); + + int F77_FUNC (zgecon, ZGECON) (const char*, const int&, Complex*, + const int&, const double&, double&, + Complex*, double*, int&); +#else int F77_FUNC (zgeco, ZGECO) (Complex*, const int&, const int&, int*, double&, Complex*); at @ -87,6 +107,7 @@ int F77_FUNC (zgesl, ZGESL) (Complex*, const int&, const int&, int*, Complex*, const int&); +#endif int F77_FUNC (zgelss, ZGELSS) (const int&, const int&, const int&, Complex*, const int&, Complex*, at @ -881,27 +902,78 @@ Array ipvt (nr); int *pipvt = ipvt.fortran_vec (); +#ifdef PREFER_LAPACK + /* Can't be bothered calling ILAENV. Block size of 64 should be good */ + int lwork = 64 * nr; + Array z (lwork); + Complex *pz = z.fortran_vec (); + Array rz (lwork); + double *prz = rz.fortran_vec (); +#else Array z (nr); Complex *pz = z.fortran_vec (); +#endif retval = *this; Complex *tmp_data = retval.fortran_vec (); +#ifdef PREFER_LAPACK + /* Calculate the norm of the matrix, for later use */ + double anorm = abs(retval).sum().row(0).max(); + + F77_XFCN (zgetrf, ZGETRF, (nc, nc, tmp_data, nr, pipvt, info)); + + if (f77_exception_encountered) + (*current_liboctave_error_handler) ("unrecoverable error in zgetrf"); +#else F77_XFCN (zgeco, ZGECO, (tmp_data, nr, nc, pipvt, rcond, pz)); if (f77_exception_encountered) (*current_liboctave_error_handler) ("unrecoverable error in zgeco"); +#endif else { +#ifdef PREFER_LAPACK + /* Throw-away extra info LAPACK gives so as to not change output */ + rcond = 0.; + if ( info != 0) + info = -1; + else { + /* Now calculate the condition number for non-singular matrix */ + /* Can we get rid of this, or at least do it when needed ??? */ + char job = '1'; + F77_XFCN (zgecon, ZGECON, ( &job, nc, tmp_data, nr, anorm, + rcond, pz, prz, info)); + + if (f77_exception_encountered) + (*current_liboctave_error_handler) + ("unrecoverable error in zgecon"); + + if ( info != 0) + info = -1; + } +#else volatile double rcond_plus_one = rcond + 1.0; if (rcond_plus_one == 1.0 || xisnan (rcond)) info = -1; +#endif if (info == -1 && ! force) retval = *this; // Restore contents. else { +#ifdef PREFER_LAPACK + F77_XFCN (zgetri, ZGETRI, (nc, tmp_data, nr, pipvt, + pz, lwork, info)); + + if (f77_exception_encountered) + (*current_liboctave_error_handler) + ("unrecoverable error in zgetri"); + + if ( info != 0) + info = -1; +#else Complex *dummy = 0; F77_XFCN (zgedi, ZGEDI, (tmp_data, nr, nc, pipvt, dummy, at @ -910,6 +982,7 @@ if (f77_exception_encountered) (*current_liboctave_error_handler) ("unrecoverable error in zgedi"); +#endif } } } at @ -1293,18 +1366,75 @@ Array ipvt (nr); int *pipvt = ipvt.fortran_vec (); +#ifdef PREFER_LAPACK + Array z (2*nr); + Complex *pz = z.fortran_vec (); + Array rz (2*nr); + double *prz = rz.fortran_vec (); +#else Array z (nr); Complex *pz = z.fortran_vec (); +#endif ComplexMatrix atmp = *this; Complex *tmp_data = atmp.fortran_vec (); +#ifdef PREFER_LAPACK + /* Calculate the norm of the matrix, for later use */ + double anorm = abs(atmp).sum().row(0).max(); + + F77_XFCN (zgetrf, ZGETRF, (nr, nc, tmp_data, nr, pipvt, info)); + + if (f77_exception_encountered) + (*current_liboctave_error_handler) ("unrecoverable error in zgetrf"); +#else F77_XFCN (zgeco, ZGECO, (tmp_data, nr, nr, pipvt, rcond, pz)); if (f77_exception_encountered) (*current_liboctave_error_handler) ("unrecoverable error in zgeco"); +#endif else { +#ifdef PREFER_LAPACK + /* Throw-away extra info LAPACK gives so as to not change output */ + rcond = 0.; + if ( info != 0) { + info = -1; + retval = ComplexDET (); + } else { + /* Now calculate the condition number for non-singular matrix */ + /* Can we get rid of this, or at least do it when needed ??? */ + char job = '1'; + F77_XFCN (zgecon, ZGECON, ( &job, nc, tmp_data, nr, anorm, + rcond, pz, prz, info)); + + if (f77_exception_encountered) + (*current_liboctave_error_handler) + ("unrecoverable error in zgecon"); + else { + if ( info != 0) { + info = -1; + retval = ComplexDET (); + } else { + Complex d[2] = { 1., 0.}; + for (int i=0; i= 10.) { + d[0] = 0.1 * d[0]; + d[1] = d[1] + 1.; + } + } + retval = ComplexDET (d); + } + } + } +#else volatile double rcond_plus_one = rcond + 1.0; if (rcond_plus_one == 1.0 || xisnan (rcond)) at @ -1320,10 +1450,11 @@ if (f77_exception_encountered) (*current_liboctave_error_handler) - ("unrecoverable error in dgedi"); + ("unrecoverable error in zgedi"); else retval = ComplexDET (d); } +#endif } } at @ -1399,18 +1530,64 @@ Array ipvt (nr); int *pipvt = ipvt.fortran_vec (); +#ifdef PREFER_LAPACK + /* Can't be bothered calling ILAENV. Block size of 64 should be good */ + int lwork = 64 * nr; + Array z (lwork); + Complex *pz = z.fortran_vec (); + Array rz (lwork); + double *prz = rz.fortran_vec (); +#else Array z (nr); Complex *pz = z.fortran_vec (); +#endif ComplexMatrix atmp = *this; Complex *tmp_data = atmp.fortran_vec (); +#ifdef PREFER_LAPACK + /* Calculate the norm of the matrix, for later use */ + double anorm = abs(atmp).sum().row(0).max(); + + F77_XFCN (zgetrf, ZGETRF, (nr, nr, tmp_data, nr, pipvt, info)); + + if (f77_exception_encountered) + (*current_liboctave_error_handler) ("unrecoverable error in zgetrf"); +#else F77_XFCN (zgeco, ZGECO, (tmp_data, nr, nr, pipvt, rcond, pz)); if (f77_exception_encountered) (*current_liboctave_error_handler) ("unrecoverable error in zgeco"); +#endif else { +#ifdef PREFER_LAPACK + /* Throw-away extra info LAPACK gives so as to not change output */ + rcond = 0.; + if ( info != 0) { + info = -2; + + if (sing_handler) + sing_handler (rcond); + else + (*current_liboctave_error_handler) + ("matrix singular to machine precision"); + + } else { + /* Now calculate the condition number for non-singular matrix */ + /* Can we get rid of this, or at least do it when needed ??? */ + char job = '1'; + F77_XFCN (zgecon, ZGECON, ( &job, nc, tmp_data, nr, anorm, + rcond, pz, prz, info)); + + if (f77_exception_encountered) + (*current_liboctave_error_handler) + ("unrecoverable error in zgecon"); + + if ( info != 0) + info = -2; +#endif + volatile double rcond_plus_one = rcond + 1.0; if (rcond_plus_one == 1.0 || xisnan (rcond)) at @ -1431,6 +1608,16 @@ int b_nc = b.cols (); +#ifdef PREFER_LAPACK + char job = 'N'; + F77_XFCN (zgetrs, ZGETRS, (&job, nr, b_nc, tmp_data, nr, pipvt, + result, b.rows(), info)); + + if (f77_exception_encountered) + (*current_liboctave_error_handler) + ("unrecoverable error in zgetrs"); + } +#else for (volatile int j = 0; j < b_nc; j++) { F77_XFCN (zgesl, ZGESL, (tmp_data, nr, nr, pipvt, at @ -1439,11 +1626,12 @@ if (f77_exception_encountered) { (*current_liboctave_error_handler) - ("unrecoverable error in dgesl"); + ("unrecoverable error in zgesl"); break; } } +#endif } } } at @ -1521,19 +1709,66 @@ Array ipvt (nr); int *pipvt = ipvt.fortran_vec (); +#ifdef PREFER_LAPACK + /* Can't be bothered calling ILAENV. Block size of 64 should be good */ + int lwork = 64 * nr; + Array z (lwork); + Complex *pz = z.fortran_vec (); + Array rz (lwork); + double *prz = rz.fortran_vec (); +#else Array z (nr); Complex *pz = z.fortran_vec (); +#endif ComplexMatrix atmp = *this; Complex *tmp_data = atmp.fortran_vec (); +#ifdef PREFER_LAPACK + /* Calculate the norm of the matrix, for later use */ + double anorm = abs(atmp).sum().row(0).max(); + + F77_XFCN (zgetrf, ZGETRF, (nr, nr, tmp_data, nr, pipvt, info)); + + if (f77_exception_encountered) + (*current_liboctave_error_handler) + ("unrecoverable error in zgetrf"); +#else F77_XFCN (zgeco, ZGECO, (tmp_data, nr, nr, pipvt, rcond, pz)); if (f77_exception_encountered) (*current_liboctave_error_handler) ("unrecoverable error in zgeco"); +#endif else { +#ifdef PREFER_LAPACK + /* Throw-away extra info LAPACK gives so as to not change output */ + rcond = 0.; + if ( info != 0) { + info = -2; + + if (sing_handler) + sing_handler (rcond); + else + (*current_liboctave_error_handler) + ("matrix singular to machine precision, rcond = %g", + rcond); + } else { + /* Now calculate the condition number for non-singular matrix */ + /* Can we get rid of this, or at least do it when needed ??? */ + char job = '1'; + F77_XFCN (zgecon, ZGECON, ( &job, nc, tmp_data, nr, anorm, + rcond, pz, prz, info)); + + if (f77_exception_encountered) + (*current_liboctave_error_handler) + ("unrecoverable error in zgecon"); + + if ( info != 0) + info = -2; + +#endif volatile double rcond_plus_one = rcond + 1.0; if (rcond_plus_one == 1.0 || xisnan (rcond)) at @ -1552,11 +1787,22 @@ retval = b; Complex *result = retval.fortran_vec (); +#ifdef PREFER_LAPACK + char job = 'N'; + F77_XFCN (zgetrs, ZGETRS, (&job, nr, 1, tmp_data, nr, pipvt, + result, b.length(), info)); + + if (f77_exception_encountered) + (*current_liboctave_error_handler) + ("unrecoverable error in zgetrs"); + } +#else F77_XFCN (zgesl, ZGESL, (tmp_data, nr, nr, pipvt, result, 0)); if (f77_exception_encountered) (*current_liboctave_error_handler) - ("unrecoverable error in dgesl"); + ("unrecoverable error in zgesl"); +#endif } } } at @ -2482,6 +2728,23 @@ #undef COL_EXPR } + +#ifdef PREFER_LAPACK +Matrix abs (ComplexMatrix x) +{ + int nr = x.rows (); + int nc = x.cols (); + + Matrix retval (nr, nc); + + for (int j = 0; j < nc; j++) + for (int i = 0; i < nr; i++) + retval (i, j) = abs (x (i, j)); + + return retval; +} +#endif + ComplexColumnVector ComplexMatrix::diag (void) const { at @ -2595,7 +2858,7 @@ bool real_only = row_is_real_only (i); double abs_min = real_only ? real (tmp_min) : abs (tmp_min); - + if (xisnan (tmp_min)) idx_j = -1; else --- octave-2.1.36/configure.in.orig Tue May 28 15:51:55 2002 +++ octave-2.1.36/configure.in Tue May 28 16:15:26 2002 at @ -327,6 +327,19 @@ ;; esac +### Disable LINPACK completely + +WITH_LINPACK=yes +AC_ARG_WITH(linpack, + [ --without-linpack don't use LINPACK], + WITH_LINPACK=$withval, WITH_LINPACK=yes) +LINPACK_DIR=linpack +if test $WITH_LINPACK = "no"; then + LINPACK_DIR= + OCTAVE_CXX_FLAG(-DPREFER_LAPACK) +fi +AC_SUBST(LINPACK_DIR) + ### Check for HDF5 library. ### XXX FIXME XXX -- how can user specify a set of libraries here? --7JfCtLOvnd9MIVvH--