From octave-sources-request at bevo dot che dot wisc dot edu Wed Mar 3 09:21:47 2004 Subject: oct versions of BFGSMin From: Michael Creel To: octave-sources at bevo dot che dot wisc dot edu Date: Wed, 03 Mar 2004 16:12:33 +0100 --Boundary_(ID_Czj9YPx5Xm8PqR24vS7v7Q) Content-type: text/plain; charset=us-ascii Content-transfer-encoding: 7BIT Content-disposition: inline Hello, I've spent the last few days learning how to create oct files, and have some results. The attached files allow one to do bfgs minimization of a function, using numeric derivatives. Any comments or suggestions, especially about how to do type checking, would be welcome. These are preliminary versions. They work, but I expect to have more polished versions soon. To use them (with linux, don't know about windows), type "make all" from the shell prompt. Then type "octave ExampleBFGSMin.m" Regards, Michael --Boundary_(ID_Czj9YPx5Xm8PqR24vS7v7Q) Content-type: text/x-makefile; name=Makefile; charset=us-ascii Content-transfer-encoding: 7BIT Content-disposition: attachment; filename=Makefile # Makefile optimization stuff .phony: all all: BisectionStep.oct NewtonStep.oct NumGradient.oct BFGSMin.oct %.oct: %.cc mkoctfile $< --Boundary_(ID_Czj9YPx5Xm8PqR24vS7v7Q) Content-type: text/x-c++src; name=BFGSMin.cc; charset=us-ascii Content-transfer-encoding: 7BIT Content-disposition: attachment; filename=BFGSMin.cc // ============================ BFGSMin.m ===================================== // // Minimize a function of the form // // [obj_value, ...] = f(arg, otherargs) // // or // // [obj_value, ...] = f(arg) // // with respect to arg. The first return of f() must be the value of f(). // f() may have other returns, but they are not used here. // // Calling syntax: // // [theta, obj_value, iters, convergence] = BFGSMin(f, arg, otherargs, control) // or // [theta, obj_value, iters, convergence] = BFGSMin(f, arg, {1}, control) // // NOTE: the last argument (control) is optional // // // Input arguments: // // REQUIRED // f - (string) the function to minimize // arg - the arg we min with respect to, a vector // otherargs - a cell array of other arguments of func, or a placeholder, e.g., {1} // // OPTIONAL // control - 3x1 vector // * 1st elem. controls maximum iterations // scalar > 0 - max. iters // or -1 for infinity (default) // * 2nd elem. is verbosity control // 0 = no results printed (default) // 1 = intermediate results // 2 = only last iteration results // * 3rd elem. specifies convergence criterion // 1 = function, gradient, and parameter change // are all tested (default) // 0 = only function conv tested // // Outputs: // theta - the minimizer // obj_value - the minimized funtion value // iters - number of iterations used // convergence (1 = success, 0 = failure) //#include #include #include DEFUN_DLD(BFGSMin, args, , "BFGSMin.cc") { std::string f = args(0).string_value(); Matrix theta = args(1).matrix_value(); octave_value otherargs = args(2); octave_value_list f_args(2,1); octave_value_list g_args = args; octave_value_list step_args(4,1); octave_value_list f_return; int criterion, max_iters, convergence, verbosity, iter; int gradient_failed, i, j, conv1, conv2, conv3; int k = theta.rows(); double func_tol, param_tol, gradient_tol, stepsize, obj_value; double last_obj_value, denominator, test, tempscalar; Matrix control, thetain, thetanew, H, g, d, g_new, p, q, temp, H1, H2; f_args(0) = theta; f_args(1) = otherargs; step_args(0) = f; // the 4 elements are func, direction, theta, otherargs step_args(3) = otherargs; // Check number of args if (args.length() > 4) error("\nBFGSMin: you have provided more than 4 arguments\n"); if (args.length() < 3) error("\nBFGSMin: you have provided fewer than 3 arguments\n"); // Type checking: not done yet, since I don't know how to do it // // Check types of required arguments // if !ischar(func) error("\nBFGSMin: first argument must be a string that gives name of objective function\n"); endif // if !isvector(theta) error("\nBFGSMin: second argument must be a vector\n"); endif // if !iscell(otherargs) error("\nBFGSMin: third argument must be a cell array\n\ // (even if it's just placeholder such as {1}\n"); endif // if (!is_matrix_type(control)) error("\nBFGSMin: If 4 arguments passed, the 4th must be control vector\n"); endif // if (control.rows() != 3) error("\nBFGSMin: control (4th argument) must be a 3x1 vector\n"); endif // tolerances func_tol = 10*sqrt(2.22e-16); param_tol = 1e-5; gradient_tol = 1e-4; // Default values for controls max_iters = INT_MAX; // no limit on iterations verbosity = 0; // by default don't report results to screen criterion = 1; // strong convergence required // use provided controls, if applicable if (args.length() == 4) { control = args(3).matrix_value(); max_iters = (int) control(0); if (max_iters == -1) max_iters = INT_MAX; verbosity = (int) control(1); criterion = (int) control(2); } // initialize things convergence = -1; // if this doesn't change, it means that maxiters were exceeded thetain = theta; H = identity_matrix(k,k); // Initial obj_value f_return = feval(f, f_args); last_obj_value = f_return(0).double_value(); // initial gradient f_return = feval("NumGradient", args); g = f_return(0).matrix_value(); g = g.transpose(); // check that gradient is ok gradient_failed = 0; // test = 1 means gradient failed for (i=0; i 1.0) { conv1 = (fabs(obj_value - last_obj_value)/fabs(last_obj_value)) < func_tol; } else { conv1 = fabs(obj_value - last_obj_value) < func_tol; } // parameter change convergence temp = theta.transpose() * theta; test = sqrt(temp(0,0)); if (test > 1) { temp = p.transpose() * p; conv2 = (temp(0,0) / test) < param_tol ; } else { temp = p.transpose() * p; conv2 = temp(0,0) < param_tol; } // gradient convergence temp = g.transpose() * g; conv3 = sqrt(temp(0,0)) < gradient_tol; // Want intermediate results? if (verbosity == 1) { printf("\nBFGSMin intermediate results: Iteration %d\n",iter); printf("Stepsize %8.7f\n", stepsize); printf("Objective function value %16.10f\n", last_obj_value); printf("Function conv %d Param conv %d Gradient conv %d\n", conv1, conv2, conv3); printf(" params gradient change\n"); for (j = 0; j #include DEFUN_DLD(BisectionStep, args, , "BisectionStep.cc") { std::string func = args(0).string_value(); Matrix dx = args(1).matrix_value(); Matrix x = args(2).matrix_value(); octave_value otherargs = args(3); octave_value_list f_args(2,1); f_args(1) = otherargs; double obj_0, obj, a; octave_value_list f_return; octave_value_list stepobj(2,1); f_args(0) = x; Matrix x_in = x; // possibly function returns a cell array // obj. value will be in first position f_return = feval(func, f_args); obj_0 = f_return(0).double_value(); a = 1.0; // this first loop goes until an improvement is found while (a > 4e-16) // limit iterations { f_args(0) = x + a*dx; f_return = feval(func, f_args); obj = f_return(0).double_value(); // reduce stepsize if worse, or if function can't be evaluated if ((obj > obj_0) || isnan(obj)) { a = 0.5 * a; } else { obj_0 = obj; break; } } // now keep going until we no longer improve, or reach max trials while (a > 4e-16) { a = 0.5*a; f_args(0) = x + a*dx; f_return = feval(func, f_args); obj = f_return(0).double_value(); // if improved, record new best and try another step if ((obj < obj_0) & !isnan(obj)) { obj_0 = obj; } else { a = a / 0.5; // put it back to best found break; } } stepobj(0) = a; stepobj(1) = obj_0; return octave_value_list(stepobj); } --Boundary_(ID_Czj9YPx5Xm8PqR24vS7v7Q) Content-type: text/x-c++src; name=NumGradient.cc; charset=us-ascii Content-transfer-encoding: 7BIT Content-disposition: attachment; filename=NumGradient.cc // ============================= NumGradient ============================= // Central difference gradient of a function of the form // f(theta, otherargs), where theta is a vector and otherargs is a cell array. // // The gradient is respect to the FIRST element of args. // // * Allows diff. of vector-valued function // * Uses systematic finite difference // // See ExampleNumGradient.m #include #include DEFUN_DLD(NumGradient, args, ,"NumGradient, C++ version\n\ Numeric gradient via central difference") { std::string f = args(0).string_value(); Matrix parameter = args(1).matrix_value(); octave_value otherargs = args(2); octave_value_list f_args(2,1); f_args(0) = parameter; f_args(1) = otherargs; Matrix obj_value, obj_left, obj_right; octave_value_list f_return; double p, d, delta, delta_right, delta_left; int i, j; // possibly function return cell array // obj. value will be in first position f_return = feval(f, f_args); obj_value = f_return(0).matrix_value(); const int n = obj_value.rows(); const int k = parameter.rows(); Matrix derivative(n, k); Matrix columnj; for (j=0; j #include DEFUN_DLD(NewtonStep, args, , "NewtonStep.cc") { std::string func = args(0).string_value(); Matrix dx = args(1).matrix_value(); Matrix x = args(2).matrix_value(); octave_value otherargs = args(3); octave_value_list f_args(2,1); f_args(1) = otherargs; double obj, obj_0, obj_left, obj_right, delta, a, gradient, hessian; octave_value_list f_return; octave_value_list stepobj(2,1); f_args(0) = x; Matrix x_in = x; gradient = 1.0; // possibly function return cell array // obj. value will be in first position f_return = feval(func, f_args); obj = f_return(0).double_value(); obj_0 = obj; delta = 0.001; // experimentation show that this is a good choice Matrix x_right = x + delta*dx; Matrix x_left = x - delta*dx; // possibly function return cell array // obj. value will be in first position f_args(0) = x_right; f_return = feval(func, f_args); obj_right = f_return(0).double_value(); f_args(0) = x_left; f_return = feval(func, f_args); obj_left = f_return(0).double_value(); gradient = (obj_right - obj_left) / (2*delta); // take central difference hessian = (obj_right - 2*obj + obj_left) / pow(delta, 2.0); hessian = fabs(hessian); // ensures we're going in a decreasing direction if (hessian <= 2e-16) hessian = 1.0; // avoid div by zero a = - gradient / hessian; // hessian inverse gradient: the Newton step a = (a < 5.0)*a + 5.0*(a>=5.0); // Let's avoid extreme steps that might cause crashes // check that this is improvement f_args(0) = x + a*dx; f_return = feval(func, f_args); obj = f_return(0).double_value(); // if not, fall back to bisection if ((obj > obj_0) || isnan(obj)) { f_return = feval("BisectionStep", args); a = f_return(0).double_value(); obj = f_return(1).double_value(); } stepobj(0) = a; stepobj(1) = obj; return octave_value_list(stepobj); } --Boundary_(ID_Czj9YPx5Xm8PqR24vS7v7Q) Content-type: text/octave; name=ExampleBFGSMin.m; charset=us-ascii Content-transfer-encoding: 7BIT Content-disposition: attachment; filename=ExampleBFGSMin.m 1; # This shows how to call BFGSMin.m # This is just used to form objective functions, not important # Function value and gradient vector of the rosenbrock function # The minimizer is at the vector (1,1,..,1), # and the minimized value is 0. function [obj_value, gradient] = rosenbrock(x); dimension = length(x); obj_value = sum(100*(x(2:dimension)-x(1:dimension-1).^2).^2 + (1-x(1:dimension-1)).^2); if nargout > 1 gradient = zeros(dimension, 1); gradient(1:dimension-1) = - 400*x(1:dimension-1).*(x(2:dimension)-x(1:dimension-1).^2) - 2*(1-x(1:dimension-1)); gradient(2:dimension) = gradient(2:dimension) + 200*(x(2:dimension)-x(1:dimension-1).^2); endif endfunction # example obj. fn. - illustrates use of "otherargs" function obj_value = objective(theta, otherargs) location = otherargs{1}; x = theta - location + ones(rows(theta),1); # move minimizer to "location" obj_value = rosenbrock(x); endfunction # example obj. fn. - illustrates calling BFGSMin with 2 arguments function obj_value = objective2(theta) # location defined internally, since not passed dim = rows(theta) - 1; location = 5*(0:dim)/dim; location = location'; x = theta - location + ones(rows(theta),1); # move minimizer to "location" obj_value = rosenbrock(x); endfunction # control options and initial value control = [1000;2;1]; # max 1000 iterations; only report results of last iteration; strong convergence required dim = 10; # dimension of Rosenbrock function theta = zeros(dim+1,1); # starting values location = 5*(0:dim)/dim; location = location'; # do the minimization printf("If this was successful, the minimizer should be\n"); printf("a vector of even steps from 0 to 5\n\n"); printf("BFGSMin - 4 input args\n"); theta = theta - theta; t=cputime(); [theta, obj_value, iterations, convergence] = BFGSMin("objective", theta, {location}, control); t = cputime() - t; printf("Elapsed time = %f\n",t); printf("BFGSMin - 4 input args (dummy otherargs, control supplied)\n"); theta = theta - theta; t=cputime(); [theta, obj_value, iterations, convergence] = BFGSMin("objective2", theta, {1}, control); t = cputime() - t; printf("Elapsed time = %f\n",t); printf("BFGSMin - 3 input args (dummy otherargs, control not supplied)\n"); theta = theta - theta; t=cputime(); [theta, obj_value, iterations, convergence] = BFGSMin("objective", theta, {location}); t = cputime() - t; printf("check that this last worked, since it doesn't print results\n"); theta printf("Elapsed time = %f\n",t); --Boundary_(ID_Czj9YPx5Xm8PqR24vS7v7Q)--