Scythe-1.0.3
|
00001 /* 00002 * Scythe Statistical Library Copyright (C) 2000-2002 Andrew D. Martin 00003 * and Kevin M. Quinn; 2002-present Andrew D. Martin, Kevin M. Quinn, 00004 * and Daniel Pemstein. All Rights Reserved. 00005 * 00006 * This program is free software; you can redistribute it and/or 00007 * modify under the terms of the GNU General Public License as 00008 * published by Free Software Foundation; either version 2 of the 00009 * License, or (at your option) any later version. See the text files 00010 * COPYING and LICENSE, distributed with this source code, for further 00011 * information. 00012 * -------------------------------------------------------------------- 00013 * scythestat/optimize.h 00014 * 00015 */ 00016 00048 #ifndef SCYTHE_OPTIMIZE_H 00049 #define SCYTHE_OPTIMIZE_H 00050 00051 #ifdef SCYTHE_COMPILE_DIRECT 00052 #include "matrix.h" 00053 #include "algorithm.h" 00054 #include "error.h" 00055 #include "rng.h" 00056 #include "distributions.h" 00057 #include "la.h" 00058 #include "ide.h" 00059 #include "smath.h" 00060 #include "stat.h" 00061 #else 00062 #include "scythestat/matrix.h" 00063 #include "scythestat/algorithm.h" 00064 #include "scythestat/error.h" 00065 #include "scythestat/rng.h" 00066 #include "scythestat/distributions.h" 00067 #include "scythestat/la.h" 00068 #include "scythestat/ide.h" 00069 #include "scythestat/smath.h" 00070 #include "scythestat/stat.h" 00071 #endif 00072 00073 /* We want to use an anonymous namespace to make the following consts 00074 * and functions local to this file, but mingw doesn't play nice with 00075 * anonymous namespaces so we do things differently when using the 00076 * cross-compiler. 00077 */ 00078 #ifdef __MINGW32__ 00079 #define SCYTHE_MINGW32_STATIC static 00080 #else 00081 #define SCYTHE_MINGW32_STATIC 00082 #endif 00083 00084 namespace scythe { 00085 #ifndef __MINGW32__ 00086 namespace { 00087 #endif 00088 00089 /* Functions (private to this file) that do very little... */ 00090 template <typename T, matrix_order O, matrix_style S> 00091 SCYTHE_MINGW32_STATIC T donothing (const Matrix<T,O,S>& x) 00092 { 00093 return (T) 0.0; 00094 } 00095 00096 template <typename T> 00097 SCYTHE_MINGW32_STATIC T donothing (T& x) 00098 { 00099 return (T) 0.0; 00100 } 00101 #ifndef __MINGW32__ 00102 } 00103 #endif 00104 00105 00106 /* Return the machine epsilon 00107 * Notes: Algorithm taken from Sedgewick, Robert. 1992. Algorithms 00108 * in C++. Addison Wesley. pg. 561 00109 */ 00116 template <typename T> 00117 T 00118 epsilon() 00119 { 00120 T eps, del, neweps; 00121 del = (T) 0.5; 00122 eps = (T) 0.0; 00123 neweps = (T) 1.0; 00124 00125 while ( del > 0 ) { 00126 if ( 1 + neweps > 1 ) { /* Then the value might be too large */ 00127 eps = neweps; /* ...save the current value... */ 00128 neweps -= del; /* ...and decrement a bit */ 00129 } else { /* Then the value is too small */ 00130 neweps += del; /* ...so increment it */ 00131 } 00132 del *= 0.5; /* Reduce the adjustment by half */ 00133 } 00134 00135 return eps; 00136 } 00137 00163 template <typename T, typename FUNCTOR> 00164 T intsimp (FUNCTOR fun, T a, T b, unsigned int N) 00165 { 00166 SCYTHE_CHECK_10(a > b, scythe_invalid_arg, 00167 "Lower limit larger than upper"); 00168 00169 T I = (T) 0; 00170 T w = (b - a) / N; 00171 for (unsigned int i = 1; i <= N; ++i) 00172 I += w * (fun(a +(i - 1) *w) + 4 * fun(a - w / 2 + i * w) + 00173 fun(a + i * w)) / 6; 00174 00175 return I; 00176 } 00177 00206 template <typename T, typename FUNCTOR> 00207 T adaptsimp(FUNCTOR fun, T a, T b, unsigned int N, double tol = 1e-5) 00208 { 00209 SCYTHE_CHECK_10(a > b, scythe_invalid_arg, 00210 "Lower limit larger than upper"); 00211 00212 T I = intsimp(fun, a, b, N); 00213 if (std::fabs(I - intsimp(fun, a, b, N / 2)) > tol) 00214 return adaptsimp(fun, a, (a + b) / 2, N, tol) 00215 + adaptsimp(fun, (a + b) / 2, b, N, tol); 00216 00217 return I; 00218 } 00219 00247 template <matrix_order RO, matrix_style RS, typename T, 00248 matrix_order PO, matrix_style PS, typename FUNCTOR> 00249 Matrix<T, RO, RS> 00250 gradfdif (FUNCTOR fun, const Matrix<T,PO,PS>& theta) 00251 00252 { 00253 SCYTHE_CHECK_10(! theta.isColVector(), scythe_dimension_error, 00254 "Theta not column vector"); 00255 00256 unsigned int k = theta.size(); 00257 T h = std::sqrt(epsilon<T>()); 00258 h = std::sqrt(h); 00259 00260 Matrix<T,RO,RS> grad(k, 1); 00261 Matrix<T,RO> e; 00262 Matrix<T,RO> temp; 00263 for (unsigned int i = 0; i < k; ++i) { 00264 e = Matrix<T,RO>(k, 1); 00265 e[i] = h; 00266 temp = theta + e; 00267 donothing(temp); // XXX I don't understand this 00268 e = temp - theta; 00269 grad[i] = (fun(theta + e) - fun(theta)) / e[i]; 00270 } 00271 00272 return grad; 00273 } 00274 00275 // Default template version 00276 template <typename T, matrix_order O, matrix_style S, 00277 typename FUNCTOR> 00278 Matrix<T, O, Concrete> 00279 gradfdif (FUNCTOR fun, const Matrix<T,O,S>& theta) 00280 { 00281 return gradfdif<O,Concrete>(fun, theta); 00282 } 00283 00314 template <typename T, matrix_order PO1, matrix_style PS1, 00315 matrix_order PO2, matrix_style PS2, typename FUNCTOR> 00316 T 00317 gradfdifls (FUNCTOR fun, T alpha, const Matrix<T,PO1,PS1>& theta, 00318 const Matrix<T,PO2,PS2>& p) 00319 00320 { 00321 SCYTHE_CHECK_10(! theta.isColVector(), scythe_dimension_error, 00322 "Theta not column vector"); 00323 SCYTHE_CHECK_10(! p.isColVector(), scythe_dimension_error, 00324 "p not column vector"); 00325 00326 unsigned int k = theta.size(); 00327 T h = std::sqrt(epsilon<T>()); 00328 h = std::sqrt(h); 00329 //T h = std::sqrt(2.2e-16); 00330 00331 T deriv; 00332 00333 for (unsigned int i = 0; i < k; ++i) { 00334 T temp = alpha + h; 00335 donothing(temp); 00336 T e = temp - alpha; 00337 deriv = (fun(theta + (alpha + e) * p) - fun(theta + alpha * p)) 00338 / e; 00339 } 00340 00341 return deriv; 00342 } 00343 00370 template <matrix_order RO, matrix_style RS, typename T, 00371 matrix_order PO, matrix_style PS, typename FUNCTOR> 00372 Matrix<T,RO,RS> 00373 jacfdif (FUNCTOR fun, const Matrix<T,PO,PS>& theta) 00374 { 00375 SCYTHE_CHECK_10(! theta.isColVector(), scythe_dimension_error, 00376 "Theta not column vector"); 00377 00378 Matrix<T,RO> fval = fun(theta); 00379 unsigned int k = theta.rows(); 00380 unsigned int n = fval.rows(); 00381 00382 T h = std::sqrt(epsilon<T>()); //2.2e-16 00383 h = std::sqrt(h); 00384 00385 Matrix<T,RO,RS> J(n,k); 00386 Matrix<T,RO> e; 00387 Matrix<T,RO> temp; 00388 Matrix<T,RO> fthetae; 00389 Matrix<T,RO> ftheta; 00390 00391 for (int i = 0; i < k; ++i) { 00392 e = Matrix<T,RO>(k,1); 00393 e[i] = h; 00394 temp = theta + e; 00395 donothing(temp); 00396 e = temp - theta; 00397 fthetae = fun(theta + e); 00398 ftheta = fun(theta); 00399 for (unsigned int j = 0; j < n; ++j) { 00400 J(j,i) = (fthetae[j] - ftheta[j]) / e[i]; 00401 } 00402 } 00403 00404 return J; 00405 } 00406 00407 // default template 00408 template <typename T, matrix_order PO, matrix_style PS, 00409 typename FUNCTOR> 00410 Matrix<T,PO,PS> 00411 jacfdif (FUNCTOR fun, const Matrix<T,PO,PS>& theta) 00412 { 00413 return jacfdif<PO,Concrete>(fun, theta); 00414 } 00415 00416 00443 template <matrix_order RO, matrix_style RS, typename T, 00444 matrix_order PO, matrix_style PS, typename FUNCTOR> 00445 Matrix<T, RO, RS> 00446 hesscdif (FUNCTOR fun, const Matrix<T,PO,PS>& theta) 00447 { 00448 SCYTHE_CHECK_10(! theta.isColVector(), scythe_dimension_error, 00449 "Theta not column vector"); 00450 00451 T fval = fun(theta); 00452 00453 //std::cout << std::endl; 00454 //std::cout << "hesscdif theta = " << theta << "\n"; 00455 //std::cout << "hesscdif fun(theta) = " << fval << std::endl; 00456 00457 unsigned int k = theta.rows(); 00458 00459 // stepsize CAREFUL -- THIS IS MACHINE SPECIFIC !!!! 00460 T h2 = std::sqrt(epsilon<T>()); 00461 //T h2 = (T) 1e-10; 00462 T h = std::sqrt(h2); 00463 00464 Matrix<T, RO, RS> H(k,k); 00465 00466 //std::cout << "h2 = " << h2 << " h = " << h << std::endl; 00467 00468 Matrix<T,RO> ei; 00469 Matrix<T,RO> ej; 00470 Matrix<T,RO> temp; 00471 00472 for (unsigned int i = 0; i < k; ++i) { 00473 ei = Matrix<T,RO>(k, 1); 00474 ei[i] = h; 00475 temp = theta + ei; 00476 donothing(temp); // XXX Again, I'm baffled 00477 ei = temp - theta; 00478 for (unsigned int j = 0; j < k; ++j){ 00479 ej = Matrix<T,RO>(k,1); 00480 ej[j] = h; 00481 temp = theta + ej; 00482 donothing(temp); // XXX and again 00483 ej = temp - theta; 00484 00485 if (i == j) { 00486 H(i,i) = ( -fun(theta + 2.0 * ei) + 16.0 * 00487 fun(theta + ei) - 30.0 * fval + 16.0 * 00488 fun(theta - ei) - 00489 fun(theta - 2.0 * ei)) / (12.0 * h2); 00490 } else { 00491 H(i,j) = ( fun(theta + ei + ej) - fun(theta + ei - ej) 00492 - fun(theta - ei + ej) + fun(theta - ei - ej)) 00493 / (4.0 * h2); 00494 } 00495 } 00496 } 00497 00498 //std::cout << "end of hesscdif, H = " << H << "\n"; 00499 return H; 00500 } 00501 00502 // default template 00503 template <typename T, matrix_order PO, matrix_style PS, 00504 typename FUNCTOR> 00505 Matrix<T,PO,PS> 00506 hesscdif (FUNCTOR fun, const Matrix<T,PO,PS>& theta) 00507 { 00508 return hesscdif<PO,Concrete>(fun, theta); 00509 } 00510 00539 template <typename T, matrix_order PO1, matrix_style PS1, 00540 matrix_order PO2, matrix_style PS2, typename FUNCTOR> 00541 T linesearch1 (FUNCTOR fun, const Matrix<T,PO1,PS1>& theta, 00542 const Matrix<T,PO2,PS2>& p) 00543 { 00544 SCYTHE_CHECK_10(! theta.isColVector(), scythe_dimension_error, 00545 "Theta not column vector"); 00546 SCYTHE_CHECK_10(! p.isColVector(), scythe_dimension_error, 00547 "p not column vector"); 00548 00549 T alpha_bar = (T) 1.0; 00550 T rho = (T) 0.9; 00551 T c = (T) 0.5; 00552 T alpha = alpha_bar; 00553 Matrix<T,PO1> fgrad = gradfdif(fun, theta); 00554 00555 while (fun(theta + alpha * p) > 00556 (fun(theta) + c * alpha * t(fgrad) * p)[0]) { 00557 alpha = rho * alpha; 00558 } 00559 00560 return alpha; 00561 } 00562 00594 template <typename T, matrix_order PO1, matrix_style PS1, 00595 matrix_order PO2, matrix_style PS2, typename FUNCTOR, 00596 typename RNGTYPE> 00597 T linesearch2 (FUNCTOR fun, const Matrix<T,PO1,PS1>& theta, 00598 const Matrix<T,PO2,PS2>& p, rng<RNGTYPE>& runif) 00599 { 00600 SCYTHE_CHECK_10(! theta.isColVector(), scythe_dimension_error, 00601 "Theta not column vector"); 00602 SCYTHE_CHECK_10(! p.isColVector(), scythe_dimension_error, 00603 "p not column vector"); 00604 00605 T alpha_last = (T) 0.0; 00606 T alpha_cur = (T) 1.0; 00607 T alpha_max = (T) 10.0; 00608 T c1 = (T) 1e-4; 00609 T c2 = (T) 0.5; 00610 unsigned int max_iter = 50; 00611 T fgradalpha0 = gradfdifls(fun, (T) 0, theta, p); 00612 00613 for (unsigned int i = 0; i < max_iter; ++i) { 00614 T phi_cur = fun(theta + alpha_cur * p); 00615 T phi_last = fun(theta + alpha_last * p); 00616 00617 if ((phi_cur > (fun(theta) + c1 * alpha_cur * fgradalpha0)) 00618 || ((phi_cur >= phi_last) && (i > 0))) { 00619 T alphastar = zoom(fun, alpha_last, alpha_cur, theta, p); 00620 return alphastar; 00621 } 00622 00623 T fgradalpha_cur = gradfdifls(fun, alpha_cur, theta, p); 00624 if (std::fabs(fgradalpha_cur) <= -1 * c2 * fgradalpha0) 00625 return alpha_cur; 00626 00627 if ( fgradalpha_cur >= (T) 0.0) { 00628 T alphastar = zoom(fun, alpha_cur, alpha_last, theta, p); 00629 return alphastar; 00630 } 00631 00632 alpha_last = alpha_cur; 00633 // runif stuff below is probably not correc KQ 12/08/2006 00634 // I think it should work now DBP 01/02/2007 00635 alpha_cur = runif() * (alpha_max - alpha_cur) + alpha_cur; 00636 } 00637 00638 return 0.001; 00639 } 00640 00669 template <typename T, matrix_order PO1, matrix_style PS1, 00670 matrix_order PO2, matrix_style PS2, typename FUNCTOR> 00671 T zoom (FUNCTOR fun, T alpha_lo, T alpha_hi, 00672 const Matrix<T,PO1,PS1>& theta, const Matrix<T,PO2,PS2>& p) 00673 { 00674 SCYTHE_CHECK_10(! theta.isColVector(), scythe_dimension_error, 00675 "Theta not column vector"); 00676 SCYTHE_CHECK_10(! p.isColVector(), scythe_dimension_error, 00677 "p not column vector"); 00678 00679 T alpha_j = (alpha_lo + alpha_hi) / 2.0; 00680 T phi_0 = fun(theta); 00681 T c1 = (T) 1e-4; 00682 T c2 = (T) 0.5; 00683 T fgrad0 = gradfdifls(fun, (T) 0, theta, p); 00684 00685 unsigned int count = 0; 00686 unsigned int maxit = 20; 00687 while(count < maxit) { 00688 T phi_j = fun(theta + alpha_j * p); 00689 T phi_lo = fun(theta + alpha_lo * p); 00690 00691 if ((phi_j > (phi_0 + c1 * alpha_j * fgrad0)) 00692 || (phi_j >= phi_lo)){ 00693 alpha_hi = alpha_j; 00694 } else { 00695 T fgradj = gradfdifls(fun, alpha_j, theta, p); 00696 if (std::fabs(fgradj) <= -1 * c2 * fgrad0){ 00697 return alpha_j; 00698 } 00699 if ( fgradj * (alpha_hi - alpha_lo) >= 0){ 00700 alpha_hi = alpha_lo; 00701 } 00702 alpha_lo = alpha_j; 00703 } 00704 ++count; 00705 } 00706 00707 return alpha_j; 00708 } 00709 00710 00744 // there were 2 versions of linesearch1-- the latter was what we 00745 // had been calling linesearch2 00746 template <matrix_order RO, matrix_style RS, typename T, 00747 matrix_order PO, matrix_style PS, 00748 typename FUNCTOR, typename RNGTYPE> 00749 Matrix<T,RO,RS> 00750 BFGS (FUNCTOR fun, const Matrix<T,PO,PS>& theta, rng<RNGTYPE>& runif, 00751 unsigned int maxit, T tolerance, bool trace = false) 00752 { 00753 SCYTHE_CHECK_10(! theta.isColVector(), scythe_dimension_error, 00754 "Theta not column vector"); 00755 00756 unsigned int n = theta.size(); 00757 00758 // H is initial inverse hessian 00759 Matrix<T,RO> H = inv(hesscdif(fun, theta)); 00760 00761 // gradient at starting values 00762 Matrix<T,RO> fgrad = gradfdif(fun, theta); 00763 Matrix<T,RO> thetamin = theta; 00764 Matrix<T,RO> fgrad_new = fgrad; 00765 Matrix<T,RO> I = eye<T,RO>(n); 00766 Matrix<T,RO> s; 00767 Matrix<T,RO> y; 00768 00769 unsigned int count = 0; 00770 while( (t(fgrad_new)*fgrad_new)[0] > tolerance) { 00771 Matrix<T> p = -1.0 * H * fgrad; 00772 //std::cout << "initial H * fgrad = " << H * fgrad << "\n"; 00773 //std::cout << "initial p = " << p << "\n"; 00774 00775 T alpha = linesearch2(fun, thetamin, p, runif); 00776 //T alpha = linesearch1(fun, thetamin, p); 00777 00778 //std::cout << "after linesearch p = " << p << "\n"; 00779 00780 00781 Matrix<T> thetamin_new = thetamin + alpha * p; 00782 fgrad_new = gradfdif(fun, thetamin_new); 00783 s = thetamin_new - thetamin; 00784 y = fgrad_new - fgrad; 00785 T rho = 1.0 / (t(y) * s)[0]; 00786 H = (I - rho * s * t(y)) * H *(I - rho * y * t(s)) 00787 + rho * s * t(s); 00788 00789 thetamin = thetamin_new; 00790 fgrad = fgrad_new; 00791 ++count; 00792 00793 #ifndef SCYTHE_RPACK 00794 if (trace) { 00795 std::cout << "BFGS iteration = " << count << std::endl; 00796 std::cout << "thetamin = " << (t(thetamin)) ; 00797 std::cout << "gradient = " << (t(fgrad)) ; 00798 std::cout << "t(gradient) * gradient = " << (t(fgrad) * fgrad) ; 00799 std::cout << "function value = " << fun(thetamin) << 00800 std::endl << std::endl; 00801 } 00802 #endif 00803 //std::cout << "Hessian = " << hesscdif(fun, theta) << "\n"; 00804 //std::cout << "H = " << H << "\n"; 00805 //std::cout << "alpha = " << alpha << std::endl; 00806 //std::cout << "p = " << p << "\n"; 00807 //std::cout << "-1 * H * fgrad = " << -1.0 * H * fgrad << "\n"; 00808 00809 SCYTHE_CHECK(count > maxit, scythe_convergence_error, 00810 "Failed to converge. Try better starting values"); 00811 } 00812 00813 return thetamin; 00814 } 00815 00816 // Default template 00817 template <typename T, matrix_order O, matrix_style S, 00818 typename FUNCTOR, typename RNGTYPE> 00819 Matrix<T,O,Concrete> 00820 BFGS (FUNCTOR fun, const Matrix<T,O,S>& theta, rng<RNGTYPE>& runif, 00821 unsigned int maxit, T tolerance, bool trace = false) 00822 { 00823 return BFGS<O,Concrete> (fun, theta, runif, maxit, tolerance, trace); 00824 } 00825 00826 00827 /* Solves a system of n nonlinear equations in n unknowns of the form 00828 * fun(thetastar) = 0 for thetastar given the function, starting 00829 * value theta, max number of iterations, and tolerance. 00830 * Uses Broyden's method. 00831 */ 00855 template <matrix_order RO, matrix_style RS, typename T, 00856 matrix_order PO, matrix_style PS, typename FUNCTOR> 00857 Matrix<T,RO,RS> 00858 nls_broyden(FUNCTOR fun, const Matrix<T,PO,PS>& theta, 00859 unsigned int maxit = 5000, T tolerance = 1e-6) 00860 { 00861 SCYTHE_CHECK_10(! theta.isColVector(), scythe_dimension_error, 00862 "Theta not column vector"); 00863 00864 00865 Matrix<T,RO> thetastar = theta; 00866 Matrix<T,RO> B = jacfdif(fun, thetastar); 00867 00868 Matrix<T,RO> fthetastar; 00869 Matrix<T,RO> p; 00870 Matrix<T,RO> thetastar_new; 00871 Matrix<T,RO> fthetastar_new; 00872 Matrix<T,RO> s; 00873 Matrix<T,RO> y; 00874 00875 for (unsigned int i = 0; i < maxit; ++i) { 00876 fthetastar = fun(thetastar); 00877 p = lu_solve(B, -1 * fthetastar); 00878 T alpha = (T) 1.0; 00879 thetastar_new = thetastar + alpha*p; 00880 fthetastar_new = fun(thetastar_new); 00881 s = thetastar_new - thetastar; 00882 y = fthetastar_new - fthetastar; 00883 B = B + ((y - B * s) * t(s)) / (t(s) * s); 00884 thetastar = thetastar_new; 00885 if (max(fabs(fthetastar_new)) < tolerance) 00886 return thetastar; 00887 } 00888 00889 SCYTHE_THROW_10(scythe_convergence_error, "Failed to converge. Try better starting values or increase maxit"); 00890 00891 return thetastar; 00892 } 00893 00894 // default template 00895 template <typename T, matrix_order O, matrix_style S, 00896 typename FUNCTOR> 00897 Matrix<T,O,Concrete> 00898 nls_broyden (FUNCTOR fun, const Matrix<T,O,S>& theta, 00899 unsigned int maxit = 5000, T tolerance = 1e-6) 00900 { 00901 return nls_broyden<O,Concrete>(fun, theta, maxit, tolerance); 00902 } 00903 00904 } // namespace scythe 00905 00906 #endif /* SCYTHE_OPTIMIZE_H */