Scythe-1.0.3
optimize.h
Go to the documentation of this file.
00001 /* 
00002  * Scythe Statistical Library Copyright (C) 2000-2002 Andrew D. Martin
00003  * and Kevin M. Quinn; 2002-present Andrew D. Martin, Kevin M. Quinn,
00004  * and Daniel Pemstein.  All Rights Reserved.
00005  *
00006  * This program is free software; you can redistribute it and/or
00007  * modify under the terms of the GNU General Public License as
00008  * published by Free Software Foundation; either version 2 of the
00009  * License, or (at your option) any later version.  See the text files
00010  * COPYING and LICENSE, distributed with this source code, for further
00011  * information.
00012  * --------------------------------------------------------------------
00013  *  scythestat/optimize.h
00014  *
00015  */
00016 
00048 #ifndef SCYTHE_OPTIMIZE_H
00049 #define SCYTHE_OPTIMIZE_H
00050 
00051 #ifdef SCYTHE_COMPILE_DIRECT
00052 #include "matrix.h"
00053 #include "algorithm.h"
00054 #include "error.h"
00055 #include "rng.h"
00056 #include "distributions.h"
00057 #include "la.h"
00058 #include "ide.h"
00059 #include "smath.h"
00060 #include "stat.h"
00061 #else
00062 #include "scythestat/matrix.h"
00063 #include "scythestat/algorithm.h"
00064 #include "scythestat/error.h"
00065 #include "scythestat/rng.h"
00066 #include "scythestat/distributions.h"
00067 #include "scythestat/la.h"
00068 #include "scythestat/ide.h"
00069 #include "scythestat/smath.h"
00070 #include "scythestat/stat.h"
00071 #endif
00072 
00073 /* We want to use an anonymous namespace to make the following consts
00074  * and functions local to this file, but mingw doesn't play nice with
00075  * anonymous namespaces so we do things differently when using the
00076  * cross-compiler.
00077  */
00078 #ifdef __MINGW32__
00079 #define SCYTHE_MINGW32_STATIC static
00080 #else
00081 #define SCYTHE_MINGW32_STATIC
00082 #endif
00083 
00084 namespace scythe {
00085 #ifndef __MINGW32__
00086   namespace {
00087 #endif
00088 
00089     /* Functions (private to this file) that do very little... */
00090     template <typename T, matrix_order O, matrix_style S>
00091     SCYTHE_MINGW32_STATIC T donothing (const Matrix<T,O,S>& x)
00092     {
00093       return (T) 0.0;
00094     }
00095 
00096     template <typename T>
00097     SCYTHE_MINGW32_STATIC T donothing (T& x)
00098     {
00099       return (T) 0.0;
00100     }
00101 #ifndef __MINGW32__
00102   }
00103 #endif
00104 
00105 
00106   /* Return the machine epsilon 
00107    * Notes: Algorithm taken from Sedgewick, Robert. 1992. Algorithms
00108    * in C++. Addison Wesley. pg. 561
00109    */
00116   template <typename T>
00117   T
00118   epsilon()
00119   {
00120     T eps, del, neweps;
00121     del    = (T) 0.5;
00122     eps    = (T) 0.0;
00123     neweps = (T) 1.0;
00124   
00125     while ( del > 0 ) {
00126       if ( 1 + neweps > 1 ) {  /* Then the value might be too large */
00127         eps = neweps;    /* ...save the current value... */
00128         neweps -= del;    /* ...and decrement a bit */
00129       } else {      /* Then the value is too small */
00130         neweps += del;    /* ...so increment it */
00131       }
00132       del *= 0.5;      /* Reduce the adjustment by half */
00133     }
00134 
00135     return eps;
00136   }
00137   
00163   template <typename T, typename FUNCTOR>
00164   T  intsimp (FUNCTOR fun, T a, T b, unsigned int N)
00165   {
00166     SCYTHE_CHECK_10(a > b, scythe_invalid_arg,
00167         "Lower limit larger than upper");
00168     
00169     T I = (T) 0;
00170     T w = (b - a) / N;
00171     for (unsigned int i = 1; i <= N; ++i)
00172       I += w * (fun(a +(i - 1) *w) + 4 * fun(a - w / 2 + i * w) +
00173           fun(a + i * w)) / 6;
00174    
00175     return I;
00176   }
00177   
00206   template <typename T, typename FUNCTOR>
00207   T adaptsimp(FUNCTOR fun, T a, T b, unsigned int N, double tol = 1e-5)
00208   {
00209     SCYTHE_CHECK_10(a > b, scythe_invalid_arg,
00210         "Lower limit larger than upper");
00211 
00212     T I = intsimp(fun, a, b, N);
00213     if (std::fabs(I - intsimp(fun, a, b, N / 2)) > tol)
00214       return adaptsimp(fun, a, (a + b) / 2, N, tol)
00215         + adaptsimp(fun, (a + b) / 2, b, N, tol);
00216 
00217     return I;
00218   }
00219 
00247   template <matrix_order RO, matrix_style RS, typename T,
00248             matrix_order PO, matrix_style PS, typename FUNCTOR>
00249   Matrix<T, RO, RS>
00250   gradfdif (FUNCTOR fun, const Matrix<T,PO,PS>& theta)
00251       
00252   {
00253     SCYTHE_CHECK_10(! theta.isColVector(), scythe_dimension_error,
00254         "Theta not column vector");
00255 
00256     unsigned int k = theta.size();
00257     T h = std::sqrt(epsilon<T>());
00258     h = std::sqrt(h);
00259 
00260     Matrix<T,RO,RS> grad(k, 1);
00261     Matrix<T,RO> e;
00262     Matrix<T,RO> temp;
00263     for (unsigned int i = 0; i < k; ++i) {
00264       e = Matrix<T,RO>(k, 1);
00265       e[i] = h;
00266       temp = theta + e;
00267       donothing(temp); // XXX I don't understand this
00268       e = temp - theta;
00269       grad[i] = (fun(theta + e) - fun(theta)) / e[i];
00270     }
00271 
00272     return grad;
00273   }
00274 
00275   // Default template version
00276   template <typename T, matrix_order O, matrix_style S, 
00277             typename FUNCTOR>
00278   Matrix<T, O, Concrete>
00279   gradfdif (FUNCTOR fun, const Matrix<T,O,S>& theta)
00280   {
00281     return gradfdif<O,Concrete>(fun, theta);
00282   }
00283 
00314   template <typename T, matrix_order PO1, matrix_style PS1,
00315             matrix_order PO2, matrix_style PS2, typename FUNCTOR>
00316   T
00317   gradfdifls (FUNCTOR fun, T alpha, const Matrix<T,PO1,PS1>& theta, 
00318               const Matrix<T,PO2,PS2>& p)
00319 
00320   {
00321     SCYTHE_CHECK_10(! theta.isColVector(), scythe_dimension_error,
00322         "Theta not column vector");
00323     SCYTHE_CHECK_10(! p.isColVector(), scythe_dimension_error,
00324         "p not column vector");
00325 
00326     unsigned int k = theta.size();
00327     T h = std::sqrt(epsilon<T>()); 
00328     h = std::sqrt(h);
00329     //T h = std::sqrt(2.2e-16);
00330 
00331     T deriv;
00332 
00333     for (unsigned int i = 0; i < k; ++i) {
00334       T temp = alpha + h;
00335       donothing(temp);
00336       T e = temp - alpha;
00337       deriv = (fun(theta + (alpha + e) * p) - fun(theta + alpha * p)) 
00338               / e;
00339     }
00340     
00341     return deriv;
00342   }
00343 
00370   template <matrix_order RO, matrix_style RS, typename T,
00371             matrix_order PO, matrix_style PS, typename FUNCTOR>
00372   Matrix<T,RO,RS>
00373   jacfdif (FUNCTOR fun, const Matrix<T,PO,PS>& theta)
00374   {
00375     SCYTHE_CHECK_10(! theta.isColVector(), scythe_dimension_error,
00376         "Theta not column vector");
00377 
00378     Matrix<T,RO> fval = fun(theta);
00379     unsigned int k = theta.rows();
00380     unsigned int n = fval.rows();
00381 
00382     T h = std::sqrt(epsilon<T>()); //2.2e-16
00383     h = std::sqrt(h);
00384 
00385     Matrix<T,RO,RS> J(n,k);
00386     Matrix<T,RO> e;
00387     Matrix<T,RO> temp;
00388     Matrix<T,RO> fthetae;
00389     Matrix<T,RO> ftheta;
00390     
00391     for (int i = 0; i < k; ++i) {
00392       e = Matrix<T,RO>(k,1);
00393       e[i] = h;
00394       temp = theta + e;
00395       donothing(temp); 
00396       e = temp - theta;
00397       fthetae = fun(theta + e);
00398       ftheta = fun(theta);
00399       for (unsigned int j = 0; j < n; ++j) {
00400         J(j,i) = (fthetae[j] - ftheta[j]) / e[i];
00401       }
00402     }
00403    
00404     return J;
00405   }
00406 
00407   // default template
00408   template <typename T, matrix_order PO, matrix_style PS,
00409             typename FUNCTOR>
00410   Matrix<T,PO,PS>
00411   jacfdif (FUNCTOR fun, const Matrix<T,PO,PS>& theta)
00412   {
00413     return jacfdif<PO,Concrete>(fun, theta);
00414   }
00415 
00416 
00443   template <matrix_order RO, matrix_style RS, typename T,
00444             matrix_order PO, matrix_style PS, typename FUNCTOR>
00445   Matrix<T, RO, RS>
00446   hesscdif (FUNCTOR fun, const Matrix<T,PO,PS>& theta)
00447   {
00448     SCYTHE_CHECK_10(! theta.isColVector(), scythe_dimension_error,
00449         "Theta not column vector");
00450     
00451     T fval = fun(theta);
00452 
00453     //std::cout << std::endl;
00454     //std::cout << "hesscdif theta = " << theta << "\n";
00455     //std::cout << "hesscdif fun(theta) = " << fval << std::endl;
00456 
00457     unsigned int k = theta.rows();
00458 
00459     // stepsize CAREFUL -- THIS IS MACHINE SPECIFIC !!!!
00460     T h2 = std::sqrt(epsilon<T>());
00461     //T h2 = (T) 1e-10;
00462     T h = std::sqrt(h2); 
00463 
00464     Matrix<T, RO, RS> H(k,k);
00465 
00466     //std::cout << "h2 = " << h2 << "  h = " << h << std::endl;
00467 
00468     Matrix<T,RO> ei;
00469     Matrix<T,RO> ej;
00470     Matrix<T,RO> temp;
00471 
00472     for (unsigned int i = 0; i < k; ++i) {
00473       ei = Matrix<T,RO>(k, 1);
00474       ei[i] = h;
00475       temp = theta + ei;
00476       donothing(temp); // XXX Again, I'm baffled
00477       ei = temp - theta;
00478       for (unsigned int j = 0; j < k; ++j){
00479         ej = Matrix<T,RO>(k,1);
00480         ej[j] = h;
00481         temp = theta + ej;
00482         donothing(temp); // XXX and again
00483         ej = temp - theta;
00484         
00485         if (i == j) {
00486           H(i,i) = ( -fun(theta + 2.0 * ei) + 16.0 *
00487               fun(theta + ei) - 30.0 * fval + 16.0 *
00488               fun(theta - ei) -
00489               fun(theta - 2.0 * ei)) / (12.0 * h2);
00490         } else {
00491           H(i,j) = ( fun(theta + ei + ej) - fun(theta + ei - ej)
00492               - fun(theta - ei + ej) + fun(theta - ei - ej))
00493             / (4.0 * h2);
00494         }
00495       }
00496     }
00497        
00498     //std::cout << "end of hesscdif, H = " << H << "\n";
00499     return H;
00500   }
00501 
00502   // default template
00503   template <typename T, matrix_order PO, matrix_style PS,
00504             typename FUNCTOR>
00505   Matrix<T,PO,PS>
00506   hesscdif (FUNCTOR fun, const Matrix<T,PO,PS>& theta)
00507   {
00508     return hesscdif<PO,Concrete>(fun, theta);
00509   }
00510 
00539   template <typename T, matrix_order PO1, matrix_style PS1,
00540             matrix_order PO2, matrix_style PS2, typename FUNCTOR>
00541   T linesearch1 (FUNCTOR fun, const Matrix<T,PO1,PS1>& theta,
00542                  const Matrix<T,PO2,PS2>& p)
00543   {
00544     SCYTHE_CHECK_10(! theta.isColVector(), scythe_dimension_error,
00545         "Theta not column vector");
00546     SCYTHE_CHECK_10(! p.isColVector(), scythe_dimension_error,
00547         "p not column vector");
00548 
00549     T alpha_bar = (T) 1.0;
00550     T rho = (T) 0.9;
00551     T c   = (T) 0.5;
00552     T alpha = alpha_bar;
00553     Matrix<T,PO1> fgrad = gradfdif(fun, theta);
00554 
00555     while (fun(theta + alpha * p) > 
00556            (fun(theta) + c * alpha * t(fgrad) * p)[0]) {
00557       alpha = rho * alpha;
00558     }
00559 
00560     return alpha;
00561   }
00562 
00594   template <typename T, matrix_order PO1, matrix_style PS1,
00595             matrix_order PO2, matrix_style PS2, typename FUNCTOR,
00596             typename RNGTYPE>
00597   T linesearch2 (FUNCTOR fun, const Matrix<T,PO1,PS1>& theta,
00598                  const Matrix<T,PO2,PS2>& p, rng<RNGTYPE>& runif)
00599   {
00600     SCYTHE_CHECK_10(! theta.isColVector(), scythe_dimension_error,
00601         "Theta not column vector");
00602     SCYTHE_CHECK_10(! p.isColVector(), scythe_dimension_error,
00603         "p not column vector");
00604 
00605     T alpha_last = (T) 0.0;
00606     T alpha_cur = (T) 1.0;
00607     T alpha_max = (T) 10.0;
00608     T c1 = (T) 1e-4;
00609     T c2 = (T) 0.5;
00610     unsigned int max_iter = 50;
00611     T fgradalpha0 = gradfdifls(fun, (T) 0, theta, p);
00612 
00613     for (unsigned int i = 0; i < max_iter; ++i) {
00614       T phi_cur = fun(theta + alpha_cur * p);
00615       T phi_last = fun(theta + alpha_last * p);
00616      
00617       if ((phi_cur > (fun(theta) + c1 * alpha_cur * fgradalpha0))
00618           || ((phi_cur >= phi_last) && (i > 0))) {
00619         T alphastar = zoom(fun, alpha_last, alpha_cur, theta, p);
00620         return alphastar;
00621       }
00622 
00623       T fgradalpha_cur = gradfdifls(fun, alpha_cur, theta, p);
00624       if (std::fabs(fgradalpha_cur) <= -1 * c2 * fgradalpha0)
00625         return alpha_cur;
00626 
00627       if ( fgradalpha_cur >= (T) 0.0) {
00628         T alphastar = zoom(fun, alpha_cur, alpha_last, theta, p);
00629         return alphastar;
00630       }
00631       
00632       alpha_last = alpha_cur;
00633       // runif stuff below is probably not correc KQ 12/08/2006
00634       // I think it should work now DBP 01/02/2007
00635       alpha_cur = runif() * (alpha_max - alpha_cur) + alpha_cur;
00636     }
00637 
00638     return 0.001;
00639   }
00640 
00669   template <typename T, matrix_order PO1, matrix_style PS1,
00670             matrix_order PO2, matrix_style PS2, typename FUNCTOR>
00671   T zoom (FUNCTOR fun, T alpha_lo, T alpha_hi,
00672           const Matrix<T,PO1,PS1>& theta, const Matrix<T,PO2,PS2>& p)
00673   {
00674     SCYTHE_CHECK_10(! theta.isColVector(), scythe_dimension_error,
00675         "Theta not column vector");
00676     SCYTHE_CHECK_10(! p.isColVector(), scythe_dimension_error,
00677         "p not column vector");
00678 
00679     T alpha_j = (alpha_lo + alpha_hi) / 2.0;
00680     T phi_0 = fun(theta);
00681     T c1 = (T) 1e-4;
00682     T c2 = (T) 0.5;
00683     T fgrad0 = gradfdifls(fun, (T) 0, theta, p);
00684 
00685     unsigned int count = 0;
00686     unsigned int maxit = 20;
00687     while(count < maxit) {
00688       T phi_j = fun(theta + alpha_j * p);
00689       T phi_lo = fun(theta + alpha_lo * p);
00690      
00691       if ((phi_j > (phi_0 + c1 * alpha_j * fgrad0))
00692           || (phi_j >= phi_lo)){
00693         alpha_hi = alpha_j;
00694       } else {
00695         T fgradj = gradfdifls(fun, alpha_j, theta, p);
00696         if (std::fabs(fgradj) <= -1 * c2 * fgrad0){ 
00697           return alpha_j;
00698         }
00699         if ( fgradj * (alpha_hi - alpha_lo) >= 0){
00700           alpha_hi = alpha_lo;
00701         }
00702         alpha_lo = alpha_j;
00703       }
00704       ++count;
00705     }
00706    
00707     return alpha_j;
00708   }
00709 
00710 
00744   // there were 2 versions of linesearch1-- the latter was what we
00745   // had been calling linesearch2
00746   template <matrix_order RO, matrix_style RS, typename T, 
00747             matrix_order PO, matrix_style PS,
00748             typename FUNCTOR, typename RNGTYPE>
00749   Matrix<T,RO,RS>
00750   BFGS (FUNCTOR fun, const Matrix<T,PO,PS>& theta, rng<RNGTYPE>& runif, 
00751         unsigned int maxit, T tolerance, bool trace = false)
00752   {
00753     SCYTHE_CHECK_10(! theta.isColVector(), scythe_dimension_error,
00754         "Theta not column vector");
00755 
00756     unsigned int n = theta.size();
00757 
00758     // H is initial inverse hessian
00759     Matrix<T,RO> H = inv(hesscdif(fun, theta));
00760 
00761     // gradient at starting values
00762     Matrix<T,RO> fgrad = gradfdif(fun, theta);
00763     Matrix<T,RO> thetamin = theta;
00764     Matrix<T,RO> fgrad_new = fgrad;
00765     Matrix<T,RO> I = eye<T,RO>(n); 
00766     Matrix<T,RO> s;
00767     Matrix<T,RO> y;
00768 
00769     unsigned int count = 0;
00770     while( (t(fgrad_new)*fgrad_new)[0] > tolerance) {
00771       Matrix<T> p = -1.0 * H * fgrad;
00772       //std::cout << "initial H * fgrad = " << H * fgrad << "\n";
00773       //std::cout << "initial p = " << p << "\n";
00774 
00775       T alpha = linesearch2(fun, thetamin, p, runif);
00776       //T alpha = linesearch1(fun, thetamin, p);
00777 
00778       //std::cout << "after linesearch p = " << p << "\n";
00779 
00780 
00781       Matrix<T> thetamin_new = thetamin + alpha * p;
00782       fgrad_new = gradfdif(fun, thetamin_new);
00783       s = thetamin_new - thetamin;
00784       y = fgrad_new - fgrad;
00785       T rho = 1.0 / (t(y) * s)[0];
00786       H = (I - rho * s * t(y)) * H *(I - rho * y * t(s))
00787         + rho * s * t(s);
00788 
00789       thetamin = thetamin_new;
00790       fgrad = fgrad_new;
00791       ++count;
00792 
00793 #ifndef SCYTHE_RPACK
00794       if (trace) {
00795         std::cout << "BFGS iteration = " << count << std::endl;
00796         std::cout << "thetamin = " << (t(thetamin)) ;
00797         std::cout << "gradient = " << (t(fgrad)) ;
00798         std::cout << "t(gradient) * gradient = " << (t(fgrad) * fgrad) ;
00799         std::cout << "function value = " << fun(thetamin) << 
00800         std::endl << std::endl;
00801       }
00802 #endif
00803       //std::cout << "Hessian = " << hesscdif(fun, theta) << "\n";
00804       //std::cout << "H = " << H << "\n";
00805       //std::cout << "alpha = " << alpha << std::endl;
00806       //std::cout << "p = " << p << "\n";
00807       //std::cout << "-1 * H * fgrad = " << -1.0 * H * fgrad << "\n";
00808 
00809       SCYTHE_CHECK(count > maxit, scythe_convergence_error,
00810           "Failed to converge.  Try better starting values");
00811     }
00812    
00813     return thetamin;
00814   }
00815 
00816   // Default template
00817   template <typename T, matrix_order O, matrix_style S,
00818             typename FUNCTOR, typename RNGTYPE>
00819   Matrix<T,O,Concrete>
00820   BFGS (FUNCTOR fun, const Matrix<T,O,S>& theta, rng<RNGTYPE>& runif,
00821         unsigned int maxit, T tolerance, bool trace = false)
00822   {
00823     return BFGS<O,Concrete> (fun, theta, runif, maxit, tolerance, trace);
00824   }
00825 
00826   
00827   /* Solves a system of n nonlinear equations in n unknowns of the form
00828    * fun(thetastar) = 0 for thetastar given the function, starting 
00829    * value theta, max number of iterations, and tolerance.
00830    * Uses Broyden's method.
00831    */
00855   template <matrix_order RO, matrix_style RS, typename T,
00856             matrix_order PO, matrix_style PS, typename FUNCTOR>
00857   Matrix<T,RO,RS>
00858   nls_broyden(FUNCTOR fun, const Matrix<T,PO,PS>& theta,
00859               unsigned int maxit = 5000, T tolerance = 1e-6)
00860   {
00861     SCYTHE_CHECK_10(! theta.isColVector(), scythe_dimension_error,
00862         "Theta not column vector");
00863 
00864 
00865     Matrix<T,RO> thetastar = theta;
00866     Matrix<T,RO> B = jacfdif(fun, thetastar);
00867 
00868     Matrix<T,RO> fthetastar;
00869     Matrix<T,RO> p;
00870     Matrix<T,RO> thetastar_new;
00871     Matrix<T,RO> fthetastar_new;
00872     Matrix<T,RO> s;
00873     Matrix<T,RO> y;
00874 
00875     for (unsigned int i = 0; i < maxit; ++i) {
00876       fthetastar = fun(thetastar);
00877       p = lu_solve(B, -1 * fthetastar);
00878       T alpha = (T) 1.0;
00879       thetastar_new = thetastar + alpha*p;
00880       fthetastar_new = fun(thetastar_new);
00881       s = thetastar_new - thetastar;
00882       y = fthetastar_new - fthetastar;
00883       B = B + ((y - B * s) * t(s)) / (t(s) * s);
00884       thetastar = thetastar_new;
00885       if (max(fabs(fthetastar_new)) < tolerance)
00886         return thetastar;
00887     }
00888  
00889     SCYTHE_THROW_10(scythe_convergence_error,  "Failed to converge.  Try better starting values or increase maxit");
00890 
00891     return thetastar;
00892   }
00893 
00894   // default template
00895   template <typename T, matrix_order O, matrix_style S,
00896             typename FUNCTOR>
00897   Matrix<T,O,Concrete>
00898   nls_broyden (FUNCTOR fun, const Matrix<T,O,S>& theta,
00899                unsigned int maxit = 5000, T tolerance = 1e-6)
00900   {
00901     return nls_broyden<O,Concrete>(fun, theta, maxit, tolerance);
00902   }
00903 
00904 } // namespace scythe
00905 
00906 #endif /* SCYTHE_OPTIMIZE_H */