CppAD: A C++ Algorithmic Differentiation Package 20110419
opt_val_hes.hpp
Go to the documentation of this file.
00001 /* $Id$ */
00002 # ifndef CPPAD_OPT_VAL_HES_INCLUDED
00003 # define CPPAD_OPT_VAL_HES_INCLUDED
00004 
00005 /* --------------------------------------------------------------------------
00006 CppAD: C++ Algorithmic Differentiation: Copyright (C) 2003-10 Bradley M. Bell
00007 
00008 CppAD is distributed under multiple licenses. This distribution is under
00009 the terms of the 
00010                     Common Public License Version 1.0.
00011 
00012 A copy of this license is included in the COPYING file of this distribution.
00013 Please visit http://www.coin-or.org/CppAD/ for information on other licenses.
00014 -------------------------------------------------------------------------- */
00015 /*
00016 $begin opt_val_hes$$
00017 $spell
00018         hes
00019         sy
00020         Jacobian
00021         hes
00022         signdet
00023         jac
00024         Bradley
00025         const
00026         CppAD
00027 $$
00028 
00029 
00030 $index Jacobian$$
00031 $index Hessian$$
00032 $index optimal value$$
00033 $index value$$
00034 $index opt_val_hes$$
00035 
00036 $section Jacobian and Hessian of Optimal Values$$
00037 
00038 $head Syntax$$
00039 $icode%signdet% = opt_val_hes(%x%, %y%, %fun%, %jac%, %hes%)%$$  
00040 
00041 $head See Also$$
00042 $cref/BenderQuad/$$
00043 
00044 $head Reference$$
00045 Algorithmic differentiation of implicit functions and optimal values,
00046 Bradley M. Bell and James V. Burke, Advances in Automatic Differentiation,
00047 2008, Springer.
00048 
00049 $head Purpose$$
00050 We are given a function 
00051 $latex S : \R^n \times \R^m \rightarrow \R^\ell$$
00052 and we define $latex F : \R^n \times \R^m \rightarrow \R$$
00053 and $latex V : \R^n \rightarrow \R $$ by
00054 $latex \[
00055 \begin{array}{rcl}
00056         F(x, y) & = & \sum_{k=0}^{\ell-1} S_k ( x , y) 
00057         \\
00058         V(x)    & = & F [ x , Y(x) ] 
00059         \\
00060         0       & = & \partial_y F [x , Y(x) ]
00061 \end{array}
00062 \] $$
00063 We wish to compute the Jacobian 
00064 and possibly also the Hessian, of $latex V (x)$$.
00065 
00066 $head BaseVector$$
00067 The type $icode BaseVector$$ must be a 
00068 $xref/SimpleVector/$$ class. 
00069 We use $icode Base$$ to refer to the type of the elements of 
00070 $icode BaseVector$$; i.e.,
00071 $codei%
00072         %BaseVector%::value_type
00073 %$$
00074 
00075 $head x$$
00076 The argument $icode x$$ has prototype
00077 $codei%
00078         const %BaseVector%& %x%
00079 %$$
00080 and its size must be equal to $icode n$$.
00081 It specifies the point at which we evaluating 
00082 the Jacobian $latex V^{(1)} (x)$$
00083 (and possibly the Hessian $latex V^{(2)} (x)$$).
00084 
00085 
00086 $head y$$
00087 The argument $icode y$$ has prototype
00088 $codei%
00089         const %BaseVector%& %y%
00090 %$$
00091 and its size must be equal to $icode m$$.
00092 It must be equal to $latex Y(x)$$; i.e., 
00093 it must solve the implicit equation
00094 $latex \[
00095         0 = \partial_y F ( x , y) 
00096 \] $$
00097 
00098 $head Fun$$
00099 The argument $icode fun$$ is an object of type $icode Fun$$ 
00100 which must support the member functions listed below.
00101 CppAD will may be recording operations of the type $code%AD<%Base%>%$$
00102 when these member functions are called.
00103 These member functions must not stop such a recording; e.g.,
00104 they must not call $cref/AD<Base>::abort_recording/abort_recording/$$.
00105 
00106 $subhead Fun::ad_vector$$
00107 The type $icode%Fun%::ad_vector%$$ must be a 
00108 $cref/SimpleVector/$$ class with elements of type $codei%AD<%Base%>%$$; i.e.
00109 $codei%
00110         %Fun%::ad_vector::value_type
00111 %$$
00112 is equal to $codei%AD<%Base%>%$$.
00113 
00114 $subhead fun.ell$$
00115 The type $icode Fun$$ must support the syntax
00116 $codei%
00117         %ell% = %fun%.ell()
00118 %$$
00119 where $icode ell$$ has prototype
00120 $codei%
00121         size_t %ell%
00122 %$$
00123 and is the value of $latex \ell$$; i.e.,
00124 the number of terms in the summation.
00125 $pre
00126 
00127 $$
00128 One can choose $icode ell$$ equal to one, and have 
00129 $latex S(x,y)$$ the same as $latex F(x, y)$$.
00130 Each of the functions $latex S_k (x , y)$$, 
00131 (in the summation defining $latex F(x, y)$$)
00132 is differentiated separately using AD.
00133 For very large problems, breaking $latex F(x, y)$$ into the sum
00134 of separate simpler functions may reduce the amount of memory necessary for
00135 algorithmic differentiation and there by speed up the process.
00136 
00137 $subhead fun.s$$
00138 The type $icode Fun$$ must support the syntax
00139 $codei%
00140         %s_k% = %fun%.s(%k%, %x%, %y%)
00141 %$$
00142 The $icode%fun%.s%$$ argument $icode k$$ has prototype
00143 $codei%
00144         size_t %k%
00145 %$$
00146 and is between zero and $icode%ell% - 1%$$.
00147 The argument $icode x$$ to $icode%fun%.s%$$ has prototype
00148 $codei%
00149         const %Fun%::ad_vector& %x%
00150 %$$
00151 and its size must be equal to $icode n$$.
00152 The argument $icode y$$ to $icode%fun%.s%$$ has prototype
00153 $codei%
00154         const %Fun%::ad_vector& %y%
00155 %$$
00156 and its size must be equal to $icode m$$.
00157 The $icode%fun%.s%$$ result $icode s_k$$ has prototype
00158 $codei%
00159         AD<%Base%> %s_k%
00160 %$$
00161 and its value must be given by $latex s_k = S_k ( x , y )$$.
00162 
00163 $subhead fun.sy$$
00164 The type $icode Fun$$ must support the syntax
00165 $codei%
00166         %sy_k% = %fun%.sy(%k%, %x%, %y%)
00167 %$$
00168 The  argument $icode k$$ to $icode%fun%.sy%$$ has prototype
00169 $codei%
00170         size_t %k%
00171 %$$
00172 The  argument $icode x$$ to $icode%fun%.sy%$$ has prototype
00173 $codei%
00174         const %Fun%::ad_vector& %x%
00175 %$$
00176 and its size must be equal to $icode n$$.
00177 The  argument $icode y$$ to $icode%fun%.sy%$$ has prototype
00178 $codei%
00179         const %Fun%::ad_vector& %y%
00180 %$$
00181 and its size must be equal to $icode m$$.
00182 The $icode%fun%.sy%$$ result $icode sy_k$$ has prototype
00183 $codei%
00184         %Fun%::ad_vector %sy_k%
00185 %$$
00186 its size must be equal to $icode m$$,
00187 and its value must be given by $latex sy_k = \partial_y S_k ( x , y )$$.
00188 
00189 $head jac$$
00190 The argument $icode jac$$ has prototype
00191 $codei%
00192         %BaseVector%& %jac%
00193 %$$
00194 and has size $icode n$$ or zero.
00195 The input values of its elements do not matter.
00196 If it has size zero, it is not affected. Otherwise, on output
00197 it contains the Jacobian of $latex V (x)$$; i.e.,
00198 for $latex j = 0 , \ldots , n-1$$, 
00199 $latex \[
00200         jac[ j ] = V^{(1)} (x)_j
00201 \] $$
00202 where $icode x$$ is the first argument to $code opt_val_hes$$.
00203 
00204 $head hes$$
00205 The argument $icode hes$$ has prototype
00206 $codei%
00207         %BaseVector%& %hes%
00208 %$$
00209 and has size $icode%n% * %n%$$ or zero.
00210 The input values of its elements do not matter.
00211 If it has size zero, it is not affected. Otherwise, on output
00212 it contains the Hessian of $latex V (x)$$; i.e.,
00213 for $latex i = 0 , \ldots , n-1$$, and
00214 $latex j = 0 , \ldots , n-1$$, 
00215 $latex \[
00216         hes[ i * n + j ] = V^{(2)} (x)_{i,j}
00217 \] $$
00218 
00219 
00220 $head signdet$$
00221 If $icode%hes%$$ has size zero, $icode signdet$$ is not defined.
00222 Otherwise
00223 the return value $icode signdet$$ is the sign of the determinant for
00224 $latex \partial_{yy}^2 F(x , y) $$.
00225 If it is zero, then the matrix is singular and 
00226 the Hessian is not computed ($icode hes$$ is not changed).
00227 
00228 $head Example$$
00229 $children%
00230         example/opt_val_hes.cpp
00231 %$$
00232 The file
00233 $xref/opt_val_hes.cpp/$$
00234 contains an example and test of this operation.   
00235 It returns true if it succeeds and false otherwise.
00236 
00237 $end
00238 -----------------------------------------------------------------------------
00239 */
00240 
00241 CPPAD_BEGIN_NAMESPACE
00242 /*!
00243 \file opt_val_hes.hpp
00244 \brief Computing Jabobians and Hessians of Optimal Values
00245 */
00246 
00247 /*!
00248 Computing Jabobians and Hessians of Optimal Values
00249 
00250 We are given a function 
00251 \f$ S : {\rm R}^n \times {\rm R}^m \rightarrow {\rm R}^\ell \f$
00252 and we define \f$ F : {\rm R}^n \times {\rm R}^m \rightarrow {\rm R} \f$ 
00253 and \f$ V : {\rm R}^n \rightarrow {\rm R}  \f$ by
00254 \f[
00255 \begin{array}{rcl}
00256         F(x, y) & = & \sum_{k=0}^{\ell-1} S_k ( x , y) 
00257         \\
00258         V(x)    & = & F [ x , Y(x) ] 
00259         \\
00260         0       & = & \partial_y F [x , Y(x) ]
00261 \end{array}
00262 \f]
00263 We wish to compute the Jacobian 
00264 and possibly also the Hessian, of \f$ V (x) \f$.
00265 
00266 \tparam BaseVector
00267 The type \c BaseVector must be a SimpleVector class. 
00268 We use \c Base to refer to the type of the elements of 
00269 \c BaseVector; i.e.,
00270 <tt>BaseVector::value_type</tt>.
00271 
00272 \param x
00273 is a vector with size \c n.
00274 It specifies the point at which we evaluating 
00275 the Jacobian \f$ V^{(1)} (x) \f$
00276 (and possibly the Hessian \f$ V^{(2)} (x) \f$).
00277 
00278 
00279 \param y
00280 is a vector with size \c m.
00281 It must be equal to \f$ Y(x) \f$; i.e., 
00282 it must solve the implicit equation
00283 \f[
00284         0 = \partial_y F ( x , y) 
00285 \f]
00286 
00287 \param fun
00288 The argument \c fun is an object of type \c Fun
00289 wich must support the member functions listed below.
00290 CppAD will may be recording operations of the type  \c AD<Base>
00291 when these member functions are called.
00292 These member functions must not stop such a recording; e.g.,
00293 they must not call \c AD<Base>::abort_recording.
00294 
00295 \par Fun::ad_vector</tt>
00296 The type <tt>Fun::ad_vector</tt> must be a 
00297 SimpleVector class with elements of type  \c AD<Base>; i.e.
00298 <tt>Fun::ad_vector::value_type</tt>
00299 is equal to  \c AD<Base>.
00300 
00301 \par fun.ell
00302 the type \c Fun must support the syntax
00303 \verbatim
00304         ell = fun.ell()
00305 \endverbatim
00306 where \c ell is a \c size_t value that is set to \f$ \ell \f$; i.e.,
00307 the number of terms in the summation.
00308 
00309 \par fun.s
00310 The type \c Fun must support the syntax
00311 \verbatim
00312         s_k = fun.s(k, x, y)
00313 \endverbatim
00314 The argument \c k has prototype <tt>size_t k</tt>.
00315 The argument \c x has prototype <tt>const Fun::ad_vector& x</tt>
00316 and its size must be equal to \c n.
00317 The argument \c y has prototype <tt>const Fun::ad_vector& y</tt>
00318 and its size must be equal to \c m.
00319 The return value \c s_k has prototype \c AD<Base> s_k
00320 and its value must be given by \f$ s_k = S_k ( x , y ) \f$.
00321 
00322 \par fun.sy
00323 The type \c Fun must support the syntax
00324 \verbatim
00325         sy_k = fun.sy(k, x, y)
00326 \endverbatim
00327 The argument \c k has prototype <tt>size_t k</tt>.
00328 The argument \c x has prototype <tt>const Fun::ad_vector& x</tt>
00329 and its size must be equal to \c n.
00330 The argument \c y has prototype <tt>const Fun::ad_vector& y</tt>
00331 and its size must be equal to \c m.
00332 The return value \c sy_k has prototype <tt>Fun::ad_vector& sy_k</tt>,
00333 its size is \c m
00334 and its value must be given by \f$ sy_k = \partial_y S_k ( x , y ) \f$.
00335 
00336 \param jac
00337 is a vector with size \c n or zero.
00338 The input values of its elements do not matter.
00339 If it has size zero, it is not affected. Otherwise, on output
00340 it contains the Jacobian of \f$ V (x) \f$; i.e.,
00341 for \f$ j = 0 , \ldots , n-1 \f$, 
00342 \f[
00343         jac[ j ] = V^{(1)} (x)_j
00344 \f] $$
00345 where \c x is the first argument to \c opt_val_hes.
00346 
00347 \param hes
00348 is a vector with size <tt>n * n</tt> or zero.
00349 The input values of its elements do not matter.
00350 If it has size zero, it is not affected. Otherwise, on output
00351 it contains the Hessian of \f$ V (x) \f$; i.e.,
00352 for \f$ i = 0 , \ldots , n-1 \f$, and
00353 \f$ j = 0 , \ldots , n-1 \f$, 
00354 \f[
00355         hes[ i * n + j ] = V^{(2)} (x)_{i,j}
00356 \f]
00357 
00358 \return
00359 If <tt>hes.size() == 0</tt>, the return value is not defined.
00360 Otherwise,
00361 the return value is the sign of the determinant for
00362 \f$ \partial_{yy}^2 F(x , y) \f$$.
00363 If it is zero, then the matrix is singular and \c hes is not set
00364 to its specified value.
00365 */
00366 
00367 
00368 template <class BaseVector, class Fun>
00369 int opt_val_hes(
00370         const BaseVector&   x     , 
00371         const BaseVector&   y     , 
00372         Fun                 fun   , 
00373         BaseVector&         jac   ,
00374         BaseVector&         hes   )
00375 {       // determine the base type
00376         typedef typename BaseVector::value_type Base;
00377 
00378         // check that BaseVector is a SimpleVector class with Base elements
00379         CheckSimpleVector<Base, BaseVector>();
00380 
00381         // determine the AD vector type
00382         typedef typename Fun::ad_vector ad_vector;
00383 
00384         // check that ad_vector is a SimpleVector class with AD<Base> elements
00385         CheckSimpleVector< AD<Base> , ad_vector >();
00386 
00387         // size of the x and y spaces
00388         size_t n = x.size();
00389         size_t m = y.size();
00390 
00391         // number of terms in the summation
00392         size_t ell = fun.ell();
00393 
00394         // check size of return values
00395         CPPAD_ASSERT_KNOWN(
00396                 jac.size() == n || jac.size() == 0,
00397                 "opt_val_hes: size of the vector jac is not equal to n or zero"
00398         );
00399         CPPAD_ASSERT_KNOWN(
00400                 hes.size() == n * n || hes.size() == 0,
00401                 "opt_val_hes: size of the vector hes is not equal to n * n or zero"
00402         );
00403 
00404         // some temporary indices
00405         size_t i, j, k;
00406 
00407         // AD version of S_k(x, y)
00408         ad_vector s_k(1);
00409 
00410         // ADFun version of S_k(x, y)
00411         ADFun<Base> S_k;
00412 
00413         // AD version of x 
00414         ad_vector a_x(n);
00415 
00416         // AD version of y
00417         ad_vector a_y(n);
00418 
00419         if( jac.size() > 0  )
00420         {       // this is the easy part, computing the V^{(1)} (x) which is equal 
00421                 // to \partial_x F (x, y) (see Thoerem 2 of the reference).
00422 
00423                 // copy x and y to AD version
00424                 for(j = 0; j < n; j++)
00425                         a_x[j] = x[j];
00426                 for(j = 0; j < m; j++)
00427                         a_y[j] = y[j];
00428 
00429                 // initialize summation
00430                 for(j = 0; j < n; j++)
00431                         jac[j] = Base(0.);
00432 
00433                 // add in \partial_x S_k (x, y)
00434                 for(k = 0; k < ell; k++)
00435                 {       // start recording
00436                         Independent(a_x);
00437                         // record
00438                         s_k[0] = fun.s(k, a_x, a_y);
00439                         // stop recording and store in S_k
00440                         S_k.Dependent(a_x, s_k);
00441                         // compute partial of S_k with respect to x
00442                         BaseVector jac_k = S_k.Jacobian(x);
00443                         // add \partial_x S_k (x, y) to jac
00444                         for(j = 0; j < n; j++)
00445                                 jac[j] += jac_k[j];                      
00446                 }
00447         }
00448         // check if we are done
00449         if( hes.size() == 0 )
00450                 return 0;
00451 
00452         /*
00453         In this case, we need to compute the Hessian. Using Theorem 1 of the
00454         reference:
00455                 Y^{(1)}(x) = - F_yy (x, y)^{-1} F_yx (x, y)
00456         Using Theorem 2 of the reference:
00457                 V^{(2)}(x) = F_xx (x, y) + F_xy (x, y)  Y^{(1)}(x) 
00458         */
00459         // Base and AD version of xy
00460         BaseVector xy(n + m);
00461         ad_vector a_xy(n + m);
00462         for(j = 0; j < n; j++)
00463                 a_xy[j] = xy[j] = x[j]; 
00464         for(j = 0; j < m; j++)
00465                 a_xy[n+j] = xy[n+j] = y[j]; 
00466 
00467         // Initialization summation for Hessian of F
00468         size_t nm_sq = (n + m) * (n + m);
00469         BaseVector F_hes(nm_sq);
00470         for(j = 0; j < nm_sq; j++)
00471                 F_hes[j] = Base(0.);
00472         BaseVector hes_k(nm_sq); 
00473 
00474         // add in Hessian of S_k to hes
00475         for(k = 0; k < ell; k++)
00476         {       // start recording
00477                 Independent(a_xy);
00478                 // split out x
00479                 for(j = 0; j < n; j++)
00480                         a_x[j] = a_xy[j];
00481                 // split out y
00482                 for(j = 0; j < m; j++)
00483                         a_y[j] = a_xy[n+j];
00484                 // record
00485                 s_k[0] = fun.s(k, a_x, a_y);
00486                 // stop recording and store in S_k
00487                 S_k.Dependent(a_xy, s_k);
00488                 // when computing the Hessian it pays to optimize the tape
00489                 S_k.optimize();
00490                 // compute Hessian of S_k 
00491                 hes_k = S_k.Hessian(xy, 0);
00492                 // add \partial_x S_k (x, y) to jac
00493                 for(j = 0; j < nm_sq; j++)
00494                         F_hes[j] += hes_k[j];                    
00495         }
00496         // Extract F_yx
00497         BaseVector F_yx(m * n);
00498         for(i = 0; i < m; i++)
00499         {       for(j = 0; j < n; j++)
00500                         F_yx[i * n + j] = F_hes[ (i+n)*(n+m) + j ];
00501         } 
00502         // Extract F_yy
00503         BaseVector F_yy(n * m);
00504         for(i = 0; i < m; i++)
00505         {       for(j = 0; j < m; j++)
00506                         F_yy[i * m + j] = F_hes[ (i+n)*(n+m) + j + n ];
00507         } 
00508 
00509         // compute - Y^{(1)}(x) = F_yy (x, y)^{-1} F_yx (x, y)
00510         BaseVector neg_Y_x(m * n);
00511         Base logdet;
00512         int signdet = CppAD::LuSolve(m, n, F_yy, F_yx, neg_Y_x, logdet);
00513         if( signdet == 0 )
00514                 return signdet;
00515 
00516         // compute hes = F_xx (x, y) + F_xy (x, y)  Y^{(1)}(x) 
00517         for(i = 0; i < n; i++)
00518         {       for(j = 0; j < n; j++)
00519                 {       hes[i * n + j] = F_hes[ i*(n+m) + j ];
00520                         for(k = 0; k < m; k++)
00521                                 hes[i*n+j] -= F_hes[i*(n+m) + k+n] * neg_Y_x[k*n+j];
00522                 }
00523         }
00524         return signdet;
00525 }
00526 
00527 CPPAD_END_NAMESPACE
00528 
00529 # endif