CppAD: A C++ Algorithmic Differentiation Package  20130102
opt_val_hes.hpp
Go to the documentation of this file.
00001 /* $Id$ */
00002 # ifndef CPPAD_OPT_VAL_HES_INCLUDED
00003 # define CPPAD_OPT_VAL_HES_INCLUDED
00004 
00005 /* --------------------------------------------------------------------------
00006 CppAD: C++ Algorithmic Differentiation: Copyright (C) 2003-12 Bradley M. Bell
00007 
00008 CppAD is distributed under multiple licenses. This distribution is under
00009 the terms of the 
00010                     Eclipse Public License Version 1.0.
00011 
00012 A copy of this license is included in the COPYING file of this distribution.
00013 Please visit http://www.coin-or.org/CppAD/ for information on other licenses.
00014 -------------------------------------------------------------------------- */
00015 /*
00016 $begin opt_val_hes$$
00017 $spell
00018      hes
00019      sy
00020      Jacobian
00021      hes
00022      signdet
00023      jac
00024      Bradley
00025      const
00026      CppAD
00027 $$
00028 
00029 
00030 $index Jacobian$$
00031 $index Hessian$$
00032 $index optimal value$$
00033 $index value$$
00034 $index opt_val_hes$$
00035 
00036 $section Jacobian and Hessian of Optimal Values$$
00037 
00038 $head Syntax$$
00039 $icode%signdet% = opt_val_hes(%x%, %y%, %fun%, %jac%, %hes%)%$$  
00040 
00041 $head See Also$$
00042 $cref BenderQuad$$
00043 
00044 $head Reference$$
00045 Algorithmic differentiation of implicit functions and optimal values,
00046 Bradley M. Bell and James V. Burke, Advances in Automatic Differentiation,
00047 2008, Springer.
00048 
00049 $head Purpose$$
00050 We are given a function 
00051 $latex S : \B{R}^n \times \B{R}^m \rightarrow \B{R}^\ell$$
00052 and we define $latex F : \B{R}^n \times \B{R}^m \rightarrow \B{R}$$
00053 and $latex V : \B{R}^n \rightarrow \B{R} $$ by
00054 $latex \[
00055 \begin{array}{rcl}
00056      F(x, y) & = & \sum_{k=0}^{\ell-1} S_k ( x , y) 
00057      \\
00058      V(x)    & = & F [ x , Y(x) ] 
00059      \\
00060      0       & = & \partial_y F [x , Y(x) ]
00061 \end{array}
00062 \] $$
00063 We wish to compute the Jacobian 
00064 and possibly also the Hessian, of $latex V (x)$$.
00065 
00066 $head BaseVector$$
00067 The type $icode BaseVector$$ must be a 
00068 $cref SimpleVector$$ class. 
00069 We use $icode Base$$ to refer to the type of the elements of 
00070 $icode BaseVector$$; i.e.,
00071 $codei%
00072      %BaseVector%::value_type
00073 %$$
00074 
00075 $head x$$
00076 The argument $icode x$$ has prototype
00077 $codei%
00078      const %BaseVector%& %x%
00079 %$$
00080 and its size must be equal to $icode n$$.
00081 It specifies the point at which we evaluating 
00082 the Jacobian $latex V^{(1)} (x)$$
00083 (and possibly the Hessian $latex V^{(2)} (x)$$).
00084 
00085 
00086 $head y$$
00087 The argument $icode y$$ has prototype
00088 $codei%
00089      const %BaseVector%& %y%
00090 %$$
00091 and its size must be equal to $icode m$$.
00092 It must be equal to $latex Y(x)$$; i.e., 
00093 it must solve the implicit equation
00094 $latex \[
00095      0 = \partial_y F ( x , y) 
00096 \] $$
00097 
00098 $head Fun$$
00099 The argument $icode fun$$ is an object of type $icode Fun$$ 
00100 which must support the member functions listed below.
00101 CppAD will may be recording operations of the type $code%AD<%Base%>%$$
00102 when these member functions are called.
00103 These member functions must not stop such a recording; e.g.,
00104 they must not call $cref/AD<Base>::abort_recording/abort_recording/$$.
00105 
00106 $subhead Fun::ad_vector$$
00107 The type $icode%Fun%::ad_vector%$$ must be a 
00108 $cref SimpleVector$$ class with elements of type $codei%AD<%Base%>%$$; i.e.
00109 $codei%
00110      %Fun%::ad_vector::value_type
00111 %$$
00112 is equal to $codei%AD<%Base%>%$$.
00113 
00114 $subhead fun.ell$$
00115 The type $icode Fun$$ must support the syntax
00116 $codei%
00117      %ell% = %fun%.ell()
00118 %$$
00119 where $icode ell$$ has prototype
00120 $codei%
00121      size_t %ell%
00122 %$$
00123 and is the value of $latex \ell$$; i.e.,
00124 the number of terms in the summation.
00125 $pre
00126 
00127 $$
00128 One can choose $icode ell$$ equal to one, and have 
00129 $latex S(x,y)$$ the same as $latex F(x, y)$$.
00130 Each of the functions $latex S_k (x , y)$$, 
00131 (in the summation defining $latex F(x, y)$$)
00132 is differentiated separately using AD.
00133 For very large problems, breaking $latex F(x, y)$$ into the sum
00134 of separate simpler functions may reduce the amount of memory necessary for
00135 algorithmic differentiation and there by speed up the process.
00136 
00137 $subhead fun.s$$
00138 The type $icode Fun$$ must support the syntax
00139 $codei%
00140      %s_k% = %fun%.s(%k%, %x%, %y%)
00141 %$$
00142 The $icode%fun%.s%$$ argument $icode k$$ has prototype
00143 $codei%
00144      size_t %k%
00145 %$$
00146 and is between zero and $icode%ell% - 1%$$.
00147 The argument $icode x$$ to $icode%fun%.s%$$ has prototype
00148 $codei%
00149      const %Fun%::ad_vector& %x%
00150 %$$
00151 and its size must be equal to $icode n$$.
00152 The argument $icode y$$ to $icode%fun%.s%$$ has prototype
00153 $codei%
00154      const %Fun%::ad_vector& %y%
00155 %$$
00156 and its size must be equal to $icode m$$.
00157 The $icode%fun%.s%$$ result $icode s_k$$ has prototype
00158 $codei%
00159      AD<%Base%> %s_k%
00160 %$$
00161 and its value must be given by $latex s_k = S_k ( x , y )$$.
00162 
00163 $subhead fun.sy$$
00164 The type $icode Fun$$ must support the syntax
00165 $codei%
00166      %sy_k% = %fun%.sy(%k%, %x%, %y%)
00167 %$$
00168 The  argument $icode k$$ to $icode%fun%.sy%$$ has prototype
00169 $codei%
00170      size_t %k%
00171 %$$
00172 The  argument $icode x$$ to $icode%fun%.sy%$$ has prototype
00173 $codei%
00174      const %Fun%::ad_vector& %x%
00175 %$$
00176 and its size must be equal to $icode n$$.
00177 The  argument $icode y$$ to $icode%fun%.sy%$$ has prototype
00178 $codei%
00179      const %Fun%::ad_vector& %y%
00180 %$$
00181 and its size must be equal to $icode m$$.
00182 The $icode%fun%.sy%$$ result $icode sy_k$$ has prototype
00183 $codei%
00184      %Fun%::ad_vector %sy_k%
00185 %$$
00186 its size must be equal to $icode m$$,
00187 and its value must be given by $latex sy_k = \partial_y S_k ( x , y )$$.
00188 
00189 $head jac$$
00190 The argument $icode jac$$ has prototype
00191 $codei%
00192      %BaseVector%& %jac%
00193 %$$
00194 and has size $icode n$$ or zero.
00195 The input values of its elements do not matter.
00196 If it has size zero, it is not affected. Otherwise, on output
00197 it contains the Jacobian of $latex V (x)$$; i.e.,
00198 for $latex j = 0 , \ldots , n-1$$, 
00199 $latex \[
00200      jac[ j ] = V^{(1)} (x)_j
00201 \] $$
00202 where $icode x$$ is the first argument to $code opt_val_hes$$.
00203 
00204 $head hes$$
00205 The argument $icode hes$$ has prototype
00206 $codei%
00207      %BaseVector%& %hes%
00208 %$$
00209 and has size $icode%n% * %n%$$ or zero.
00210 The input values of its elements do not matter.
00211 If it has size zero, it is not affected. Otherwise, on output
00212 it contains the Hessian of $latex V (x)$$; i.e.,
00213 for $latex i = 0 , \ldots , n-1$$, and
00214 $latex j = 0 , \ldots , n-1$$, 
00215 $latex \[
00216      hes[ i * n + j ] = V^{(2)} (x)_{i,j}
00217 \] $$
00218 
00219 
00220 $head signdet$$
00221 If $icode%hes%$$ has size zero, $icode signdet$$ is not defined.
00222 Otherwise
00223 the return value $icode signdet$$ is the sign of the determinant for
00224 $latex \partial_{yy}^2 F(x , y) $$.
00225 If it is zero, then the matrix is singular and 
00226 the Hessian is not computed ($icode hes$$ is not changed).
00227 
00228 $head Example$$
00229 $children%
00230      example/opt_val_hes.cpp
00231 %$$
00232 The file
00233 $cref opt_val_hes.cpp$$
00234 contains an example and test of this operation.   
00235 It returns true if it succeeds and false otherwise.
00236 
00237 $end
00238 -----------------------------------------------------------------------------
00239 */
00240 
00241 CPPAD_BEGIN_NAMESPACE
00242 /*!
00243 \defgroup opt_val_hes_hpp opt_val_hes.hpp
00244 \{
00245 \file opt_val_hes.hpp
00246 \brief Computing Jabobians and Hessians of Optimal Values
00247 */
00248 
00249 /*!
00250 Computing Jabobians and Hessians of Optimal Values
00251 
00252 We are given a function 
00253 \f$ S : {\rm R}^n \times {\rm R}^m \rightarrow {\rm R}^\ell \f$
00254 and we define \f$ F : {\rm R}^n \times {\rm R}^m \rightarrow {\rm R} \f$ 
00255 and \f$ V : {\rm R}^n \rightarrow {\rm R}  \f$ by
00256 \f[
00257 \begin{array}{rcl}
00258      F(x, y) & = & \sum_{k=0}^{\ell-1} S_k ( x , y) 
00259      \\
00260      V(x)    & = & F [ x , Y(x) ] 
00261      \\
00262      0       & = & \partial_y F [x , Y(x) ]
00263 \end{array}
00264 \f]
00265 We wish to compute the Jacobian 
00266 and possibly also the Hessian, of \f$ V (x) \f$.
00267 
00268 \tparam BaseVector
00269 The type \c BaseVector must be a SimpleVector class. 
00270 We use \c Base to refer to the type of the elements of 
00271 \c BaseVector; i.e.,
00272 <tt>BaseVector::value_type</tt>.
00273 
00274 \param x
00275 is a vector with size \c n.
00276 It specifies the point at which we evaluating 
00277 the Jacobian \f$ V^{(1)} (x) \f$
00278 (and possibly the Hessian \f$ V^{(2)} (x) \f$).
00279 
00280 
00281 \param y
00282 is a vector with size \c m.
00283 It must be equal to \f$ Y(x) \f$; i.e., 
00284 it must solve the implicit equation
00285 \f[
00286      0 = \partial_y F ( x , y) 
00287 \f]
00288 
00289 \param fun
00290 The argument \c fun is an object of type \c Fun
00291 wich must support the member functions listed below.
00292 CppAD will may be recording operations of the type  \c AD<Base>
00293 when these member functions are called.
00294 These member functions must not stop such a recording; e.g.,
00295 they must not call \c AD<Base>::abort_recording.
00296 
00297 \par Fun::ad_vector</tt>
00298 The type <tt>Fun::ad_vector</tt> must be a 
00299 SimpleVector class with elements of type  \c AD<Base>; i.e.
00300 <tt>Fun::ad_vector::value_type</tt>
00301 is equal to  \c AD<Base>.
00302 
00303 \par fun.ell
00304 the type \c Fun must support the syntax
00305 \verbatim
00306      ell = fun.ell()
00307 \endverbatim
00308 where \c ell is a \c size_t value that is set to \f$ \ell \f$; i.e.,
00309 the number of terms in the summation.
00310 
00311 \par fun.s
00312 The type \c Fun must support the syntax
00313 \verbatim
00314      s_k = fun.s(k, x, y)
00315 \endverbatim
00316 The argument \c k has prototype <tt>size_t k</tt>.
00317 The argument \c x has prototype <tt>const Fun::ad_vector& x</tt>
00318 and its size must be equal to \c n.
00319 The argument \c y has prototype <tt>const Fun::ad_vector& y</tt>
00320 and its size must be equal to \c m.
00321 The return value \c s_k has prototype \c AD<Base> s_k
00322 and its value must be given by \f$ s_k = S_k ( x , y ) \f$.
00323 
00324 \par fun.sy
00325 The type \c Fun must support the syntax
00326 \verbatim
00327      sy_k = fun.sy(k, x, y)
00328 \endverbatim
00329 The argument \c k has prototype <tt>size_t k</tt>.
00330 The argument \c x has prototype <tt>const Fun::ad_vector& x</tt>
00331 and its size must be equal to \c n.
00332 The argument \c y has prototype <tt>const Fun::ad_vector& y</tt>
00333 and its size must be equal to \c m.
00334 The return value \c sy_k has prototype <tt>Fun::ad_vector& sy_k</tt>,
00335 its size is \c m
00336 and its value must be given by \f$ sy_k = \partial_y S_k ( x , y ) \f$.
00337 
00338 \param jac
00339 is a vector with size \c n or zero.
00340 The input values of its elements do not matter.
00341 If it has size zero, it is not affected. Otherwise, on output
00342 it contains the Jacobian of \f$ V (x) \f$; i.e.,
00343 for \f$ j = 0 , \ldots , n-1 \f$, 
00344 \f[
00345      jac[ j ] = V^{(1)} (x)_j
00346 \f] $$
00347 where \c x is the first argument to \c opt_val_hes.
00348 
00349 \param hes
00350 is a vector with size <tt>n * n</tt> or zero.
00351 The input values of its elements do not matter.
00352 If it has size zero, it is not affected. Otherwise, on output
00353 it contains the Hessian of \f$ V (x) \f$; i.e.,
00354 for \f$ i = 0 , \ldots , n-1 \f$, and
00355 \f$ j = 0 , \ldots , n-1 \f$, 
00356 \f[
00357      hes[ i * n + j ] = V^{(2)} (x)_{i,j}
00358 \f]
00359 
00360 \return
00361 If <tt>hes.size() == 0</tt>, the return value is not defined.
00362 Otherwise,
00363 the return value is the sign of the determinant for
00364 \f$ \partial_{yy}^2 F(x , y) \f$$.
00365 If it is zero, then the matrix is singular and \c hes is not set
00366 to its specified value.
00367 */
00368 
00369 
00370 template <class BaseVector, class Fun>
00371 int opt_val_hes(
00372      const BaseVector&   x     , 
00373      const BaseVector&   y     , 
00374      Fun                 fun   , 
00375      BaseVector&         jac   ,
00376      BaseVector&         hes   )
00377 {    // determine the base type
00378      typedef typename BaseVector::value_type Base;
00379 
00380      // check that BaseVector is a SimpleVector class with Base elements
00381      CheckSimpleVector<Base, BaseVector>();
00382 
00383      // determine the AD vector type
00384      typedef typename Fun::ad_vector ad_vector;
00385 
00386      // check that ad_vector is a SimpleVector class with AD<Base> elements
00387      CheckSimpleVector< AD<Base> , ad_vector >();
00388 
00389      // size of the x and y spaces
00390      size_t n = size_t(x.size());
00391      size_t m = size_t(y.size());
00392 
00393      // number of terms in the summation
00394      size_t ell = fun.ell();
00395 
00396      // check size of return values
00397      CPPAD_ASSERT_KNOWN(
00398           size_t(jac.size()) == n || jac.size() == 0,
00399           "opt_val_hes: size of the vector jac is not equal to n or zero"
00400      );
00401      CPPAD_ASSERT_KNOWN(
00402           size_t(hes.size()) == n * n || hes.size() == 0,
00403           "opt_val_hes: size of the vector hes is not equal to n * n or zero"
00404      );
00405 
00406      // some temporary indices
00407      size_t i, j, k;
00408 
00409      // AD version of S_k(x, y)
00410      ad_vector s_k(1);
00411 
00412      // ADFun version of S_k(x, y)
00413      ADFun<Base> S_k;
00414 
00415      // AD version of x 
00416      ad_vector a_x(n);
00417 
00418      // AD version of y
00419      ad_vector a_y(n);
00420 
00421      if( jac.size() > 0  )
00422      {    // this is the easy part, computing the V^{(1)} (x) which is equal 
00423           // to \partial_x F (x, y) (see Thoerem 2 of the reference).
00424 
00425           // copy x and y to AD version
00426           for(j = 0; j < n; j++)
00427                a_x[j] = x[j];
00428           for(j = 0; j < m; j++)
00429                a_y[j] = y[j];
00430 
00431           // initialize summation
00432           for(j = 0; j < n; j++)
00433                jac[j] = Base(0.);
00434 
00435           // add in \partial_x S_k (x, y)
00436           for(k = 0; k < ell; k++)
00437           {    // start recording
00438                Independent(a_x);
00439                // record
00440                s_k[0] = fun.s(k, a_x, a_y);
00441                // stop recording and store in S_k
00442                S_k.Dependent(a_x, s_k);
00443                // compute partial of S_k with respect to x
00444                BaseVector jac_k = S_k.Jacobian(x);
00445                // add \partial_x S_k (x, y) to jac
00446                for(j = 0; j < n; j++)
00447                     jac[j] += jac_k[j];            
00448           }
00449      }
00450      // check if we are done
00451      if( hes.size() == 0 )
00452           return 0;
00453 
00454      /*
00455      In this case, we need to compute the Hessian. Using Theorem 1 of the
00456      reference:
00457           Y^{(1)}(x) = - F_yy (x, y)^{-1} F_yx (x, y)
00458      Using Theorem 2 of the reference:
00459           V^{(2)}(x) = F_xx (x, y) + F_xy (x, y)  Y^{(1)}(x) 
00460      */
00461      // Base and AD version of xy
00462      BaseVector xy(n + m);
00463      ad_vector a_xy(n + m);
00464      for(j = 0; j < n; j++)
00465           a_xy[j] = xy[j] = x[j]; 
00466      for(j = 0; j < m; j++)
00467           a_xy[n+j] = xy[n+j] = y[j]; 
00468 
00469      // Initialization summation for Hessian of F
00470      size_t nm_sq = (n + m) * (n + m);
00471      BaseVector F_hes(nm_sq);
00472      for(j = 0; j < nm_sq; j++)
00473           F_hes[j] = Base(0.);
00474      BaseVector hes_k(nm_sq); 
00475 
00476      // add in Hessian of S_k to hes
00477      for(k = 0; k < ell; k++)
00478      {    // start recording
00479           Independent(a_xy);
00480           // split out x
00481           for(j = 0; j < n; j++)
00482                a_x[j] = a_xy[j];
00483           // split out y
00484           for(j = 0; j < m; j++)
00485                a_y[j] = a_xy[n+j];
00486           // record
00487           s_k[0] = fun.s(k, a_x, a_y);
00488           // stop recording and store in S_k
00489           S_k.Dependent(a_xy, s_k);
00490           // when computing the Hessian it pays to optimize the tape
00491           S_k.optimize();
00492           // compute Hessian of S_k 
00493           hes_k = S_k.Hessian(xy, 0);
00494           // add \partial_x S_k (x, y) to jac
00495           for(j = 0; j < nm_sq; j++)
00496                F_hes[j] += hes_k[j];               
00497      }
00498      // Extract F_yx
00499      BaseVector F_yx(m * n);
00500      for(i = 0; i < m; i++)
00501      {    for(j = 0; j < n; j++)
00502                F_yx[i * n + j] = F_hes[ (i+n)*(n+m) + j ];
00503      } 
00504      // Extract F_yy
00505      BaseVector F_yy(n * m);
00506      for(i = 0; i < m; i++)
00507      {    for(j = 0; j < m; j++)
00508                F_yy[i * m + j] = F_hes[ (i+n)*(n+m) + j + n ];
00509      } 
00510 
00511      // compute - Y^{(1)}(x) = F_yy (x, y)^{-1} F_yx (x, y)
00512      BaseVector neg_Y_x(m * n);
00513      Base logdet;
00514      int signdet = CppAD::LuSolve(m, n, F_yy, F_yx, neg_Y_x, logdet);
00515      if( signdet == 0 )
00516           return signdet;
00517 
00518      // compute hes = F_xx (x, y) + F_xy (x, y)  Y^{(1)}(x) 
00519      for(i = 0; i < n; i++)
00520      {    for(j = 0; j < n; j++)
00521           {    hes[i * n + j] = F_hes[ i*(n+m) + j ];
00522                for(k = 0; k < m; k++)
00523                     hes[i*n+j] -= F_hes[i*(n+m) + k+n] * neg_Y_x[k*n+j];
00524           }
00525      }
00526      return signdet;
00527 }
00528 
00529 /*! \} */
00530 CPPAD_END_NAMESPACE
00531 
00532 # endif
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines