CppAD: A C++ Algorithmic Differentiation Package 20110419
|
00001 /* $Id$ */ 00002 # ifndef CPPAD_OPT_VAL_HES_INCLUDED 00003 # define CPPAD_OPT_VAL_HES_INCLUDED 00004 00005 /* -------------------------------------------------------------------------- 00006 CppAD: C++ Algorithmic Differentiation: Copyright (C) 2003-10 Bradley M. Bell 00007 00008 CppAD is distributed under multiple licenses. This distribution is under 00009 the terms of the 00010 Common Public License Version 1.0. 00011 00012 A copy of this license is included in the COPYING file of this distribution. 00013 Please visit http://www.coin-or.org/CppAD/ for information on other licenses. 00014 -------------------------------------------------------------------------- */ 00015 /* 00016 $begin opt_val_hes$$ 00017 $spell 00018 hes 00019 sy 00020 Jacobian 00021 hes 00022 signdet 00023 jac 00024 Bradley 00025 const 00026 CppAD 00027 $$ 00028 00029 00030 $index Jacobian$$ 00031 $index Hessian$$ 00032 $index optimal value$$ 00033 $index value$$ 00034 $index opt_val_hes$$ 00035 00036 $section Jacobian and Hessian of Optimal Values$$ 00037 00038 $head Syntax$$ 00039 $icode%signdet% = opt_val_hes(%x%, %y%, %fun%, %jac%, %hes%)%$$ 00040 00041 $head See Also$$ 00042 $cref/BenderQuad/$$ 00043 00044 $head Reference$$ 00045 Algorithmic differentiation of implicit functions and optimal values, 00046 Bradley M. Bell and James V. Burke, Advances in Automatic Differentiation, 00047 2008, Springer. 00048 00049 $head Purpose$$ 00050 We are given a function 00051 $latex S : \R^n \times \R^m \rightarrow \R^\ell$$ 00052 and we define $latex F : \R^n \times \R^m \rightarrow \R$$ 00053 and $latex V : \R^n \rightarrow \R $$ by 00054 $latex \[ 00055 \begin{array}{rcl} 00056 F(x, y) & = & \sum_{k=0}^{\ell-1} S_k ( x , y) 00057 \\ 00058 V(x) & = & F [ x , Y(x) ] 00059 \\ 00060 0 & = & \partial_y F [x , Y(x) ] 00061 \end{array} 00062 \] $$ 00063 We wish to compute the Jacobian 00064 and possibly also the Hessian, of $latex V (x)$$. 00065 00066 $head BaseVector$$ 00067 The type $icode BaseVector$$ must be a 00068 $xref/SimpleVector/$$ class. 00069 We use $icode Base$$ to refer to the type of the elements of 00070 $icode BaseVector$$; i.e., 00071 $codei% 00072 %BaseVector%::value_type 00073 %$$ 00074 00075 $head x$$ 00076 The argument $icode x$$ has prototype 00077 $codei% 00078 const %BaseVector%& %x% 00079 %$$ 00080 and its size must be equal to $icode n$$. 00081 It specifies the point at which we evaluating 00082 the Jacobian $latex V^{(1)} (x)$$ 00083 (and possibly the Hessian $latex V^{(2)} (x)$$). 00084 00085 00086 $head y$$ 00087 The argument $icode y$$ has prototype 00088 $codei% 00089 const %BaseVector%& %y% 00090 %$$ 00091 and its size must be equal to $icode m$$. 00092 It must be equal to $latex Y(x)$$; i.e., 00093 it must solve the implicit equation 00094 $latex \[ 00095 0 = \partial_y F ( x , y) 00096 \] $$ 00097 00098 $head Fun$$ 00099 The argument $icode fun$$ is an object of type $icode Fun$$ 00100 which must support the member functions listed below. 00101 CppAD will may be recording operations of the type $code%AD<%Base%>%$$ 00102 when these member functions are called. 00103 These member functions must not stop such a recording; e.g., 00104 they must not call $cref/AD<Base>::abort_recording/abort_recording/$$. 00105 00106 $subhead Fun::ad_vector$$ 00107 The type $icode%Fun%::ad_vector%$$ must be a 00108 $cref/SimpleVector/$$ class with elements of type $codei%AD<%Base%>%$$; i.e. 00109 $codei% 00110 %Fun%::ad_vector::value_type 00111 %$$ 00112 is equal to $codei%AD<%Base%>%$$. 00113 00114 $subhead fun.ell$$ 00115 The type $icode Fun$$ must support the syntax 00116 $codei% 00117 %ell% = %fun%.ell() 00118 %$$ 00119 where $icode ell$$ has prototype 00120 $codei% 00121 size_t %ell% 00122 %$$ 00123 and is the value of $latex \ell$$; i.e., 00124 the number of terms in the summation. 00125 $pre 00126 00127 $$ 00128 One can choose $icode ell$$ equal to one, and have 00129 $latex S(x,y)$$ the same as $latex F(x, y)$$. 00130 Each of the functions $latex S_k (x , y)$$, 00131 (in the summation defining $latex F(x, y)$$) 00132 is differentiated separately using AD. 00133 For very large problems, breaking $latex F(x, y)$$ into the sum 00134 of separate simpler functions may reduce the amount of memory necessary for 00135 algorithmic differentiation and there by speed up the process. 00136 00137 $subhead fun.s$$ 00138 The type $icode Fun$$ must support the syntax 00139 $codei% 00140 %s_k% = %fun%.s(%k%, %x%, %y%) 00141 %$$ 00142 The $icode%fun%.s%$$ argument $icode k$$ has prototype 00143 $codei% 00144 size_t %k% 00145 %$$ 00146 and is between zero and $icode%ell% - 1%$$. 00147 The argument $icode x$$ to $icode%fun%.s%$$ has prototype 00148 $codei% 00149 const %Fun%::ad_vector& %x% 00150 %$$ 00151 and its size must be equal to $icode n$$. 00152 The argument $icode y$$ to $icode%fun%.s%$$ has prototype 00153 $codei% 00154 const %Fun%::ad_vector& %y% 00155 %$$ 00156 and its size must be equal to $icode m$$. 00157 The $icode%fun%.s%$$ result $icode s_k$$ has prototype 00158 $codei% 00159 AD<%Base%> %s_k% 00160 %$$ 00161 and its value must be given by $latex s_k = S_k ( x , y )$$. 00162 00163 $subhead fun.sy$$ 00164 The type $icode Fun$$ must support the syntax 00165 $codei% 00166 %sy_k% = %fun%.sy(%k%, %x%, %y%) 00167 %$$ 00168 The argument $icode k$$ to $icode%fun%.sy%$$ has prototype 00169 $codei% 00170 size_t %k% 00171 %$$ 00172 The argument $icode x$$ to $icode%fun%.sy%$$ has prototype 00173 $codei% 00174 const %Fun%::ad_vector& %x% 00175 %$$ 00176 and its size must be equal to $icode n$$. 00177 The argument $icode y$$ to $icode%fun%.sy%$$ has prototype 00178 $codei% 00179 const %Fun%::ad_vector& %y% 00180 %$$ 00181 and its size must be equal to $icode m$$. 00182 The $icode%fun%.sy%$$ result $icode sy_k$$ has prototype 00183 $codei% 00184 %Fun%::ad_vector %sy_k% 00185 %$$ 00186 its size must be equal to $icode m$$, 00187 and its value must be given by $latex sy_k = \partial_y S_k ( x , y )$$. 00188 00189 $head jac$$ 00190 The argument $icode jac$$ has prototype 00191 $codei% 00192 %BaseVector%& %jac% 00193 %$$ 00194 and has size $icode n$$ or zero. 00195 The input values of its elements do not matter. 00196 If it has size zero, it is not affected. Otherwise, on output 00197 it contains the Jacobian of $latex V (x)$$; i.e., 00198 for $latex j = 0 , \ldots , n-1$$, 00199 $latex \[ 00200 jac[ j ] = V^{(1)} (x)_j 00201 \] $$ 00202 where $icode x$$ is the first argument to $code opt_val_hes$$. 00203 00204 $head hes$$ 00205 The argument $icode hes$$ has prototype 00206 $codei% 00207 %BaseVector%& %hes% 00208 %$$ 00209 and has size $icode%n% * %n%$$ or zero. 00210 The input values of its elements do not matter. 00211 If it has size zero, it is not affected. Otherwise, on output 00212 it contains the Hessian of $latex V (x)$$; i.e., 00213 for $latex i = 0 , \ldots , n-1$$, and 00214 $latex j = 0 , \ldots , n-1$$, 00215 $latex \[ 00216 hes[ i * n + j ] = V^{(2)} (x)_{i,j} 00217 \] $$ 00218 00219 00220 $head signdet$$ 00221 If $icode%hes%$$ has size zero, $icode signdet$$ is not defined. 00222 Otherwise 00223 the return value $icode signdet$$ is the sign of the determinant for 00224 $latex \partial_{yy}^2 F(x , y) $$. 00225 If it is zero, then the matrix is singular and 00226 the Hessian is not computed ($icode hes$$ is not changed). 00227 00228 $head Example$$ 00229 $children% 00230 example/opt_val_hes.cpp 00231 %$$ 00232 The file 00233 $xref/opt_val_hes.cpp/$$ 00234 contains an example and test of this operation. 00235 It returns true if it succeeds and false otherwise. 00236 00237 $end 00238 ----------------------------------------------------------------------------- 00239 */ 00240 00241 CPPAD_BEGIN_NAMESPACE 00242 /*! 00243 \file opt_val_hes.hpp 00244 \brief Computing Jabobians and Hessians of Optimal Values 00245 */ 00246 00247 /*! 00248 Computing Jabobians and Hessians of Optimal Values 00249 00250 We are given a function 00251 \f$ S : {\rm R}^n \times {\rm R}^m \rightarrow {\rm R}^\ell \f$ 00252 and we define \f$ F : {\rm R}^n \times {\rm R}^m \rightarrow {\rm R} \f$ 00253 and \f$ V : {\rm R}^n \rightarrow {\rm R} \f$ by 00254 \f[ 00255 \begin{array}{rcl} 00256 F(x, y) & = & \sum_{k=0}^{\ell-1} S_k ( x , y) 00257 \\ 00258 V(x) & = & F [ x , Y(x) ] 00259 \\ 00260 0 & = & \partial_y F [x , Y(x) ] 00261 \end{array} 00262 \f] 00263 We wish to compute the Jacobian 00264 and possibly also the Hessian, of \f$ V (x) \f$. 00265 00266 \tparam BaseVector 00267 The type \c BaseVector must be a SimpleVector class. 00268 We use \c Base to refer to the type of the elements of 00269 \c BaseVector; i.e., 00270 <tt>BaseVector::value_type</tt>. 00271 00272 \param x 00273 is a vector with size \c n. 00274 It specifies the point at which we evaluating 00275 the Jacobian \f$ V^{(1)} (x) \f$ 00276 (and possibly the Hessian \f$ V^{(2)} (x) \f$). 00277 00278 00279 \param y 00280 is a vector with size \c m. 00281 It must be equal to \f$ Y(x) \f$; i.e., 00282 it must solve the implicit equation 00283 \f[ 00284 0 = \partial_y F ( x , y) 00285 \f] 00286 00287 \param fun 00288 The argument \c fun is an object of type \c Fun 00289 wich must support the member functions listed below. 00290 CppAD will may be recording operations of the type \c AD<Base> 00291 when these member functions are called. 00292 These member functions must not stop such a recording; e.g., 00293 they must not call \c AD<Base>::abort_recording. 00294 00295 \par Fun::ad_vector</tt> 00296 The type <tt>Fun::ad_vector</tt> must be a 00297 SimpleVector class with elements of type \c AD<Base>; i.e. 00298 <tt>Fun::ad_vector::value_type</tt> 00299 is equal to \c AD<Base>. 00300 00301 \par fun.ell 00302 the type \c Fun must support the syntax 00303 \verbatim 00304 ell = fun.ell() 00305 \endverbatim 00306 where \c ell is a \c size_t value that is set to \f$ \ell \f$; i.e., 00307 the number of terms in the summation. 00308 00309 \par fun.s 00310 The type \c Fun must support the syntax 00311 \verbatim 00312 s_k = fun.s(k, x, y) 00313 \endverbatim 00314 The argument \c k has prototype <tt>size_t k</tt>. 00315 The argument \c x has prototype <tt>const Fun::ad_vector& x</tt> 00316 and its size must be equal to \c n. 00317 The argument \c y has prototype <tt>const Fun::ad_vector& y</tt> 00318 and its size must be equal to \c m. 00319 The return value \c s_k has prototype \c AD<Base> s_k 00320 and its value must be given by \f$ s_k = S_k ( x , y ) \f$. 00321 00322 \par fun.sy 00323 The type \c Fun must support the syntax 00324 \verbatim 00325 sy_k = fun.sy(k, x, y) 00326 \endverbatim 00327 The argument \c k has prototype <tt>size_t k</tt>. 00328 The argument \c x has prototype <tt>const Fun::ad_vector& x</tt> 00329 and its size must be equal to \c n. 00330 The argument \c y has prototype <tt>const Fun::ad_vector& y</tt> 00331 and its size must be equal to \c m. 00332 The return value \c sy_k has prototype <tt>Fun::ad_vector& sy_k</tt>, 00333 its size is \c m 00334 and its value must be given by \f$ sy_k = \partial_y S_k ( x , y ) \f$. 00335 00336 \param jac 00337 is a vector with size \c n or zero. 00338 The input values of its elements do not matter. 00339 If it has size zero, it is not affected. Otherwise, on output 00340 it contains the Jacobian of \f$ V (x) \f$; i.e., 00341 for \f$ j = 0 , \ldots , n-1 \f$, 00342 \f[ 00343 jac[ j ] = V^{(1)} (x)_j 00344 \f] $$ 00345 where \c x is the first argument to \c opt_val_hes. 00346 00347 \param hes 00348 is a vector with size <tt>n * n</tt> or zero. 00349 The input values of its elements do not matter. 00350 If it has size zero, it is not affected. Otherwise, on output 00351 it contains the Hessian of \f$ V (x) \f$; i.e., 00352 for \f$ i = 0 , \ldots , n-1 \f$, and 00353 \f$ j = 0 , \ldots , n-1 \f$, 00354 \f[ 00355 hes[ i * n + j ] = V^{(2)} (x)_{i,j} 00356 \f] 00357 00358 \return 00359 If <tt>hes.size() == 0</tt>, the return value is not defined. 00360 Otherwise, 00361 the return value is the sign of the determinant for 00362 \f$ \partial_{yy}^2 F(x , y) \f$$. 00363 If it is zero, then the matrix is singular and \c hes is not set 00364 to its specified value. 00365 */ 00366 00367 00368 template <class BaseVector, class Fun> 00369 int opt_val_hes( 00370 const BaseVector& x , 00371 const BaseVector& y , 00372 Fun fun , 00373 BaseVector& jac , 00374 BaseVector& hes ) 00375 { // determine the base type 00376 typedef typename BaseVector::value_type Base; 00377 00378 // check that BaseVector is a SimpleVector class with Base elements 00379 CheckSimpleVector<Base, BaseVector>(); 00380 00381 // determine the AD vector type 00382 typedef typename Fun::ad_vector ad_vector; 00383 00384 // check that ad_vector is a SimpleVector class with AD<Base> elements 00385 CheckSimpleVector< AD<Base> , ad_vector >(); 00386 00387 // size of the x and y spaces 00388 size_t n = x.size(); 00389 size_t m = y.size(); 00390 00391 // number of terms in the summation 00392 size_t ell = fun.ell(); 00393 00394 // check size of return values 00395 CPPAD_ASSERT_KNOWN( 00396 jac.size() == n || jac.size() == 0, 00397 "opt_val_hes: size of the vector jac is not equal to n or zero" 00398 ); 00399 CPPAD_ASSERT_KNOWN( 00400 hes.size() == n * n || hes.size() == 0, 00401 "opt_val_hes: size of the vector hes is not equal to n * n or zero" 00402 ); 00403 00404 // some temporary indices 00405 size_t i, j, k; 00406 00407 // AD version of S_k(x, y) 00408 ad_vector s_k(1); 00409 00410 // ADFun version of S_k(x, y) 00411 ADFun<Base> S_k; 00412 00413 // AD version of x 00414 ad_vector a_x(n); 00415 00416 // AD version of y 00417 ad_vector a_y(n); 00418 00419 if( jac.size() > 0 ) 00420 { // this is the easy part, computing the V^{(1)} (x) which is equal 00421 // to \partial_x F (x, y) (see Thoerem 2 of the reference). 00422 00423 // copy x and y to AD version 00424 for(j = 0; j < n; j++) 00425 a_x[j] = x[j]; 00426 for(j = 0; j < m; j++) 00427 a_y[j] = y[j]; 00428 00429 // initialize summation 00430 for(j = 0; j < n; j++) 00431 jac[j] = Base(0.); 00432 00433 // add in \partial_x S_k (x, y) 00434 for(k = 0; k < ell; k++) 00435 { // start recording 00436 Independent(a_x); 00437 // record 00438 s_k[0] = fun.s(k, a_x, a_y); 00439 // stop recording and store in S_k 00440 S_k.Dependent(a_x, s_k); 00441 // compute partial of S_k with respect to x 00442 BaseVector jac_k = S_k.Jacobian(x); 00443 // add \partial_x S_k (x, y) to jac 00444 for(j = 0; j < n; j++) 00445 jac[j] += jac_k[j]; 00446 } 00447 } 00448 // check if we are done 00449 if( hes.size() == 0 ) 00450 return 0; 00451 00452 /* 00453 In this case, we need to compute the Hessian. Using Theorem 1 of the 00454 reference: 00455 Y^{(1)}(x) = - F_yy (x, y)^{-1} F_yx (x, y) 00456 Using Theorem 2 of the reference: 00457 V^{(2)}(x) = F_xx (x, y) + F_xy (x, y) Y^{(1)}(x) 00458 */ 00459 // Base and AD version of xy 00460 BaseVector xy(n + m); 00461 ad_vector a_xy(n + m); 00462 for(j = 0; j < n; j++) 00463 a_xy[j] = xy[j] = x[j]; 00464 for(j = 0; j < m; j++) 00465 a_xy[n+j] = xy[n+j] = y[j]; 00466 00467 // Initialization summation for Hessian of F 00468 size_t nm_sq = (n + m) * (n + m); 00469 BaseVector F_hes(nm_sq); 00470 for(j = 0; j < nm_sq; j++) 00471 F_hes[j] = Base(0.); 00472 BaseVector hes_k(nm_sq); 00473 00474 // add in Hessian of S_k to hes 00475 for(k = 0; k < ell; k++) 00476 { // start recording 00477 Independent(a_xy); 00478 // split out x 00479 for(j = 0; j < n; j++) 00480 a_x[j] = a_xy[j]; 00481 // split out y 00482 for(j = 0; j < m; j++) 00483 a_y[j] = a_xy[n+j]; 00484 // record 00485 s_k[0] = fun.s(k, a_x, a_y); 00486 // stop recording and store in S_k 00487 S_k.Dependent(a_xy, s_k); 00488 // when computing the Hessian it pays to optimize the tape 00489 S_k.optimize(); 00490 // compute Hessian of S_k 00491 hes_k = S_k.Hessian(xy, 0); 00492 // add \partial_x S_k (x, y) to jac 00493 for(j = 0; j < nm_sq; j++) 00494 F_hes[j] += hes_k[j]; 00495 } 00496 // Extract F_yx 00497 BaseVector F_yx(m * n); 00498 for(i = 0; i < m; i++) 00499 { for(j = 0; j < n; j++) 00500 F_yx[i * n + j] = F_hes[ (i+n)*(n+m) + j ]; 00501 } 00502 // Extract F_yy 00503 BaseVector F_yy(n * m); 00504 for(i = 0; i < m; i++) 00505 { for(j = 0; j < m; j++) 00506 F_yy[i * m + j] = F_hes[ (i+n)*(n+m) + j + n ]; 00507 } 00508 00509 // compute - Y^{(1)}(x) = F_yy (x, y)^{-1} F_yx (x, y) 00510 BaseVector neg_Y_x(m * n); 00511 Base logdet; 00512 int signdet = CppAD::LuSolve(m, n, F_yy, F_yx, neg_Y_x, logdet); 00513 if( signdet == 0 ) 00514 return signdet; 00515 00516 // compute hes = F_xx (x, y) + F_xy (x, y) Y^{(1)}(x) 00517 for(i = 0; i < n; i++) 00518 { for(j = 0; j < n; j++) 00519 { hes[i * n + j] = F_hes[ i*(n+m) + j ]; 00520 for(k = 0; k < m; k++) 00521 hes[i*n+j] -= F_hes[i*(n+m) + k+n] * neg_Y_x[k*n+j]; 00522 } 00523 } 00524 return signdet; 00525 } 00526 00527 CPPAD_END_NAMESPACE 00528 00529 # endif