CppAD: A C++ Algorithmic Differentiation Package
20130102
|
00001 /* $Id$ */ 00002 # ifndef CPPAD_OPTIMIZE_INCLUDED 00003 # define CPPAD_OPTIMIZE_INCLUDED 00004 00005 /* -------------------------------------------------------------------------- 00006 CppAD: C++ Algorithmic Differentiation: Copyright (C) 2003-12 Bradley M. Bell 00007 00008 CppAD is distributed under multiple licenses. This distribution is under 00009 the terms of the 00010 Eclipse Public License Version 1.0. 00011 00012 A copy of this license is included in the COPYING file of this distribution. 00013 Please visit http://www.coin-or.org/CppAD/ for information on other licenses. 00014 -------------------------------------------------------------------------- */ 00015 00016 /* 00017 $begin optimize$$ 00018 $spell 00019 Taylor 00020 var 00021 CppAD 00022 cppad 00023 $$ 00024 00025 $section Optimize an ADFun Object Tape$$ 00026 00027 $index optimize$$ 00028 $index tape, optimize$$ 00029 $index sequence, optimize operations$$ 00030 $index operations, optimize sequence$$ 00031 $index speed, optimize$$ 00032 $index memory, optimize$$ 00033 00034 $head Syntax$$ 00035 $icode%f%.optimize()%$$ 00036 00037 00038 $head Purpose$$ 00039 The operation sequence corresponding to an $cref ADFun$$ object can 00040 be very large and involve many operations; see the 00041 size functions in $cref seq_property$$. 00042 The $icode%f%.optimize%$$ procedure reduces the number of operations, 00043 and thereby the time and the memory, required to 00044 compute function and derivative values. 00045 00046 $head f$$ 00047 The object $icode f$$ has prototype 00048 $icode% 00049 ADFun<%Base%> %f% 00050 %$$ 00051 00052 $head Improvements$$ 00053 You can see the reduction in number of variables in the operation sequence 00054 by calling the function $cref/f.size_var()/seq_property/size_var/$$ 00055 before and after the optimization procedure. 00056 Given that the optimization procedure takes time, 00057 it may be faster to skip this optimize procedure and just compute 00058 derivatives using the original operation sequence. 00059 00060 $subhead Testing$$ 00061 You can run the CppAD $cref/speed/speed_main/$$ tests and see 00062 the corresponding changes in number of variables and execution time; 00063 see $cref cppad_test$$. 00064 00065 $head Efficiency$$ 00066 The $code optimize$$ member function 00067 may greatly reduce the number of variables 00068 in the operation sequence; see $cref/size_var/seq_property/size_var/$$. 00069 If a $cref/zero order forward/ForwardZero/$$ calculation is done during 00070 the construction of $icode f$$, it will require more memory 00071 and time than required after the optimization procedure. 00072 In addition, it will need to be redone. 00073 For this reason, it is more efficient to use 00074 $codei% 00075 ADFun<%Base%> %f%; 00076 %f%.Dependent(%x%, %y%); 00077 %f%.optimize(); 00078 %$$ 00079 instead of 00080 $codei% 00081 ADFun<%Base%> %f%(%x%, %y%) 00082 %f%.optimize(); 00083 %$$ 00084 See the discussion about 00085 $cref/sequence constructors/FunConstruct/Sequence Constructor/$$. 00086 00087 $head Comparison Operators$$ 00088 Any comparison operators that are in the tape are removed by this operation. 00089 Hence the return value of $cref CompareChange$$ will always be zero 00090 for an optimized tape (even if $code NDEBUG$$ is not defined). 00091 00092 $head Checking Optimization$$ 00093 $index NDEBUG$$ 00094 If $cref/NDEBUG/Faq/Speed/NDEBUG/$$ is not defined, 00095 and $cref/f.size_taylor()/size_taylor/$$ is greater than zero, 00096 a $cref ForwardZero$$ calculation is done using the optimized version 00097 of $icode f$$ and the results are checked to see that they are 00098 the same as before. 00099 If they are not the same, the 00100 $cref ErrorHandler$$ is called with a known error message 00101 related to $icode%f%.optimize()%$$. 00102 00103 $head Example$$ 00104 $children% 00105 example/optimize.cpp 00106 %$$ 00107 The file 00108 $cref optimize.cpp$$ 00109 contains an example and test of this operation. 00110 It returns true if it succeeds and false otherwise. 00111 00112 $end 00113 ----------------------------------------------------------------------------- 00114 */ 00115 # include <stack> 00116 00117 CPPAD_BEGIN_NAMESPACE 00118 /*! 00119 \defgroup optimize_hpp optimize.hpp 00120 \{ 00121 \file optimize.hpp 00122 Routines for optimizing a tape 00123 */ 00124 00125 00126 /*! 00127 State for this variable set during reverse sweep. 00128 */ 00129 enum optimize_connection { 00130 /// There is no operation that connects this variable to the 00131 /// independent variables. 00132 not_connected , 00133 00134 /// There is one or more operations that connects this variable to the 00135 /// independent variables. 00136 yes_connected , 00137 00138 /// There is only one parrent operation that connects this variable to 00139 /// the independent variables and it is one of the following: 00140 /// AddvvOp, AddpvOp, SubpvOp, SubvpOp, or SubvvOp. 00141 sum_connected , 00142 00143 /// Satisfies sum_connected and in addition 00144 /// the parrent operation is one of the following: 00145 /// AddvvOp, AddpvOp, SubpvOp, SubvpOp, or SubvvOp. 00146 csum_connected 00147 00148 }; 00149 00150 00151 /*! 00152 Structure used by \c optimize to hold information about one variable. 00153 in the old operation seqeunce. 00154 */ 00155 struct optimize_old_variable { 00156 /// Operator for which this variable is the result, \c NumRes(op) > 0. 00157 /// Set by the reverse sweep at beginning of optimization. 00158 OpCode op; 00159 00160 /// Pointer to first argument (child) for this operator. 00161 /// Set by the reverse sweep at beginning of optimization. 00162 const addr_t* arg; 00163 00164 /// How is this variable connected to the independent variables 00165 optimize_connection connect; 00166 00167 /// Set during forward sweep to the index in the 00168 /// new operation sequence corresponding to this old varable. 00169 addr_t new_var; 00170 }; 00171 00172 /*! 00173 Structures used by \c optimize_record_csum 00174 to hold information about one variable. 00175 */ 00176 struct optimize_csum_variable { 00177 /// Operator for which this variable is the result, \c NumRes(op) > 0. 00178 OpCode op; 00179 00180 /// Pointer to first argument (child) for this operator. 00181 /// Set by the reverse sweep at beginning of optimization. 00182 const addr_t* arg; 00183 00184 /// Is this variable added to the summation 00185 /// (if not it is subtracted) 00186 bool add; 00187 }; 00188 00189 /*! 00190 Structure used to pass work space from \c optimize to \c optimize_record_csum 00191 (so that stacks do not start from zero size every time). 00192 */ 00193 struct optimize_csum_stacks { 00194 /// stack of operations in the cummulative summation 00195 std::stack<struct optimize_csum_variable> op_stack; 00196 /// stack of variables to be added 00197 std::stack<size_t > add_stack; 00198 /// stack of variables to be subtracted 00199 std::stack<size_t > sub_stack; 00200 }; 00201 00202 00203 /*! 00204 Documents arguments that are common to optimization helper functions 00205 (should not be called). 00206 00207 \param tape 00208 is a vector that maps a variable index, in the old operation sequence, 00209 to an <tt>optimize_old_variable</tt> information record. 00210 Note that the index for this vector must be greater than or equal zero and 00211 less than <tt>tape.size()</tt>. 00212 00213 \li <tt>tape[i].op</tt> 00214 is the operator in the old operation sequence 00215 corresponding to the old variable index \c i. 00216 Assertion: <tt>NumRes(tape[i].op) > 0</tt>. 00217 00218 \li <tt>tape[i].arg</tt> 00219 for <tt>j < NumArg( tape[i].op ), tape[i].arg[j]</tt> 00220 is the j-th the argument, in the old operation sequence, 00221 corresponding to the old variable index \c i. 00222 Assertion: <tt>tape[i].arg[j] < i</tt>. 00223 00224 \li <tt>tape[i].new_var</tt> 00225 Suppose 00226 <tt>i <= current, j < NumArg( tape[i].op ), and k = tape[i].arg[j]</tt>, 00227 and \c j corresponds to a varialbe for operator <tt>tape[i].op</tt>. 00228 It follows that <tt>tape[k].new_var</tt> 00229 has alread been set to the variable in the new operation sequence 00230 corresponding to the old variable index \c k. 00231 This means that the \c new_var value has been set 00232 for all the possible arguments that come before \a current. 00233 00234 \param current 00235 is the index in the old operation sequence for 00236 the variable corresponding to the result for the current operator. 00237 Assertions: 00238 <tt> 00239 current < tape.size(), 00240 NumRes( tape[current].op ) > 0. 00241 </tt> 00242 00243 \param npar 00244 is the number of paraemters corresponding to this operation sequence. 00245 00246 \param par 00247 is a vector of length \a npar containing the parameters 00248 for this operation sequence; i.e., 00249 given a parameter index \c i, the corresponding parameter value is 00250 <tt>par[i]</tt>. 00251 */ 00252 template <class Base> 00253 void optimize_prototype( 00254 const CppAD::vector<struct optimize_old_variable>& tape , 00255 size_t current , 00256 size_t npar , 00257 const Base* par ) 00258 { CPPAD_ASSERT_UNKNOWN(false); } 00259 00260 /*! 00261 Check a unary operator for a complete match with a previous operator. 00262 00263 A complete match means that the result of the previous operator 00264 can be used inplace of the result for current operator. 00265 00266 \copydetails optimize_prototype 00267 00268 \param hash_table_var 00269 is a vector with size CPPAD_HASH_TABLE_SIZE 00270 that maps a hash code to the corresponding 00271 variable index in the old operation sequence. 00272 All the values in this table must be less than \a current. 00273 00274 \param code 00275 The input value of code does not matter. 00276 The output value of code is the hash code corresponding to 00277 this operation in the new operation sequence. 00278 00279 \return 00280 If the return value is zero, 00281 no match was found. 00282 If the return value is greater than zero, 00283 it is the index of a new variable that can be used to replace the 00284 old variable. 00285 00286 \par Restrictions: 00287 NumArg( tape[current].op ) == 1 00288 */ 00289 template <class Base> 00290 size_t optimize_unary_match( 00291 const CppAD::vector<struct optimize_old_variable>& tape , 00292 size_t current , 00293 size_t npar , 00294 const Base* par , 00295 const CppAD::vector<size_t>& hash_table_var , 00296 unsigned short& code ) 00297 { const addr_t* arg = tape[current].arg; 00298 OpCode op = tape[current].op; 00299 addr_t new_arg[1]; 00300 00301 CPPAD_ASSERT_UNKNOWN( NumArg(op) == 1 ); 00302 CPPAD_ASSERT_UNKNOWN( NumRes(op) > 0 ); 00303 CPPAD_ASSERT_UNKNOWN( size_t(arg[0]) < current ); 00304 new_arg[0] = tape[arg[0]].new_var; 00305 CPPAD_ASSERT_UNKNOWN( size_t(new_arg[0]) < current ); 00306 code = hash_code( 00307 op , 00308 new_arg , 00309 npar , 00310 par 00311 ); 00312 size_t i = hash_table_var[code]; 00313 CPPAD_ASSERT_UNKNOWN( i < current ); 00314 if( op == tape[i].op ) 00315 { size_t k = tape[i].arg[0]; 00316 CPPAD_ASSERT_UNKNOWN( k < i ); 00317 if (new_arg[0] == tape[k].new_var ) 00318 return tape[i].new_var; 00319 } 00320 return 0; 00321 } 00322 00323 /*! 00324 Check a binary operator for a complete match with a previous operator, 00325 00326 \copydetails optimize_unary_match 00327 00328 \par Restrictions: 00329 The binary operator must be an addition, subtraction, multiplication, division 00330 or power operator. 00331 */ 00332 template <class Base> 00333 inline size_t optimize_binary_match( 00334 const CppAD::vector<struct optimize_old_variable>& tape , 00335 size_t current , 00336 size_t npar , 00337 const Base* par , 00338 const CppAD::vector<size_t>& hash_table_var , 00339 unsigned short& code ) 00340 { OpCode op = tape[current].op; 00341 const addr_t* arg = tape[current].arg; 00342 addr_t new_arg[2]; 00343 bool parameter[2]; 00344 00345 // initialize return value 00346 size_t match_var = 0; 00347 00348 CPPAD_ASSERT_UNKNOWN( NumArg(op) == 2 ); 00349 CPPAD_ASSERT_UNKNOWN( NumRes(op) > 0 ); 00350 switch(op) 00351 { // parameter op variable ---------------------------------- 00352 case AddpvOp: 00353 case MulpvOp: 00354 case DivpvOp: 00355 case PowpvOp: 00356 case SubpvOp: 00357 // arg[0] 00358 parameter[0] = true; 00359 new_arg[0] = arg[0]; 00360 CPPAD_ASSERT_UNKNOWN( size_t(arg[0]) < npar ); 00361 // arg[1] 00362 parameter[1] = false; 00363 new_arg[1] = tape[arg[1]].new_var; 00364 CPPAD_ASSERT_UNKNOWN( size_t(arg[1]) < current ); 00365 break; 00366 00367 // variable op parameter ----------------------------------- 00368 case DivvpOp: 00369 case PowvpOp: 00370 case SubvpOp: 00371 // arg[0] 00372 parameter[0] = false; 00373 new_arg[0] = tape[arg[0]].new_var; 00374 CPPAD_ASSERT_UNKNOWN( size_t(arg[0]) < current ); 00375 // arg[1] 00376 parameter[1] = true; 00377 new_arg[1] = arg[1]; 00378 CPPAD_ASSERT_UNKNOWN( size_t(arg[1]) < npar ); 00379 break; 00380 00381 // variable op variable ----------------------------------- 00382 case AddvvOp: 00383 case MulvvOp: 00384 case DivvvOp: 00385 case PowvvOp: 00386 case SubvvOp: 00387 // arg[0] 00388 parameter[0] = false; 00389 new_arg[0] = tape[arg[0]].new_var; 00390 CPPAD_ASSERT_UNKNOWN( size_t(arg[0]) < current ); 00391 // arg[1] 00392 parameter[1] = false; 00393 new_arg[1] = tape[arg[1]].new_var; 00394 CPPAD_ASSERT_UNKNOWN( size_t(arg[1]) < current ); 00395 break; 00396 00397 // must be one of the cases above 00398 default: 00399 CPPAD_ASSERT_UNKNOWN(false); 00400 } 00401 code = hash_code( 00402 op , 00403 new_arg , 00404 npar , 00405 par 00406 ); 00407 size_t i = hash_table_var[code]; 00408 CPPAD_ASSERT_UNKNOWN( i < current ); 00409 if( op == tape[i].op ) 00410 { bool match = true; 00411 size_t j; 00412 for(j = 0; j < 2; j++) 00413 { size_t k = tape[i].arg[j]; 00414 if( parameter[j] ) 00415 { CPPAD_ASSERT_UNKNOWN( k < npar ); 00416 match &= IdenticalEqualPar( 00417 par[ arg[j] ], par[k] 00418 ); 00419 } 00420 else 00421 { CPPAD_ASSERT_UNKNOWN( k < i ); 00422 match &= (new_arg[j] == tape[k].new_var); 00423 } 00424 } 00425 if( match ) 00426 match_var = tape[i].new_var; 00427 } 00428 if( (match_var > 0) | ( (op != AddvvOp) & (op != MulvvOp ) ) ) 00429 return match_var; 00430 00431 // check for match with argument order switched ---------------------- 00432 CPPAD_ASSERT_UNKNOWN( op == AddvvOp || op == MulvvOp ); 00433 i = new_arg[0]; 00434 new_arg[0] = new_arg[1]; 00435 new_arg[1] = i; 00436 unsigned short code_switch = hash_code( 00437 op , 00438 new_arg , 00439 npar , 00440 par 00441 ); 00442 i = hash_table_var[code_switch]; 00443 CPPAD_ASSERT_UNKNOWN( i < current ); 00444 if( op == tape[i].op ) 00445 { bool match = true; 00446 size_t j; 00447 for(j = 0; j < 2; j++) 00448 { size_t k = tape[i].arg[j]; 00449 CPPAD_ASSERT_UNKNOWN( k < i ); 00450 match &= (new_arg[j] == tape[k].new_var); 00451 } 00452 if( match ) 00453 match_var = tape[i].new_var; 00454 } 00455 return match_var; 00456 } 00457 00458 /*! 00459 Record an operation of the form (parameter op variable). 00460 00461 \copydetails optimize_prototype 00462 00463 \param rec 00464 is the object that will record the operations. 00465 00466 \param op 00467 is the operator that we are recording which must be one of the following: 00468 AddpvOp, DivpvOp, MulpvOp, PowvpOp, SubpvOp. 00469 00470 \param arg 00471 is the vector of arguments for this operator. 00472 00473 \return 00474 the result value is the index corresponding to the current 00475 operation in the new operation sequence. 00476 */ 00477 template <class Base> 00478 size_t optimize_record_pv( 00479 const CppAD::vector<struct optimize_old_variable>& tape , 00480 size_t current , 00481 size_t npar , 00482 const Base* par , 00483 recorder<Base>* rec , 00484 OpCode op , 00485 const addr_t* arg ) 00486 { 00487 # ifndef NDEBUG 00488 switch(op) 00489 { case AddpvOp: 00490 case DivpvOp: 00491 case MulpvOp: 00492 case PowpvOp: 00493 case SubpvOp: 00494 break; 00495 00496 default: 00497 CPPAD_ASSERT_UNKNOWN(false); 00498 } 00499 # endif 00500 CPPAD_ASSERT_UNKNOWN( size_t(arg[0]) < npar ); 00501 CPPAD_ASSERT_UNKNOWN( size_t(arg[1]) < current ); 00502 addr_t new_arg[2]; 00503 new_arg[0] = rec->PutPar( par[arg[0]] ); 00504 new_arg[1] = tape[ arg[1] ].new_var; 00505 rec->PutArg( new_arg[0], new_arg[1] ); 00506 size_t i = rec->PutOp(op); 00507 CPPAD_ASSERT_UNKNOWN( size_t(new_arg[0]) < i ); 00508 return i; 00509 } 00510 00511 00512 /*! 00513 Record an operation of the form (variable op parameter). 00514 00515 \copydetails optimize_prototype 00516 00517 \param rec 00518 is the object that will record the operations. 00519 00520 \param op 00521 is the operator that we are recording which must be one of the following: 00522 DivvpOp, PowvpOp, SubvpOp. 00523 00524 \param arg 00525 is the vector of arguments for this operator. 00526 00527 \return 00528 the result value is the index corresponding to the current 00529 operation in the new operation sequence. 00530 */ 00531 template <class Base> 00532 size_t optimize_record_vp( 00533 const CppAD::vector<struct optimize_old_variable>& tape , 00534 size_t current , 00535 size_t npar , 00536 const Base* par , 00537 recorder<Base>* rec , 00538 OpCode op , 00539 const addr_t* arg ) 00540 { 00541 # ifndef NDEBUG 00542 switch(op) 00543 { case DivvpOp: 00544 case PowvpOp: 00545 case SubvpOp: 00546 break; 00547 00548 default: 00549 CPPAD_ASSERT_UNKNOWN(false); 00550 } 00551 # endif 00552 CPPAD_ASSERT_UNKNOWN( size_t(arg[0]) < current ); 00553 CPPAD_ASSERT_UNKNOWN( size_t(arg[1]) < npar ); 00554 addr_t new_arg[2]; 00555 new_arg[0] = tape[ arg[0] ].new_var; 00556 new_arg[1] = rec->PutPar( par[arg[1]] ); 00557 rec->PutArg( new_arg[0], new_arg[1] ); 00558 size_t i = rec->PutOp(op); 00559 CPPAD_ASSERT_UNKNOWN( size_t(new_arg[0]) < i ); 00560 return i; 00561 } 00562 00563 /*! 00564 Record an operation of the form (variable op variable). 00565 00566 \copydetails optimize_prototype 00567 00568 \param rec 00569 is the object that will record the operations. 00570 00571 \param op 00572 is the operator that we are recording which must be one of the following: 00573 AddvvOp, DivvvOp, MulvvOp, PowvpOp, SubvvOp. 00574 00575 \param arg 00576 is the vector of arguments for this operator. 00577 00578 \return 00579 the result value is the index corresponding to the current 00580 operation in the new operation sequence. 00581 */ 00582 template <class Base> 00583 size_t optimize_record_vv( 00584 const CppAD::vector<struct optimize_old_variable>& tape , 00585 size_t current , 00586 size_t npar , 00587 const Base* par , 00588 recorder<Base>* rec , 00589 OpCode op , 00590 const addr_t* arg ) 00591 { 00592 # ifndef NDEBUG 00593 switch(op) 00594 { case AddvvOp: 00595 case DivvvOp: 00596 case MulvvOp: 00597 case PowvvOp: 00598 case SubvvOp: 00599 break; 00600 00601 default: 00602 CPPAD_ASSERT_UNKNOWN(false); 00603 } 00604 # endif 00605 CPPAD_ASSERT_UNKNOWN( size_t(arg[0]) < current ); 00606 CPPAD_ASSERT_UNKNOWN( size_t(arg[1]) < current ); 00607 addr_t new_arg[2]; 00608 new_arg[0] = tape[ arg[0] ].new_var; 00609 new_arg[1] = tape[ arg[1] ].new_var; 00610 rec->PutArg( new_arg[0], new_arg[1] ); 00611 size_t i = rec->PutOp(op); 00612 CPPAD_ASSERT_UNKNOWN( size_t(new_arg[0]) < i ); 00613 return i; 00614 } 00615 00616 // ========================================================================== 00617 00618 /*! 00619 Recording a cummulative cummulative summation starting at its highest parrent. 00620 00621 \copydetails optimize_prototype 00622 00623 \param rec 00624 is the object that will record the operations. 00625 00626 \param work 00627 Is used for computaiton. On input and output, 00628 <tt>work.op_stack.empty()</tt>, 00629 <tt>work.add_stack.empty()</tt>, and 00630 <tt>work.sub_stack.empty()</tt>, 00631 are all true true. 00632 These stacks are passed in so that elements can be allocated once 00633 and then the elements can be reused with calls to \c optimize_record_csum. 00634 00635 \par Exception 00636 <tt>tape[i].new_var</tt> 00637 is not yet defined for any node \c i that is \c csum_connected 00638 to the \a current node. 00639 For example; suppose that index \c j corresponds to a variable 00640 in the current operator, 00641 <tt>i = tape[current].arg[j]</tt>, 00642 and 00643 <tt>tape[arg[j]].connect == csum_connected</tt>. 00644 It then follows that 00645 <tt>tape[i].new_var == tape.size()</tt>. 00646 00647 \par Restriction: 00648 \li <tt>tape[current].op</tt> 00649 must be one of <tt>AddpvOp, AddvvOp, SubpvOp, SubvpOp, SubvvOp</tt>. 00650 00651 \li <tt>tape[current].connect</tt> must be \c yes_connected. 00652 */ 00653 00654 00655 template <class Base> 00656 size_t optimize_record_csum( 00657 const CppAD::vector<struct optimize_old_variable>& tape , 00658 size_t current , 00659 size_t npar , 00660 const Base* par , 00661 recorder<Base>* rec , 00662 optimize_csum_stacks& work ) 00663 { 00664 00665 CPPAD_ASSERT_UNKNOWN( work.op_stack.empty() ); 00666 CPPAD_ASSERT_UNKNOWN( work.add_stack.empty() ); 00667 CPPAD_ASSERT_UNKNOWN( work.sub_stack.empty() ); 00668 CPPAD_ASSERT_UNKNOWN( tape[current].connect == yes_connected ); 00669 00670 size_t i; 00671 OpCode op; 00672 const addr_t* arg; 00673 bool add; 00674 struct optimize_csum_variable var; 00675 00676 var.op = tape[current].op; 00677 var.arg = tape[current].arg; 00678 var.add = true; 00679 work.op_stack.push( var ); 00680 Base sum_par(0); 00681 while( ! work.op_stack.empty() ) 00682 { var = work.op_stack.top(); 00683 work.op_stack.pop(); 00684 op = var.op; 00685 arg = var.arg; 00686 add = var.add; 00687 // process first argument to this operator 00688 switch(op) 00689 { case AddpvOp: 00690 case SubpvOp: 00691 CPPAD_ASSERT_UNKNOWN( size_t(arg[0]) < npar ); 00692 if( add ) 00693 sum_par += par[arg[0]]; 00694 else sum_par -= par[arg[0]]; 00695 break; 00696 00697 case AddvvOp: 00698 case SubvpOp: 00699 case SubvvOp: 00700 if( tape[arg[0]].connect == csum_connected ) 00701 { CPPAD_ASSERT_UNKNOWN( 00702 size_t(tape[arg[0]].new_var) == tape.size() 00703 ); 00704 var.op = tape[arg[0]].op; 00705 var.arg = tape[arg[0]].arg; 00706 var.add = add; 00707 work.op_stack.push( var ); 00708 } 00709 else if( add ) 00710 work.add_stack.push(arg[0]); 00711 else work.sub_stack.push(arg[0]); 00712 break; 00713 00714 default: 00715 CPPAD_ASSERT_UNKNOWN(false); 00716 } 00717 // process second argument to this operator 00718 switch(op) 00719 { 00720 case SubvpOp: 00721 CPPAD_ASSERT_UNKNOWN( size_t(arg[1]) < npar ); 00722 if( add ) 00723 sum_par -= par[arg[1]]; 00724 else sum_par += par[arg[1]]; 00725 break; 00726 00727 case SubvvOp: 00728 case SubpvOp: 00729 add = ! add; 00730 00731 case AddvvOp: 00732 case AddpvOp: 00733 if( tape[arg[1]].connect == csum_connected ) 00734 { CPPAD_ASSERT_UNKNOWN( 00735 size_t(tape[arg[1]].new_var) == tape.size() 00736 ); 00737 var.op = tape[arg[1]].op; 00738 var.arg = tape[arg[1]].arg; 00739 var.add = add; 00740 work.op_stack.push( var ); 00741 } 00742 else if( add ) 00743 work.add_stack.push(arg[1]); 00744 else work.sub_stack.push(arg[1]); 00745 break; 00746 00747 default: 00748 CPPAD_ASSERT_UNKNOWN(false); 00749 } 00750 } 00751 // number of variables in this cummulative sum operator 00752 size_t n_add = work.add_stack.size(); 00753 size_t n_sub = work.sub_stack.size(); 00754 size_t old_arg, new_arg; 00755 rec->PutArg(n_add); // arg[0] 00756 rec->PutArg(n_sub); // arg[1] 00757 new_arg = rec->PutPar( sum_par ); 00758 rec->PutArg(new_arg); // arg[2] 00759 for(i = 0; i < n_add; i++) 00760 { CPPAD_ASSERT_UNKNOWN( ! work.add_stack.empty() ); 00761 old_arg = work.add_stack.top(); 00762 new_arg = tape[old_arg].new_var; 00763 CPPAD_ASSERT_UNKNOWN( new_arg < tape.size() ); 00764 rec->PutArg(new_arg); // arg[3+i] 00765 work.add_stack.pop(); 00766 } 00767 for(i = 0; i < n_sub; i++) 00768 { CPPAD_ASSERT_UNKNOWN( ! work.sub_stack.empty() ); 00769 old_arg = work.sub_stack.top(); 00770 new_arg = tape[old_arg].new_var; 00771 CPPAD_ASSERT_UNKNOWN( new_arg < tape.size() ); 00772 rec->PutArg(new_arg); // arg[3 + arg[0] + i] 00773 work.sub_stack.pop(); 00774 } 00775 rec->PutArg(n_add + n_sub); // arg[3 + arg[0] + arg[1]] 00776 i = rec->PutOp(CSumOp); 00777 CPPAD_ASSERT_UNKNOWN(new_arg < tape.size()); 00778 00779 return i; 00780 } 00781 // ========================================================================== 00782 /*! 00783 Convert a player object to an optimized recorder object 00784 00785 \tparam Base 00786 base type for the operator; i.e., this operation was recorded 00787 using AD< \a Base > and computations by this routine are done using type 00788 \a Base. 00789 00790 \param n 00791 is the number of independent variables on the tape. 00792 00793 \param dep_taddr 00794 On input this vector contains the indices for each of the dependent 00795 variable values in the operation sequence corresponding to \a play. 00796 Upon return it contains the indices for the same variables but in 00797 the operation sequence corresponding to \a rec. 00798 00799 \param play 00800 This is the operation sequence that we are optimizing. 00801 It is essentially const, except for play back state which 00802 changes while it plays back the operation seqeunce. 00803 00804 \param rec 00805 The input contents of this recording does not matter. 00806 Upon return, it contains an optimized verison of the 00807 operation sequence corresponding to \a play. 00808 */ 00809 00810 template <class Base> 00811 void optimize( 00812 size_t n , 00813 CppAD::vector<size_t>& dep_taddr , 00814 player<Base>* play , 00815 recorder<Base>* rec ) 00816 { 00817 // temporary indices 00818 size_t i, j, k; 00819 00820 // temporary variables 00821 OpCode op; // current operator 00822 const addr_t* arg; // operator arguments 00823 size_t i_var; // index of first result for current operator 00824 00825 // range and domain dimensions for F 00826 size_t m = dep_taddr.size(); 00827 00828 // number of variables in the player 00829 const size_t num_var = play->num_rec_var(); 00830 00831 # ifndef NDEBUG 00832 // number of paraemters in the player 00833 const size_t num_par = play->num_rec_par(); 00834 # endif 00835 00836 // number of VecAD indices 00837 size_t num_vecad_ind = play->num_rec_vecad_ind(); 00838 00839 // number of VecAD vectors 00840 size_t num_vecad_vec = play->num_rec_vecad_vec(); 00841 00842 // ------------------------------------------------------------- 00843 // data structure that maps variable index in original operation 00844 // sequence to corresponding operator information 00845 CppAD::vector<struct optimize_old_variable> tape(num_var); 00846 // ------------------------------------------------------------- 00847 // Determine how each variable is connected to the dependent variables 00848 00849 // initialize all variables has having no connections 00850 for(i = 0; i < num_var; i++) 00851 tape[i].connect = not_connected; 00852 00853 for(j = 0; j < m; j++) 00854 { // mark dependent variables as having one or more connections 00855 tape[ dep_taddr[j] ].connect = yes_connected; 00856 } 00857 00858 // vecad_connect contains a value for each VecAD object. 00859 // vecad maps a VecAD index (which corresponds to the beginning of the 00860 // VecAD object) to the vecad_connect falg for the VecAD object. 00861 CppAD::vector<optimize_connection> vecad_connect(num_vecad_vec); 00862 CppAD::vector<size_t> vecad(num_vecad_ind); 00863 j = 0; 00864 for(i = 0; i < num_vecad_vec; i++) 00865 { vecad_connect[i] = not_connected; 00866 // length of this VecAD 00867 size_t length = play->GetVecInd(j); 00868 // set to proper index for this VecAD 00869 vecad[j] = i; 00870 for(k = 1; k <= length; k++) 00871 vecad[j+k] = num_vecad_vec; // invalid index 00872 // start of next VecAD 00873 j += length + 1; 00874 } 00875 CPPAD_ASSERT_UNKNOWN( j == num_vecad_ind ); 00876 00877 // work space used by UserOp. 00878 typedef std::set<size_t> size_set; 00879 size_t user_q = 0; // maximum set element plus one 00880 vector< size_set > user_r; // sparsity pattern for the argument x 00881 vector< size_set > user_s; // sparisty pattern for the result y 00882 size_t user_index = 0; // indentifier for this user_atomic operation 00883 size_t user_id = 0; // user identifier for this call to operator 00884 size_t user_i = 0; // index in result vector 00885 size_t user_j = 0; // index in argument vector 00886 size_t user_m = 0; // size of result vector 00887 size_t user_n = 0; // size of arugment vector 00888 // next expected operator in a UserOp sequence 00889 enum { user_start, user_arg, user_ret, user_end } user_state; 00890 std::stack<bool> user_keep; 00891 00892 // Initialize a reverse mode sweep through the operation sequence 00893 size_t i_op; 00894 play->start_reverse(op, arg, i_op, i_var); 00895 CPPAD_ASSERT_UNKNOWN( op == EndOp ); 00896 size_t mask; 00897 user_state = user_end; 00898 while(op != BeginOp) 00899 { // next op 00900 play->next_reverse(op, arg, i_op, i_var); 00901 // This if is not necessary becasue last assignment 00902 // with this value of i_var will have NumRes(op) > 0 00903 if( NumRes(op) > 0 ) 00904 { tape[i_var].op = op; 00905 tape[i_var].arg = arg; 00906 } 00907 # ifndef NDEBUG 00908 if( i_op <= n ) 00909 { CPPAD_ASSERT_UNKNOWN((op == InvOp) | (op == BeginOp)); 00910 } 00911 else CPPAD_ASSERT_UNKNOWN((op != InvOp) & (op != BeginOp)); 00912 # endif 00913 switch( op ) 00914 { 00915 // Unary operator where operand is arg[0] 00916 case AbsOp: 00917 case AcosOp: 00918 case AsinOp: 00919 case AtanOp: 00920 case CosOp: 00921 case CoshOp: 00922 case DivvpOp: 00923 case ExpOp: 00924 case LogOp: 00925 case PowvpOp: 00926 case SinOp: 00927 case SinhOp: 00928 case SqrtOp: 00929 case TanOp: 00930 case TanhOp: 00931 if( tape[i_var].connect != not_connected ) 00932 tape[arg[0]].connect = yes_connected; 00933 break; // -------------------------------------------- 00934 00935 // Unary operator where operand is arg[1] 00936 case DisOp: 00937 case DivpvOp: 00938 case MulpvOp: 00939 case PowpvOp: 00940 if( tape[i_var].connect != not_connected ) 00941 tape[arg[1]].connect = yes_connected; 00942 break; // -------------------------------------------- 00943 00944 // Special case for SubvpOp 00945 case SubvpOp: 00946 if( tape[i_var].connect != not_connected ) 00947 { 00948 if( tape[arg[0]].connect == not_connected ) 00949 tape[arg[0]].connect = sum_connected; 00950 else 00951 tape[arg[0]].connect = yes_connected; 00952 if( tape[i_var].connect == sum_connected ) 00953 tape[i_var].connect = csum_connected; 00954 } 00955 break; // -------------------------------------------- 00956 00957 // Special case for AddpvOp and SubpvOp 00958 case AddpvOp: 00959 case SubpvOp: 00960 if( tape[i_var].connect != not_connected ) 00961 { 00962 if( tape[arg[1]].connect == not_connected ) 00963 tape[arg[1]].connect = sum_connected; 00964 else 00965 tape[arg[1]].connect = yes_connected; 00966 if( tape[i_var].connect == sum_connected ) 00967 tape[i_var].connect = csum_connected; 00968 } 00969 break; // -------------------------------------------- 00970 00971 00972 // Special case for AddvvOp and SubvvOp 00973 case AddvvOp: 00974 case SubvvOp: 00975 if( tape[i_var].connect != not_connected ) 00976 { 00977 if( tape[arg[0]].connect == not_connected ) 00978 tape[arg[0]].connect = sum_connected; 00979 else 00980 tape[arg[0]].connect = yes_connected; 00981 00982 if( tape[arg[1]].connect == not_connected ) 00983 tape[arg[1]].connect = sum_connected; 00984 else 00985 tape[arg[1]].connect = yes_connected; 00986 if( tape[i_var].connect == sum_connected ) 00987 tape[i_var].connect = csum_connected; 00988 } 00989 break; // -------------------------------------------- 00990 00991 // Other binary operators 00992 // where operands are arg[0], arg[1] 00993 case DivvvOp: 00994 case MulvvOp: 00995 case PowvvOp: 00996 if( tape[i_var].connect != not_connected ) 00997 { 00998 tape[arg[0]].connect = yes_connected; 00999 tape[arg[1]].connect = yes_connected; 01000 } 01001 break; // -------------------------------------------- 01002 01003 // Conditional expression operators 01004 case CExpOp: 01005 CPPAD_ASSERT_UNKNOWN( NumArg(CExpOp) == 6 ); 01006 if( tape[i_var].connect != not_connected ) 01007 { 01008 mask = 1; 01009 for(i = 2; i < 6; i++) 01010 { if( arg[1] & mask ) 01011 { CPPAD_ASSERT_UNKNOWN( size_t(arg[i]) < i_var ); 01012 tape[arg[i]].connect = yes_connected; 01013 } 01014 mask = mask << 1; 01015 } 01016 } 01017 break; // -------------------------------------------- 01018 01019 // Operations where there is noting to do 01020 case BeginOp: 01021 case ComOp: 01022 case EndOp: 01023 case InvOp: 01024 case ParOp: 01025 case PriOp: 01026 break; // -------------------------------------------- 01027 01028 // Load using a parameter index 01029 case LdpOp: 01030 if( tape[i_var].connect != not_connected ) 01031 { 01032 i = vecad[ arg[0] - 1 ]; 01033 vecad_connect[i] = yes_connected; 01034 } 01035 break; // -------------------------------------------- 01036 01037 // Load using a variable index 01038 case LdvOp: 01039 if( tape[i_var].connect != not_connected ) 01040 { 01041 i = vecad[ arg[0] - 1 ]; 01042 vecad_connect[i] = yes_connected; 01043 tape[arg[1]].connect = yes_connected; 01044 } 01045 break; // -------------------------------------------- 01046 01047 // Store a variable using a parameter index 01048 case StpvOp: 01049 i = vecad[ arg[0] - 1 ]; 01050 if( vecad_connect[i] != not_connected ) 01051 tape[arg[2]].connect = yes_connected; 01052 break; // -------------------------------------------- 01053 01054 // Store a variable using a variable index 01055 case StvvOp: 01056 i = vecad[ arg[0] - 1 ]; 01057 if( vecad_connect[i] ) 01058 { tape[arg[1]].connect = yes_connected; 01059 tape[arg[2]].connect = yes_connected; 01060 } 01061 break; 01062 // ============================================================ 01063 case UserOp: 01064 // start or end atomic operation sequence 01065 CPPAD_ASSERT_UNKNOWN( NumRes( UserOp ) == 0 ); 01066 CPPAD_ASSERT_UNKNOWN( NumArg( UserOp ) == 4 ); 01067 if( user_state == user_end ) 01068 { user_index = arg[0]; 01069 user_id = arg[1]; 01070 user_n = arg[2]; 01071 user_m = arg[3]; 01072 user_q = 1; 01073 if(user_r.size() < user_n ) 01074 user_r.resize(user_n); 01075 if(user_s.size() < user_m ) 01076 user_s.resize(user_m); 01077 user_j = user_n; 01078 user_i = user_m; 01079 user_state = user_ret; 01080 user_keep.push(false); 01081 } 01082 else 01083 { CPPAD_ASSERT_UNKNOWN( user_state == user_start ); 01084 CPPAD_ASSERT_UNKNOWN( user_index == size_t(arg[0]) ); 01085 CPPAD_ASSERT_UNKNOWN( user_id == size_t(arg[1]) ); 01086 CPPAD_ASSERT_UNKNOWN( user_n == size_t(arg[2]) ); 01087 CPPAD_ASSERT_UNKNOWN( user_m == size_t(arg[3]) ); 01088 user_state = user_end; 01089 } 01090 break; 01091 01092 case UsrapOp: 01093 // parameter argument in an atomic operation sequence 01094 CPPAD_ASSERT_UNKNOWN( user_state == user_arg ); 01095 CPPAD_ASSERT_UNKNOWN( 0 < user_j && user_j <= user_n ); 01096 CPPAD_ASSERT_UNKNOWN( NumArg(op) == 1 ); 01097 CPPAD_ASSERT_UNKNOWN( size_t(arg[0]) < num_par ); 01098 --user_j; 01099 if( user_j == 0 ) 01100 user_state = user_start; 01101 break; 01102 01103 case UsravOp: 01104 // variable argument in an atomic operation sequence 01105 CPPAD_ASSERT_UNKNOWN( user_state == user_arg ); 01106 CPPAD_ASSERT_UNKNOWN( 0 < user_j && user_j <= user_n ); 01107 CPPAD_ASSERT_UNKNOWN( NumArg(op) == 1 ); 01108 CPPAD_ASSERT_UNKNOWN( size_t(arg[0]) <= i_var ); 01109 CPPAD_ASSERT_UNKNOWN( 0 < arg[0] ); 01110 --user_j; 01111 if( ! user_r[user_j].empty() ) 01112 { tape[arg[0]].connect = yes_connected; 01113 user_keep.top() = true; 01114 } 01115 if( user_j == 0 ) 01116 user_state = user_start; 01117 break; 01118 01119 case UsrrpOp: 01120 // parameter result in an atomic operation sequence 01121 CPPAD_ASSERT_UNKNOWN( user_state == user_ret ); 01122 CPPAD_ASSERT_UNKNOWN( 0 < user_i && user_i <= user_m ); 01123 CPPAD_ASSERT_UNKNOWN( NumArg(op) == 1 ); 01124 CPPAD_ASSERT_UNKNOWN( size_t(arg[0]) < num_par ); 01125 --user_i; 01126 user_s[user_i].clear(); 01127 if( user_i == 0 ) 01128 { // call users function for this operation 01129 user_atomic<Base>::rev_jac_sparse(user_index, user_id, 01130 user_n, user_m, user_q, user_r, user_s 01131 ); 01132 user_state = user_arg; 01133 } 01134 break; 01135 01136 case UsrrvOp: 01137 // variable result in an atomic operation sequence 01138 CPPAD_ASSERT_UNKNOWN( user_state == user_ret ); 01139 CPPAD_ASSERT_UNKNOWN( 0 < user_i && user_i <= user_m ); 01140 --user_i; 01141 user_s[user_i].clear(); 01142 if( tape[i_var].connect != not_connected ) 01143 user_s[user_i].insert(0); 01144 if( user_i == 0 ) 01145 { // call users function for this operation 01146 user_atomic<Base>::rev_jac_sparse(user_index, user_id, 01147 user_n, user_m, user_q, user_r, user_s 01148 ); 01149 user_state = user_arg; 01150 } 01151 break; 01152 // ============================================================ 01153 01154 // all cases should be handled above 01155 default: 01156 CPPAD_ASSERT_UNKNOWN(0); 01157 } 01158 } 01159 // values corresponding to BeginOp 01160 CPPAD_ASSERT_UNKNOWN( i_op == 0 && i_var == 0 && op == BeginOp ); 01161 tape[i_var].op = op; 01162 // ------------------------------------------------------------- 01163 01164 // Erase all information in the recording 01165 rec->free(); 01166 01167 // Initilaize table mapping hash code to variable index in tape 01168 // as pointing to the BeginOp at the beginning of the tape 01169 CppAD::vector<size_t> hash_table_var(CPPAD_HASH_TABLE_SIZE); 01170 for(i = 0; i < CPPAD_HASH_TABLE_SIZE; i++) 01171 hash_table_var[i] = 0; 01172 CPPAD_ASSERT_UNKNOWN( tape[0].op == BeginOp ); 01173 01174 // initialize mapping from old variable index to new variable index 01175 for(i = 0; i < num_var; i++) 01176 tape[i].new_var = num_var; // invalid index 01177 01178 01179 // initialize mapping from old VecAD index to new VecAD index 01180 CppAD::vector<size_t> new_vecad_ind(num_vecad_ind); 01181 for(i = 0; i < num_vecad_ind; i++) 01182 new_vecad_ind[i] = num_vecad_ind; // invalid index 01183 01184 j = 0; // index into the old set of indices 01185 for(i = 0; i < num_vecad_vec; i++) 01186 { // length of this VecAD 01187 size_t length = play->GetVecInd(j); 01188 if( vecad_connect[i] != not_connected ) 01189 { // Put this VecAD vector in new recording 01190 CPPAD_ASSERT_UNKNOWN(length < num_vecad_ind); 01191 new_vecad_ind[j] = rec->PutVecInd(length); 01192 for(k = 1; k <= length; k++) new_vecad_ind[j+k] = 01193 rec->PutVecInd( 01194 rec->PutPar( 01195 play->GetPar( 01196 play->GetVecInd(j+k) 01197 ) ) ); 01198 } 01199 // start of next VecAD 01200 j += length + 1; 01201 } 01202 CPPAD_ASSERT_UNKNOWN( j == num_vecad_ind ); 01203 01204 // start playing the operations in the forward direction 01205 play->start_forward(op, arg, i_op, i_var); 01206 01207 // playing forward skips BeginOp at the beginning, but not EndOp at 01208 // the end. Put BeginOp at beginning of recording 01209 CPPAD_ASSERT_UNKNOWN( op == BeginOp ); 01210 CPPAD_ASSERT_NARG_NRES(BeginOp, 0, 1); 01211 tape[i_var].new_var = rec->PutOp(BeginOp); 01212 01213 // temporary buffer for new argument values 01214 addr_t new_arg[6]; 01215 01216 // temporary work space used by optimize_record_csum 01217 // (decalared here to avoid realloaction of memory) 01218 optimize_csum_stacks csum_work; 01219 01220 user_state = user_start; 01221 while(op != EndOp) 01222 { // next op 01223 play->next_forward(op, arg, i_op, i_var); 01224 CPPAD_ASSERT_UNKNOWN( (i_op > n) | (op == InvOp) ); 01225 CPPAD_ASSERT_UNKNOWN( (i_op <= n) | (op != InvOp) ); 01226 01227 // determine if we should keep this operation in the new 01228 // operation sequence 01229 bool keep; 01230 switch( op ) 01231 { case ComOp: 01232 case PriOp: 01233 keep = false; 01234 break; 01235 01236 case InvOp: 01237 case EndOp: 01238 keep = true; 01239 break; 01240 01241 case StppOp: 01242 case StvpOp: 01243 case StpvOp: 01244 case StvvOp: 01245 CPPAD_ASSERT_UNKNOWN( NumRes(op) == 0 ); 01246 i = vecad[ arg[0] - 1 ]; 01247 keep = vecad_connect[i] != not_connected; 01248 break; 01249 01250 case AddpvOp: 01251 case AddvvOp: 01252 case SubpvOp: 01253 case SubvpOp: 01254 case SubvvOp: 01255 keep = tape[i_var].connect != not_connected; 01256 keep &= tape[i_var].connect != csum_connected; 01257 break; 01258 01259 case UserOp: 01260 case UsrapOp: 01261 case UsravOp: 01262 case UsrrpOp: 01263 case UsrrvOp: 01264 keep = user_keep.top(); 01265 break; 01266 01267 default: 01268 keep = tape[i_var].connect != not_connected; 01269 break; 01270 } 01271 01272 size_t match_var = 0; 01273 unsigned short code = 0; 01274 bool replace_hash = false; 01275 if( keep ) switch( op ) 01276 { 01277 // Unary operator where operand is arg[0] 01278 case AbsOp: 01279 case AcosOp: 01280 case AsinOp: 01281 case AtanOp: 01282 case CosOp: 01283 case CoshOp: 01284 case ExpOp: 01285 case LogOp: 01286 case SinOp: 01287 case SinhOp: 01288 case SqrtOp: 01289 case TanOp: 01290 case TanhOp: 01291 match_var = optimize_unary_match( 01292 tape , // inputs 01293 i_var , 01294 play->num_rec_par() , 01295 play->GetPar() , 01296 hash_table_var , 01297 code // outputs 01298 ); 01299 if( match_var > 0 ) 01300 tape[i_var].new_var = match_var; 01301 else 01302 { 01303 replace_hash = true; 01304 new_arg[0] = tape[ arg[0] ].new_var; 01305 rec->PutArg( new_arg[0] ); 01306 i = rec->PutOp(op); 01307 tape[i_var].new_var = i; 01308 CPPAD_ASSERT_UNKNOWN( size_t(new_arg[0]) < i ); 01309 } 01310 break; 01311 // --------------------------------------------------- 01312 // Binary operators where 01313 // left is a variable and right is a parameter 01314 case SubvpOp: 01315 if( tape[arg[0]].connect == csum_connected ) 01316 { 01317 // convert to a sequence of summation operators 01318 tape[i_var].new_var = optimize_record_csum( 01319 tape , // inputs 01320 i_var , 01321 play->num_rec_par() , 01322 play->GetPar() , 01323 rec , 01324 csum_work 01325 ); 01326 // abort rest of this case 01327 break; 01328 } 01329 case DivvpOp: 01330 case PowvpOp: 01331 match_var = optimize_binary_match( 01332 tape , // inputs 01333 i_var , 01334 play->num_rec_par() , 01335 play->GetPar() , 01336 hash_table_var , 01337 code // outputs 01338 ); 01339 if( match_var > 0 ) 01340 tape[i_var].new_var = match_var; 01341 else 01342 { tape[i_var].new_var = optimize_record_vp( 01343 tape , // inputs 01344 i_var , 01345 play->num_rec_par() , 01346 play->GetPar() , 01347 rec , 01348 op , 01349 arg 01350 ); 01351 replace_hash = true; 01352 } 01353 break; 01354 // --------------------------------------------------- 01355 // Binary operators where 01356 // left is a parameter and right is a variable 01357 case SubpvOp: 01358 case AddpvOp: 01359 if( tape[arg[1]].connect == csum_connected ) 01360 { 01361 // convert to a sequence of summation operators 01362 tape[i_var].new_var = optimize_record_csum( 01363 tape , // inputs 01364 i_var , 01365 play->num_rec_par() , 01366 play->GetPar() , 01367 rec , 01368 csum_work 01369 ); 01370 // abort rest of this case 01371 break; 01372 } 01373 case DivpvOp: 01374 case MulpvOp: 01375 case PowpvOp: 01376 match_var = optimize_binary_match( 01377 tape , // inputs 01378 i_var , 01379 play->num_rec_par() , 01380 play->GetPar() , 01381 hash_table_var , 01382 code // outputs 01383 ); 01384 if( match_var > 0 ) 01385 tape[i_var].new_var = match_var; 01386 else 01387 { tape[i_var].new_var = optimize_record_pv( 01388 tape , // inputs 01389 i_var , 01390 play->num_rec_par() , 01391 play->GetPar() , 01392 rec , 01393 op , 01394 arg 01395 ); 01396 replace_hash = true; 01397 } 01398 break; 01399 // --------------------------------------------------- 01400 // Binary operator where 01401 // both operators are variables 01402 case AddvvOp: 01403 case SubvvOp: 01404 if( (tape[arg[0]].connect == csum_connected) | 01405 (tape[arg[1]].connect == csum_connected) 01406 ) 01407 { 01408 // convert to a sequence of summation operators 01409 tape[i_var].new_var = optimize_record_csum( 01410 tape , // inputs 01411 i_var , 01412 play->num_rec_par() , 01413 play->GetPar() , 01414 rec , 01415 csum_work 01416 ); 01417 // abort rest of this case 01418 break; 01419 } 01420 case DivvvOp: 01421 case MulvvOp: 01422 case PowvvOp: 01423 match_var = optimize_binary_match( 01424 tape , // inputs 01425 i_var , 01426 play->num_rec_par() , 01427 play->GetPar() , 01428 hash_table_var , 01429 code // outputs 01430 ); 01431 if( match_var > 0 ) 01432 tape[i_var].new_var = match_var; 01433 else 01434 { tape[i_var].new_var = optimize_record_vv( 01435 tape , // inputs 01436 i_var , 01437 play->num_rec_par() , 01438 play->GetPar() , 01439 rec , 01440 op , 01441 arg 01442 ); 01443 replace_hash = true; 01444 } 01445 break; 01446 // --------------------------------------------------- 01447 // Conditional expression operators 01448 case CExpOp: 01449 CPPAD_ASSERT_NARG_NRES(op, 6, 1); 01450 new_arg[0] = arg[0]; 01451 new_arg[1] = arg[1]; 01452 mask = 1; 01453 for(i = 2; i < 6; i++) 01454 { if( arg[1] & mask ) 01455 { new_arg[i] = tape[arg[i]].new_var; 01456 CPPAD_ASSERT_UNKNOWN( 01457 size_t(new_arg[i]) < num_var 01458 ); 01459 } 01460 else new_arg[i] = rec->PutPar( 01461 play->GetPar( arg[i] ) 01462 ); 01463 mask = mask << 1; 01464 } 01465 rec->PutArg( 01466 new_arg[0] , 01467 new_arg[1] , 01468 new_arg[2] , 01469 new_arg[3] , 01470 new_arg[4] , 01471 new_arg[5] 01472 ); 01473 tape[i_var].new_var = rec->PutOp(op); 01474 break; 01475 // --------------------------------------------------- 01476 // Operations with no arguments and no results 01477 case EndOp: 01478 CPPAD_ASSERT_NARG_NRES(op, 0, 0); 01479 rec->PutOp(op); 01480 break; 01481 // --------------------------------------------------- 01482 // Operations with no arguments and one result 01483 case InvOp: 01484 CPPAD_ASSERT_NARG_NRES(op, 0, 1); 01485 tape[i_var].new_var = rec->PutOp(op); 01486 break; 01487 // --------------------------------------------------- 01488 // Operations with one argument that is a parameter 01489 case ParOp: 01490 CPPAD_ASSERT_NARG_NRES(op, 1, 1); 01491 new_arg[0] = rec->PutPar( play->GetPar(arg[0] ) ); 01492 01493 rec->PutArg( new_arg[0] ); 01494 tape[i_var].new_var = rec->PutOp(op); 01495 break; 01496 // --------------------------------------------------- 01497 // Load using a parameter index 01498 case LdpOp: 01499 CPPAD_ASSERT_NARG_NRES(op, 3, 1); 01500 new_arg[0] = new_vecad_ind[ arg[0] ]; 01501 new_arg[1] = arg[1]; 01502 CPPAD_ASSERT_UNKNOWN( size_t(new_arg[0]) < num_vecad_ind ); 01503 rec->PutArg( 01504 new_arg[0], 01505 new_arg[1], 01506 0 01507 ); 01508 tape[i_var].new_var = rec->PutOp(op); 01509 break; 01510 // --------------------------------------------------- 01511 // Load using a variable index 01512 case LdvOp: 01513 CPPAD_ASSERT_NARG_NRES(op, 3, 1); 01514 new_arg[0] = new_vecad_ind[ arg[0] ]; 01515 new_arg[1] = tape[arg[1]].new_var; 01516 CPPAD_ASSERT_UNKNOWN( size_t(new_arg[0]) < num_vecad_ind ); 01517 CPPAD_ASSERT_UNKNOWN( size_t(new_arg[1]) < num_var ); 01518 rec->PutArg( 01519 new_arg[0], 01520 new_arg[1], 01521 0 01522 ); 01523 tape[i_var].new_var = rec->PutOp(op); 01524 break; 01525 // --------------------------------------------------- 01526 // Store a parameter using a parameter index 01527 case StppOp: 01528 CPPAD_ASSERT_NARG_NRES(op, 3, 0); 01529 new_arg[0] = new_vecad_ind[ arg[0] ]; 01530 new_arg[1] = rec->PutPar( play->GetPar(arg[1]) ); 01531 new_arg[2] = rec->PutPar( play->GetPar(arg[2]) ); 01532 CPPAD_ASSERT_UNKNOWN( size_t(new_arg[0]) < num_vecad_ind ); 01533 rec->PutArg( 01534 new_arg[0], 01535 new_arg[1], 01536 new_arg[2] 01537 ); 01538 rec->PutOp(op); 01539 break; 01540 // --------------------------------------------------- 01541 // Store a parameter using a variable index 01542 case StvpOp: 01543 CPPAD_ASSERT_NARG_NRES(op, 3, 0); 01544 new_arg[0] = new_vecad_ind[ arg[0] ]; 01545 new_arg[1] = tape[arg[1]].new_var; 01546 new_arg[2] = rec->PutPar( play->GetPar(arg[2]) ); 01547 CPPAD_ASSERT_UNKNOWN( size_t(new_arg[0]) < num_vecad_ind ); 01548 CPPAD_ASSERT_UNKNOWN( size_t(new_arg[1]) < num_var ); 01549 rec->PutArg( 01550 new_arg[0], 01551 new_arg[1], 01552 new_arg[2] 01553 ); 01554 rec->PutOp(op); 01555 break; 01556 // --------------------------------------------------- 01557 // Store a variable using a parameter index 01558 case StpvOp: 01559 CPPAD_ASSERT_NARG_NRES(op, 3, 0); 01560 new_arg[0] = new_vecad_ind[ arg[0] ]; 01561 new_arg[1] = rec->PutPar( play->GetPar(arg[1]) ); 01562 new_arg[2] = tape[arg[2]].new_var; 01563 CPPAD_ASSERT_UNKNOWN( size_t(new_arg[0]) < num_vecad_ind ); 01564 CPPAD_ASSERT_UNKNOWN( size_t(new_arg[1]) < num_var ); 01565 CPPAD_ASSERT_UNKNOWN( size_t(new_arg[2]) < num_var ); 01566 rec->PutArg( 01567 new_arg[0], 01568 new_arg[1], 01569 new_arg[2] 01570 ); 01571 rec->PutOp(op); 01572 break; 01573 // --------------------------------------------------- 01574 // Store a variable using a variable index 01575 case StvvOp: 01576 CPPAD_ASSERT_NARG_NRES(op, 3, 0); 01577 new_arg[0] = new_vecad_ind[ arg[0] ]; 01578 new_arg[1] = tape[arg[1]].new_var; 01579 new_arg[2] = tape[arg[2]].new_var; 01580 CPPAD_ASSERT_UNKNOWN( size_t(new_arg[0]) < num_vecad_ind ); 01581 CPPAD_ASSERT_UNKNOWN( size_t(new_arg[1]) < num_var ); 01582 CPPAD_ASSERT_UNKNOWN( size_t(new_arg[2]) < num_var ); 01583 rec->PutArg( 01584 new_arg[0], 01585 new_arg[1], 01586 new_arg[2] 01587 ); 01588 rec->PutOp(op); 01589 break; 01590 01591 // ----------------------------------------------------------- 01592 case UserOp: 01593 CPPAD_ASSERT_NARG_NRES(op, 4, 0); 01594 if( user_state == user_start ) 01595 user_state = user_arg; 01596 else 01597 { user_state = user_start; 01598 user_keep.pop(); 01599 } 01600 // user_index, user_id, user_n, user_m 01601 rec->PutArg(arg[0], arg[1], arg[2], arg[3]); 01602 rec->PutOp(UserOp); 01603 break; 01604 01605 case UsrapOp: 01606 CPPAD_ASSERT_NARG_NRES(op, 1, 0); 01607 new_arg[0] = rec->PutPar( play->GetPar(arg[0]) ); 01608 rec->PutArg(new_arg[0]); 01609 rec->PutOp(UsrapOp); 01610 break; 01611 01612 case UsravOp: 01613 CPPAD_ASSERT_NARG_NRES(op, 1, 0); 01614 new_arg[0] = tape[arg[0]].new_var; 01615 rec->PutArg(new_arg[0]); 01616 rec->PutOp(UsravOp); 01617 break; 01618 01619 case UsrrpOp: 01620 CPPAD_ASSERT_NARG_NRES(op, 1, 0); 01621 new_arg[0] = rec->PutPar( play->GetPar(arg[0]) ); 01622 rec->PutArg(new_arg[0]); 01623 rec->PutOp(UsrrpOp); 01624 break; 01625 01626 case UsrrvOp: 01627 CPPAD_ASSERT_NARG_NRES(op, 0, 1); 01628 tape[i_var].new_var = rec->PutOp(UsrrvOp); 01629 break; 01630 // --------------------------------------------------- 01631 01632 // all cases should be handled above 01633 default: 01634 CPPAD_ASSERT_UNKNOWN(false); 01635 01636 } 01637 if( replace_hash ) 01638 { // The old variable index i_var corresponds to the 01639 // new variable index tape[i_var].new_var. In addition 01640 // this is the most recent variable that has this code. 01641 hash_table_var[code] = i_var; 01642 } 01643 01644 } 01645 // modify the dependent variable vector to new indices 01646 for(i = 0; i < dep_taddr.size(); i++ ) 01647 { CPPAD_ASSERT_UNKNOWN( size_t(tape[ dep_taddr[i] ].new_var) < num_var ); 01648 dep_taddr[i] = tape[ dep_taddr[i] ].new_var; 01649 } 01650 } 01651 01652 /*! 01653 Optimize a player object operation sequence 01654 01655 The operation sequence for this object is replaced by one with fewer operations 01656 but the same funcition and derivative values. 01657 01658 \tparam Base 01659 base type for the operator; i.e., this operation was recorded 01660 using AD< \a Base > and computations by this routine are done using type 01661 \a Base. 01662 01663 */ 01664 template <class Base> 01665 void ADFun<Base>::optimize(void) 01666 { // place to store the optimized version of the recording 01667 recorder<Base> rec; 01668 01669 // number of independent variables 01670 size_t n = ind_taddr_.size(); 01671 01672 # ifndef NDEBUG 01673 size_t i, j, m = dep_taddr_.size(); 01674 CppAD::vector<Base> x(n), y(m), check(m); 01675 bool check_zero_order = taylor_per_var_ > 0; 01676 Base max_taylor(0); 01677 if( check_zero_order ) 01678 { // zero order coefficients for independent vars 01679 for(j = 0; j < n; j++) 01680 { CPPAD_ASSERT_UNKNOWN( play_.GetOp(j+1) == InvOp ); 01681 CPPAD_ASSERT_UNKNOWN( ind_taddr_[j] == j+1 ); 01682 x[j] = taylor_[ ind_taddr_[j] * taylor_col_dim_ + 0]; 01683 } 01684 // zero order coefficients for dependent vars 01685 for(i = 0; i < m; i++) 01686 { CPPAD_ASSERT_UNKNOWN( dep_taddr_[i] < total_num_var_ ); 01687 y[i] = taylor_[ dep_taddr_[i] * taylor_col_dim_ + 0]; 01688 } 01689 // maximum zero order coefficient not counting BeginOp at beginning 01690 // (which is correpsonds to uninitialized memory). 01691 for(i = 1; i < total_num_var_; i++) 01692 { if( abs_geq(taylor_[i*taylor_col_dim_+0] , max_taylor) ) 01693 max_taylor = taylor_[i*taylor_col_dim_+0]; 01694 } 01695 } 01696 # endif 01697 01698 // create the optimized recording 01699 CppAD::optimize<Base>(n, dep_taddr_, &play_, &rec); 01700 01701 // number of variables in the recording 01702 total_num_var_ = rec.num_rec_var(); 01703 01704 // now replace the recording 01705 play_.get(rec); 01706 01707 // free memory allocated for sparse Jacobian calculation 01708 // (the results are no longer valid) 01709 for_jac_sparse_pack_.resize(0, 0); 01710 for_jac_sparse_set_.resize(0,0); 01711 01712 // free old Taylor coefficient memory 01713 taylor_.free(); 01714 taylor_per_var_ = 0; 01715 taylor_col_dim_ = 0; 01716 01717 # ifndef NDEBUG 01718 if( check_zero_order ) 01719 { 01720 // zero order forward calculation using new operation sequence 01721 check = Forward(0, x); 01722 01723 // check results 01724 Base eps = 10. * epsilon<Base>(); 01725 for(i = 0; i < m; i++) CPPAD_ASSERT_KNOWN( 01726 abs_geq( eps * max_taylor , check[i] - y[i] ) , 01727 "Error during check of f.optimize()." 01728 ); 01729 01730 // Erase memory that this calculation was done so NDEBUG gives 01731 // same final state for this object (from users perspective) 01732 taylor_per_var_ = 0; 01733 } 01734 # endif 01735 } 01736 01737 /*! \} */ 01738 CPPAD_END_NAMESPACE 01739 # endif