001    /*
002     * Licensed to the Apache Software Foundation (ASF) under one or more
003     * contributor license agreements.  See the NOTICE file distributed with
004     * this work for additional information regarding copyright ownership.
005     * The ASF licenses this file to You under the Apache License, Version 2.0
006     * (the "License"); you may not use this file except in compliance with
007     * the License.  You may obtain a copy of the License at
008     *
009     *      http://www.apache.org/licenses/LICENSE-2.0
010     *
011     * Unless required by applicable law or agreed to in writing, software
012     * distributed under the License is distributed on an "AS IS" BASIS,
013     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014     * See the License for the specific language governing permissions and
015     * limitations under the License.
016     */
017    package org.apache.commons.math.optimization.general;
018    
019    import java.util.Arrays;
020    
021    import org.apache.commons.math.FunctionEvaluationException;
022    import org.apache.commons.math.optimization.OptimizationException;
023    import org.apache.commons.math.optimization.VectorialPointValuePair;
024    
025    
026    /** 
027     * This class solves a least squares problem using the Levenberg-Marquardt algorithm.
028     *
029     * <p>This implementation <em>should</em> work even for over-determined systems
030     * (i.e. systems having more point than equations). Over-determined systems
031     * are solved by ignoring the point which have the smallest impact according
032     * to their jacobian column norm. Only the rank of the matrix and some loop bounds
033     * are changed to implement this.</p>
034     *
035     * <p>The resolution engine is a simple translation of the MINPACK <a
036     * href="http://www.netlib.org/minpack/lmder.f">lmder</a> routine with minor
037     * changes. The changes include the over-determined resolution and the Q.R.
038     * decomposition which has been rewritten following the algorithm described in the
039     * P. Lascaux and R. Theodor book <i>Analyse num&eacute;rique matricielle
040     * appliqu&eacute;e &agrave; l'art de l'ing&eacute;nieur</i>, Masson 1986. The
041     * redistribution policy for MINPACK is available <a
042     * href="http://www.netlib.org/minpack/disclaimer">here</a>, for convenience, it
043     * is reproduced below.</p>
044     *
045     * <table border="0" width="80%" cellpadding="10" align="center" bgcolor="#E0E0E0">
046     * <tr><td>
047     *    Minpack Copyright Notice (1999) University of Chicago.
048     *    All rights reserved
049     * </td></tr>
050     * <tr><td>
051     * Redistribution and use in source and binary forms, with or without
052     * modification, are permitted provided that the following conditions
053     * are met:
054     * <ol>
055     *  <li>Redistributions of source code must retain the above copyright
056     *      notice, this list of conditions and the following disclaimer.</li>
057     * <li>Redistributions in binary form must reproduce the above
058     *     copyright notice, this list of conditions and the following
059     *     disclaimer in the documentation and/or other materials provided
060     *     with the distribution.</li>
061     * <li>The end-user documentation included with the redistribution, if any,
062     *     must include the following acknowledgment:
063     *     <code>This product includes software developed by the University of
064     *           Chicago, as Operator of Argonne National Laboratory.</code>
065     *     Alternately, this acknowledgment may appear in the software itself,
066     *     if and wherever such third-party acknowledgments normally appear.</li>
067     * <li><strong>WARRANTY DISCLAIMER. THE SOFTWARE IS SUPPLIED "AS IS"
068     *     WITHOUT WARRANTY OF ANY KIND. THE COPYRIGHT HOLDER, THE
069     *     UNITED STATES, THE UNITED STATES DEPARTMENT OF ENERGY, AND
070     *     THEIR EMPLOYEES: (1) DISCLAIM ANY WARRANTIES, EXPRESS OR
071     *     IMPLIED, INCLUDING BUT NOT LIMITED TO ANY IMPLIED WARRANTIES
072     *     OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, TITLE
073     *     OR NON-INFRINGEMENT, (2) DO NOT ASSUME ANY LEGAL LIABILITY
074     *     OR RESPONSIBILITY FOR THE ACCURACY, COMPLETENESS, OR
075     *     USEFULNESS OF THE SOFTWARE, (3) DO NOT REPRESENT THAT USE OF
076     *     THE SOFTWARE WOULD NOT INFRINGE PRIVATELY OWNED RIGHTS, (4)
077     *     DO NOT WARRANT THAT THE SOFTWARE WILL FUNCTION
078     *     UNINTERRUPTED, THAT IT IS ERROR-FREE OR THAT ANY ERRORS WILL
079     *     BE CORRECTED.</strong></li>
080     * <li><strong>LIMITATION OF LIABILITY. IN NO EVENT WILL THE COPYRIGHT
081     *     HOLDER, THE UNITED STATES, THE UNITED STATES DEPARTMENT OF
082     *     ENERGY, OR THEIR EMPLOYEES: BE LIABLE FOR ANY INDIRECT,
083     *     INCIDENTAL, CONSEQUENTIAL, SPECIAL OR PUNITIVE DAMAGES OF
084     *     ANY KIND OR NATURE, INCLUDING BUT NOT LIMITED TO LOSS OF
085     *     PROFITS OR LOSS OF DATA, FOR ANY REASON WHATSOEVER, WHETHER
086     *     SUCH LIABILITY IS ASSERTED ON THE BASIS OF CONTRACT, TORT
087     *     (INCLUDING NEGLIGENCE OR STRICT LIABILITY), OR OTHERWISE,
088     *     EVEN IF ANY OF SAID PARTIES HAS BEEN WARNED OF THE
089     *     POSSIBILITY OF SUCH LOSS OR DAMAGES.</strong></li>
090     * <ol></td></tr>
091     * </table>
092    
093     * @author Argonne National Laboratory. MINPACK project. March 1980 (original fortran)
094     * @author Burton S. Garbow (original fortran)
095     * @author Kenneth E. Hillstrom (original fortran)
096     * @author Jorge J. More (original fortran)
097    
098     * @version $Revision: 795978 $ $Date: 2009-07-20 15:57:08 -0400 (Mon, 20 Jul 2009) $
099     * @since 2.0
100     *
101     */
102    public class LevenbergMarquardtOptimizer extends AbstractLeastSquaresOptimizer {
103    
104        /** Number of solved point. */
105        private int solvedCols;
106    
107        /** Diagonal elements of the R matrix in the Q.R. decomposition. */
108        private double[] diagR;
109    
110        /** Norms of the columns of the jacobian matrix. */
111        private double[] jacNorm;
112    
113        /** Coefficients of the Householder transforms vectors. */
114        private double[] beta;
115    
116        /** Columns permutation array. */
117        private int[] permutation;
118    
119        /** Rank of the jacobian matrix. */
120        private int rank;
121    
122        /** Levenberg-Marquardt parameter. */
123        private double lmPar;
124    
125        /** Parameters evolution direction associated with lmPar. */
126        private double[] lmDir;
127    
128        /** Positive input variable used in determining the initial step bound. */
129        private double initialStepBoundFactor;
130    
131        /** Desired relative error in the sum of squares. */
132        private double costRelativeTolerance;
133    
134        /**  Desired relative error in the approximate solution parameters. */
135        private double parRelativeTolerance;
136    
137        /** Desired max cosine on the orthogonality between the function vector
138         * and the columns of the jacobian. */
139        private double orthoTolerance;
140    
141        /** 
142         * Build an optimizer for least squares problems.
143         * <p>The default values for the algorithm settings are:
144         *   <ul>
145         *    <li>{@link #setInitialStepBoundFactor initial step bound factor}: 100.0</li>
146         *    <li>{@link #setMaxIterations maximal iterations}: 1000</li>
147         *    <li>{@link #setCostRelativeTolerance cost relative tolerance}: 1.0e-10</li>
148         *    <li>{@link #setParRelativeTolerance parameters relative tolerance}: 1.0e-10</li>
149         *    <li>{@link #setOrthoTolerance orthogonality tolerance}: 1.0e-10</li>
150         *   </ul>
151         * </p>
152         */
153        public LevenbergMarquardtOptimizer() {
154    
155            // set up the superclass with a default  max cost evaluations setting
156            setMaxIterations(1000);
157    
158            // default values for the tuning parameters
159            setInitialStepBoundFactor(100.0);
160            setCostRelativeTolerance(1.0e-10);
161            setParRelativeTolerance(1.0e-10);
162            setOrthoTolerance(1.0e-10);
163    
164        }
165    
166        /** 
167         * Set the positive input variable used in determining the initial step bound.
168         * This bound is set to the product of initialStepBoundFactor and the euclidean
169         * norm of diag*x if nonzero, or else to initialStepBoundFactor itself. In most
170         * cases factor should lie in the interval (0.1, 100.0). 100.0 is a generally
171         * recommended value.
172         *
173         * @param initialStepBoundFactor initial step bound factor
174         */
175        public void setInitialStepBoundFactor(double initialStepBoundFactor) {
176            this.initialStepBoundFactor = initialStepBoundFactor;
177        }
178    
179        /** 
180         * Set the desired relative error in the sum of squares.
181         * 
182         * @param costRelativeTolerance desired relative error in the sum of squares
183         */
184        public void setCostRelativeTolerance(double costRelativeTolerance) {
185            this.costRelativeTolerance = costRelativeTolerance;
186        }
187    
188        /** 
189         * Set the desired relative error in the approximate solution parameters.
190         * 
191         * @param parRelativeTolerance desired relative error
192         * in the approximate solution parameters
193         */
194        public void setParRelativeTolerance(double parRelativeTolerance) {
195            this.parRelativeTolerance = parRelativeTolerance;
196        }
197    
198        /** 
199         * Set the desired max cosine on the orthogonality.
200         * 
201         * @param orthoTolerance desired max cosine on the orthogonality
202         * between the function vector and the columns of the jacobian
203         */
204        public void setOrthoTolerance(double orthoTolerance) {
205            this.orthoTolerance = orthoTolerance;
206        }
207    
208        /** {@inheritDoc} */
209        @Override
210        protected VectorialPointValuePair doOptimize()
211            throws FunctionEvaluationException, OptimizationException, IllegalArgumentException {
212    
213            // arrays shared with the other private methods
214            solvedCols  = Math.min(rows, cols);
215            diagR       = new double[cols];
216            jacNorm     = new double[cols];
217            beta        = new double[cols];
218            permutation = new int[cols];
219            lmDir       = new double[cols];
220    
221            // local point
222            double   delta   = 0, xNorm = 0;
223            double[] diag    = new double[cols];
224            double[] oldX    = new double[cols];
225            double[] oldRes  = new double[rows];
226            double[] work1   = new double[cols];
227            double[] work2   = new double[cols];
228            double[] work3   = new double[cols];
229    
230            // evaluate the function at the starting point and calculate its norm
231            updateResidualsAndCost();
232    
233            // outer loop
234            lmPar = 0;
235            boolean firstIteration = true;
236            while (true) {
237    
238                incrementIterationsCounter();
239    
240                // compute the Q.R. decomposition of the jacobian matrix
241                updateJacobian();
242                qrDecomposition();
243    
244                // compute Qt.res
245                qTy(residuals);
246    
247                // now we don't need Q anymore,
248                // so let jacobian contain the R matrix with its diagonal elements
249                for (int k = 0; k < solvedCols; ++k) {
250                    int pk = permutation[k];
251                    jacobian[k][pk] = diagR[pk];
252                }
253    
254                if (firstIteration) {
255    
256                    // scale the point according to the norms of the columns
257                    // of the initial jacobian
258                    xNorm = 0;
259                    for (int k = 0; k < cols; ++k) {
260                        double dk = jacNorm[k];
261                        if (dk == 0) {
262                            dk = 1.0;
263                        }
264                        double xk = dk * point[k];
265                        xNorm  += xk * xk;
266                        diag[k] = dk;
267                    }
268                    xNorm = Math.sqrt(xNorm);
269    
270                    // initialize the step bound delta
271                    delta = (xNorm == 0) ? initialStepBoundFactor : (initialStepBoundFactor * xNorm);
272    
273                }
274    
275                // check orthogonality between function vector and jacobian columns
276                double maxCosine = 0;
277                if (cost != 0) {
278                    for (int j = 0; j < solvedCols; ++j) {
279                        int    pj = permutation[j];
280                        double s  = jacNorm[pj];
281                        if (s != 0) {
282                            double sum = 0;
283                            for (int i = 0; i <= j; ++i) {
284                                sum += jacobian[i][pj] * residuals[i];
285                            }
286                            maxCosine = Math.max(maxCosine, Math.abs(sum) / (s * cost));
287                        }
288                    }
289                }
290                if (maxCosine <= orthoTolerance) {
291                    // convergence has been reached
292                    return new VectorialPointValuePair(point, objective);
293                }
294    
295                // rescale if necessary
296                for (int j = 0; j < cols; ++j) {
297                    diag[j] = Math.max(diag[j], jacNorm[j]);
298                }
299    
300                // inner loop
301                for (double ratio = 0; ratio < 1.0e-4;) {
302    
303                    // save the state
304                    for (int j = 0; j < solvedCols; ++j) {
305                        int pj = permutation[j];
306                        oldX[pj] = point[pj];
307                    }
308                    double previousCost = cost;
309                    double[] tmpVec = residuals;
310                    residuals = oldRes;
311                    oldRes    = tmpVec;
312    
313                    // determine the Levenberg-Marquardt parameter
314                    determineLMParameter(oldRes, delta, diag, work1, work2, work3);
315    
316                    // compute the new point and the norm of the evolution direction
317                    double lmNorm = 0;
318                    for (int j = 0; j < solvedCols; ++j) {
319                        int pj = permutation[j];
320                        lmDir[pj] = -lmDir[pj];
321                        point[pj] = oldX[pj] + lmDir[pj];
322                        double s = diag[pj] * lmDir[pj];
323                        lmNorm  += s * s;
324                    }
325                    lmNorm = Math.sqrt(lmNorm);
326    
327                    // on the first iteration, adjust the initial step bound.
328                    if (firstIteration) {
329                        delta = Math.min(delta, lmNorm);
330                    }
331    
332                    // evaluate the function at x + p and calculate its norm
333                    updateResidualsAndCost();
334    
335                    // compute the scaled actual reduction
336                    double actRed = -1.0;
337                    if (0.1 * cost < previousCost) {
338                        double r = cost / previousCost;
339                        actRed = 1.0 - r * r;
340                    }
341    
342                    // compute the scaled predicted reduction
343                    // and the scaled directional derivative
344                    for (int j = 0; j < solvedCols; ++j) {
345                        int pj = permutation[j];
346                        double dirJ = lmDir[pj];
347                        work1[j] = 0;
348                        for (int i = 0; i <= j; ++i) {
349                            work1[i] += jacobian[i][pj] * dirJ;
350                        }
351                    }
352                    double coeff1 = 0;
353                    for (int j = 0; j < solvedCols; ++j) {
354                        coeff1 += work1[j] * work1[j];
355                    }
356                    double pc2 = previousCost * previousCost;
357                    coeff1 = coeff1 / pc2;
358                    double coeff2 = lmPar * lmNorm * lmNorm / pc2;
359                    double preRed = coeff1 + 2 * coeff2;
360                    double dirDer = -(coeff1 + coeff2);
361    
362                    // ratio of the actual to the predicted reduction
363                    ratio = (preRed == 0) ? 0 : (actRed / preRed);
364    
365                    // update the step bound
366                    if (ratio <= 0.25) {
367                        double tmp =
368                            (actRed < 0) ? (0.5 * dirDer / (dirDer + 0.5 * actRed)) : 0.5;
369                            if ((0.1 * cost >= previousCost) || (tmp < 0.1)) {
370                                tmp = 0.1;
371                            }
372                            delta = tmp * Math.min(delta, 10.0 * lmNorm);
373                            lmPar /= tmp;
374                    } else if ((lmPar == 0) || (ratio >= 0.75)) {
375                        delta = 2 * lmNorm;
376                        lmPar *= 0.5;
377                    }
378    
379                    // test for successful iteration.
380                    if (ratio >= 1.0e-4) {
381                        // successful iteration, update the norm
382                        firstIteration = false;
383                        xNorm = 0;
384                        for (int k = 0; k < cols; ++k) {
385                            double xK = diag[k] * point[k];
386                            xNorm    += xK * xK;
387                        }
388                        xNorm = Math.sqrt(xNorm);
389                    } else {
390                        // failed iteration, reset the previous values
391                        cost = previousCost;
392                        for (int j = 0; j < solvedCols; ++j) {
393                            int pj = permutation[j];
394                            point[pj] = oldX[pj];
395                        }
396                        tmpVec    = residuals;
397                        residuals = oldRes;
398                        oldRes    = tmpVec;
399                    }
400    
401                    // tests for convergence.
402                    if (((Math.abs(actRed) <= costRelativeTolerance) &&
403                            (preRed <= costRelativeTolerance) &&
404                            (ratio <= 2.0)) ||
405                            (delta <= parRelativeTolerance * xNorm)) {
406                        return new VectorialPointValuePair(point, objective);
407                    }
408    
409                    // tests for termination and stringent tolerances
410                    // (2.2204e-16 is the machine epsilon for IEEE754)
411                    if ((Math.abs(actRed) <= 2.2204e-16) && (preRed <= 2.2204e-16) && (ratio <= 2.0)) {
412                        throw new OptimizationException("cost relative tolerance is too small ({0})," +
413                                " no further reduction in the" +
414                                " sum of squares is possible",
415                                costRelativeTolerance);
416                    } else if (delta <= 2.2204e-16 * xNorm) {
417                        throw new OptimizationException("parameters relative tolerance is too small" +
418                                " ({0}), no further improvement in" +
419                                " the approximate solution is possible",
420                                parRelativeTolerance);
421                    } else if (maxCosine <= 2.2204e-16)  {
422                        throw new OptimizationException("orthogonality tolerance is too small ({0})," +
423                                " solution is orthogonal to the jacobian",
424                                orthoTolerance);
425                    }
426    
427                }
428    
429            }
430    
431        }
432    
433        /** 
434         * Determine the Levenberg-Marquardt parameter.
435         * <p>This implementation is a translation in Java of the MINPACK
436         * <a href="http://www.netlib.org/minpack/lmpar.f">lmpar</a>
437         * routine.</p>
438         * <p>This method sets the lmPar and lmDir attributes.</p>
439         * <p>The authors of the original fortran function are:</p>
440         * <ul>
441         *   <li>Argonne National Laboratory. MINPACK project. March 1980</li>
442         *   <li>Burton  S. Garbow</li>
443         *   <li>Kenneth E. Hillstrom</li>
444         *   <li>Jorge   J. More</li>
445         * </ul>
446         * <p>Luc Maisonobe did the Java translation.</p>
447         * 
448         * @param qy array containing qTy
449         * @param delta upper bound on the euclidean norm of diagR * lmDir
450         * @param diag diagonal matrix
451         * @param work1 work array
452         * @param work2 work array
453         * @param work3 work array
454         */
455        private void determineLMParameter(double[] qy, double delta, double[] diag,
456                double[] work1, double[] work2, double[] work3) {
457    
458            // compute and store in x the gauss-newton direction, if the
459            // jacobian is rank-deficient, obtain a least squares solution
460            for (int j = 0; j < rank; ++j) {
461                lmDir[permutation[j]] = qy[j];
462            }
463            for (int j = rank; j < cols; ++j) {
464                lmDir[permutation[j]] = 0;
465            }
466            for (int k = rank - 1; k >= 0; --k) {
467                int pk = permutation[k];
468                double ypk = lmDir[pk] / diagR[pk];
469                for (int i = 0; i < k; ++i) {
470                    lmDir[permutation[i]] -= ypk * jacobian[i][pk];
471                }
472                lmDir[pk] = ypk;
473            }
474    
475            // evaluate the function at the origin, and test
476            // for acceptance of the Gauss-Newton direction
477            double dxNorm = 0;
478            for (int j = 0; j < solvedCols; ++j) {
479                int pj = permutation[j];
480                double s = diag[pj] * lmDir[pj];
481                work1[pj] = s;
482                dxNorm += s * s;
483            }
484            dxNorm = Math.sqrt(dxNorm);
485            double fp = dxNorm - delta;
486            if (fp <= 0.1 * delta) {
487                lmPar = 0;
488                return;
489            }
490    
491            // if the jacobian is not rank deficient, the Newton step provides
492            // a lower bound, parl, for the zero of the function,
493            // otherwise set this bound to zero
494            double sum2, parl = 0;
495            if (rank == solvedCols) {
496                for (int j = 0; j < solvedCols; ++j) {
497                    int pj = permutation[j];
498                    work1[pj] *= diag[pj] / dxNorm; 
499                }
500                sum2 = 0;
501                for (int j = 0; j < solvedCols; ++j) {
502                    int pj = permutation[j];
503                    double sum = 0;
504                    for (int i = 0; i < j; ++i) {
505                        sum += jacobian[i][pj] * work1[permutation[i]];
506                    }
507                    double s = (work1[pj] - sum) / diagR[pj];
508                    work1[pj] = s;
509                    sum2 += s * s;
510                }
511                parl = fp / (delta * sum2);
512            }
513    
514            // calculate an upper bound, paru, for the zero of the function
515            sum2 = 0;
516            for (int j = 0; j < solvedCols; ++j) {
517                int pj = permutation[j];
518                double sum = 0;
519                for (int i = 0; i <= j; ++i) {
520                    sum += jacobian[i][pj] * qy[i];
521                }
522                sum /= diag[pj];
523                sum2 += sum * sum;
524            }
525            double gNorm = Math.sqrt(sum2);
526            double paru = gNorm / delta;
527            if (paru == 0) {
528                // 2.2251e-308 is the smallest positive real for IEE754
529                paru = 2.2251e-308 / Math.min(delta, 0.1);
530            }
531    
532            // if the input par lies outside of the interval (parl,paru),
533            // set par to the closer endpoint
534            lmPar = Math.min(paru, Math.max(lmPar, parl));
535            if (lmPar == 0) {
536                lmPar = gNorm / dxNorm;
537            }
538    
539            for (int countdown = 10; countdown >= 0; --countdown) {
540    
541                // evaluate the function at the current value of lmPar
542                if (lmPar == 0) {
543                    lmPar = Math.max(2.2251e-308, 0.001 * paru);
544                }
545                double sPar = Math.sqrt(lmPar);
546                for (int j = 0; j < solvedCols; ++j) {
547                    int pj = permutation[j];
548                    work1[pj] = sPar * diag[pj];
549                }
550                determineLMDirection(qy, work1, work2, work3);
551    
552                dxNorm = 0;
553                for (int j = 0; j < solvedCols; ++j) {
554                    int pj = permutation[j];
555                    double s = diag[pj] * lmDir[pj];
556                    work3[pj] = s;
557                    dxNorm += s * s;
558                }
559                dxNorm = Math.sqrt(dxNorm);
560                double previousFP = fp;
561                fp = dxNorm - delta;
562    
563                // if the function is small enough, accept the current value
564                // of lmPar, also test for the exceptional cases where parl is zero
565                if ((Math.abs(fp) <= 0.1 * delta) ||
566                        ((parl == 0) && (fp <= previousFP) && (previousFP < 0))) {
567                    return;
568                }
569    
570                // compute the Newton correction
571                for (int j = 0; j < solvedCols; ++j) {
572                    int pj = permutation[j];
573                    work1[pj] = work3[pj] * diag[pj] / dxNorm; 
574                }
575                for (int j = 0; j < solvedCols; ++j) {
576                    int pj = permutation[j];
577                    work1[pj] /= work2[j];
578                    double tmp = work1[pj];
579                    for (int i = j + 1; i < solvedCols; ++i) {
580                        work1[permutation[i]] -= jacobian[i][pj] * tmp;
581                    }
582                }
583                sum2 = 0;
584                for (int j = 0; j < solvedCols; ++j) {
585                    double s = work1[permutation[j]];
586                    sum2 += s * s;
587                }
588                double correction = fp / (delta * sum2);
589    
590                // depending on the sign of the function, update parl or paru.
591                if (fp > 0) {
592                    parl = Math.max(parl, lmPar);
593                } else if (fp < 0) {
594                    paru = Math.min(paru, lmPar);
595                }
596    
597                // compute an improved estimate for lmPar
598                lmPar = Math.max(parl, lmPar + correction);
599    
600            }
601        }
602    
603        /** 
604         * Solve a*x = b and d*x = 0 in the least squares sense.
605         * <p>This implementation is a translation in Java of the MINPACK
606         * <a href="http://www.netlib.org/minpack/qrsolv.f">qrsolv</a>
607         * routine.</p>
608         * <p>This method sets the lmDir and lmDiag attributes.</p>
609         * <p>The authors of the original fortran function are:</p>
610         * <ul>
611         *   <li>Argonne National Laboratory. MINPACK project. March 1980</li>
612         *   <li>Burton  S. Garbow</li>
613         *   <li>Kenneth E. Hillstrom</li>
614         *   <li>Jorge   J. More</li>
615         * </ul>
616         * <p>Luc Maisonobe did the Java translation.</p>
617         * 
618         * @param qy array containing qTy
619         * @param diag diagonal matrix
620         * @param lmDiag diagonal elements associated with lmDir
621         * @param work work array
622         */
623        private void determineLMDirection(double[] qy, double[] diag,
624                double[] lmDiag, double[] work) {
625    
626            // copy R and Qty to preserve input and initialize s
627            //  in particular, save the diagonal elements of R in lmDir
628            for (int j = 0; j < solvedCols; ++j) {
629                int pj = permutation[j];
630                for (int i = j + 1; i < solvedCols; ++i) {
631                    jacobian[i][pj] = jacobian[j][permutation[i]];
632                }
633                lmDir[j] = diagR[pj];
634                work[j]  = qy[j];
635            }
636    
637            // eliminate the diagonal matrix d using a Givens rotation
638            for (int j = 0; j < solvedCols; ++j) {
639    
640                // prepare the row of d to be eliminated, locating the
641                // diagonal element using p from the Q.R. factorization
642                int pj = permutation[j];
643                double dpj = diag[pj];
644                if (dpj != 0) {
645                    Arrays.fill(lmDiag, j + 1, lmDiag.length, 0);
646                }
647                lmDiag[j] = dpj;
648    
649                //  the transformations to eliminate the row of d
650                // modify only a single element of Qty
651                // beyond the first n, which is initially zero.
652                double qtbpj = 0;
653                for (int k = j; k < solvedCols; ++k) {
654                    int pk = permutation[k];
655    
656                    // determine a Givens rotation which eliminates the
657                    // appropriate element in the current row of d
658                    if (lmDiag[k] != 0) {
659    
660                        double sin, cos;
661                        double rkk = jacobian[k][pk];
662                        if (Math.abs(rkk) < Math.abs(lmDiag[k])) {
663                            double cotan = rkk / lmDiag[k];
664                            sin   = 1.0 / Math.sqrt(1.0 + cotan * cotan);
665                            cos   = sin * cotan;
666                        } else {
667                            double tan = lmDiag[k] / rkk;
668                            cos = 1.0 / Math.sqrt(1.0 + tan * tan);
669                            sin = cos * tan;
670                        }
671    
672                        // compute the modified diagonal element of R and
673                        // the modified element of (Qty,0)
674                        jacobian[k][pk] = cos * rkk + sin * lmDiag[k];
675                        double temp = cos * work[k] + sin * qtbpj;
676                        qtbpj = -sin * work[k] + cos * qtbpj;
677                        work[k] = temp;
678    
679                        // accumulate the tranformation in the row of s
680                        for (int i = k + 1; i < solvedCols; ++i) {
681                            double rik = jacobian[i][pk];
682                            temp = cos * rik + sin * lmDiag[i];
683                            lmDiag[i] = -sin * rik + cos * lmDiag[i];
684                            jacobian[i][pk] = temp;
685                        }
686    
687                    }
688                }
689    
690                // store the diagonal element of s and restore
691                // the corresponding diagonal element of R
692                lmDiag[j] = jacobian[j][permutation[j]];
693                jacobian[j][permutation[j]] = lmDir[j];
694    
695            }
696    
697            // solve the triangular system for z, if the system is
698            // singular, then obtain a least squares solution
699            int nSing = solvedCols;
700            for (int j = 0; j < solvedCols; ++j) {
701                if ((lmDiag[j] == 0) && (nSing == solvedCols)) {
702                    nSing = j;
703                }
704                if (nSing < solvedCols) {
705                    work[j] = 0;
706                }
707            }
708            if (nSing > 0) {
709                for (int j = nSing - 1; j >= 0; --j) {
710                    int pj = permutation[j];
711                    double sum = 0;
712                    for (int i = j + 1; i < nSing; ++i) {
713                        sum += jacobian[i][pj] * work[i];
714                    }
715                    work[j] = (work[j] - sum) / lmDiag[j];
716                }
717            }
718    
719            // permute the components of z back to components of lmDir
720            for (int j = 0; j < lmDir.length; ++j) {
721                lmDir[permutation[j]] = work[j];
722            }
723    
724        }
725    
726        /** 
727         * Decompose a matrix A as A.P = Q.R using Householder transforms.
728         * <p>As suggested in the P. Lascaux and R. Theodor book
729         * <i>Analyse num&eacute;rique matricielle appliqu&eacute;e &agrave;
730         * l'art de l'ing&eacute;nieur</i> (Masson, 1986), instead of representing
731         * the Householder transforms with u<sub>k</sub> unit vectors such that:
732         * <pre>
733         * H<sub>k</sub> = I - 2u<sub>k</sub>.u<sub>k</sub><sup>t</sup>
734         * </pre>
735         * we use <sub>k</sub> non-unit vectors such that:
736         * <pre>
737         * H<sub>k</sub> = I - beta<sub>k</sub>v<sub>k</sub>.v<sub>k</sub><sup>t</sup>
738         * </pre>
739         * where v<sub>k</sub> = a<sub>k</sub> - alpha<sub>k</sub> e<sub>k</sub>.
740         * The beta<sub>k</sub> coefficients are provided upon exit as recomputing
741         * them from the v<sub>k</sub> vectors would be costly.</p>
742         * <p>This decomposition handles rank deficient cases since the tranformations
743         * are performed in non-increasing columns norms order thanks to columns
744         * pivoting. The diagonal elements of the R matrix are therefore also in
745         * non-increasing absolute values order.</p>
746         * @exception OptimizationException if the decomposition cannot be performed
747         */
748        private void qrDecomposition() throws OptimizationException {
749    
750            // initializations
751            for (int k = 0; k < cols; ++k) {
752                permutation[k] = k;
753                double norm2 = 0;
754                for (int i = 0; i < jacobian.length; ++i) {
755                    double akk = jacobian[i][k];
756                    norm2 += akk * akk;
757                }
758                jacNorm[k] = Math.sqrt(norm2);
759            }
760    
761            // transform the matrix column after column
762            for (int k = 0; k < cols; ++k) {
763    
764                // select the column with the greatest norm on active components
765                int nextColumn = -1;
766                double ak2 = Double.NEGATIVE_INFINITY;
767                for (int i = k; i < cols; ++i) {
768                    double norm2 = 0;
769                    for (int j = k; j < jacobian.length; ++j) {
770                        double aki = jacobian[j][permutation[i]];
771                        norm2 += aki * aki;
772                    }
773                    if (Double.isInfinite(norm2) || Double.isNaN(norm2)) {
774                        throw new OptimizationException(
775                                "unable to perform Q.R decomposition on the {0}x{1} jacobian matrix",
776                                rows, cols);
777                    }
778                    if (norm2 > ak2) {
779                        nextColumn = i;
780                        ak2        = norm2;
781                    }
782                }
783                if (ak2 == 0) {
784                    rank = k;
785                    return;
786                }
787                int pk                  = permutation[nextColumn];
788                permutation[nextColumn] = permutation[k];
789                permutation[k]          = pk;
790    
791                // choose alpha such that Hk.u = alpha ek
792                double akk   = jacobian[k][pk];
793                double alpha = (akk > 0) ? -Math.sqrt(ak2) : Math.sqrt(ak2);
794                double betak = 1.0 / (ak2 - akk * alpha);
795                beta[pk]     = betak;
796    
797                // transform the current column
798                diagR[pk]        = alpha;
799                jacobian[k][pk] -= alpha;
800    
801                // transform the remaining columns
802                for (int dk = cols - 1 - k; dk > 0; --dk) {
803                    double gamma = 0;
804                    for (int j = k; j < jacobian.length; ++j) {
805                        gamma += jacobian[j][pk] * jacobian[j][permutation[k + dk]];
806                    }
807                    gamma *= betak;
808                    for (int j = k; j < jacobian.length; ++j) {
809                        jacobian[j][permutation[k + dk]] -= gamma * jacobian[j][pk];
810                    }
811                }
812    
813            }
814    
815            rank = solvedCols;
816    
817        }
818    
819        /** 
820         * Compute the product Qt.y for some Q.R. decomposition.
821         * 
822         * @param y vector to multiply (will be overwritten with the result)
823         */
824        private void qTy(double[] y) {
825            for (int k = 0; k < cols; ++k) {
826                int pk = permutation[k];
827                double gamma = 0;
828                for (int i = k; i < rows; ++i) {
829                    gamma += jacobian[i][pk] * y[i];
830                }
831                gamma *= beta[pk];
832                for (int i = k; i < rows; ++i) {
833                    y[i] -= gamma * jacobian[i][pk];
834                }
835            }
836        }
837    
838    }