View Javadoc

1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.commons.math.stat.correlation;
18  
19  import org.apache.commons.math.MathRuntimeException;
20  import org.apache.commons.math.linear.RealMatrix;
21  import org.apache.commons.math.linear.BlockRealMatrix;
22  import org.apache.commons.math.stat.descriptive.moment.Mean;
23  import org.apache.commons.math.stat.descriptive.moment.Variance;
24  
25  /**
26   * Computes covariances for pairs of arrays or columns of a matrix.
27   * 
28   * <p>The constructors that take <code>RealMatrix</code> or 
29   * <code>double[][]</code> arguments generate covariance matrices.  The
30   * columns of the input matrices are assumed to represent variable values.</p>
31   * 
32   * <p>The constructor argument <code>biasCorrected</code> determines whether or
33   * not computed covariances are bias-corrected.</p>
34   * 
35   * <p>Unbiased covariances are given by the formula</p>
36   * <code>cov(X, Y) = &Sigma;[(x<sub>i</sub> - E(X))(y<sub>i</sub> - E(Y))] / (n - 1)</code>
37   * where <code>E(X)</code> is the mean of <code>X</code> and <code>E(Y)</code>
38   * is the mean of the <code>Y</code> values.
39   * 
40   * <p>Non-bias-corrected estimates use <code>n</code> in place of <code>n - 1</code>
41   * 
42   * @version $Revision: 799857 $ $Date: 2009-08-01 09:07:12 -0400 (Sat, 01 Aug 2009) $
43   * @since 2.0
44   */
45  public class Covariance {
46      
47      /** covariance matrix */
48      private final RealMatrix covarianceMatrix;
49  
50      /**
51       * Create an empty covariance matrix.
52       */
53      /** Number of observations (length of covariate vectors) */
54      private final int n;
55      
56      /** 
57       * Create a Covariance with no data
58       */
59      public Covariance() {
60          super();
61          covarianceMatrix = null;
62          n = 0;
63      }
64      
65      /**
66       * Create a Covariance matrix from a rectangular array
67       * whose columns represent covariates.
68       * 
69       * <p>The <code>biasCorrected</code> parameter determines whether or not
70       * covariance estimates are bias-corrected.</p>
71       * 
72       * <p>The input array must be rectangular with at least two columns
73       * and two rows.</p>
74       * 
75       * @param data rectangular array with columns representing covariates
76       * @param biasCorrected true means covariances are bias-corrected
77       * @throws IllegalArgumentException if the input data array is not
78       * rectangular with at least two rows and two columns.
79       */
80      public Covariance(double[][] data, boolean biasCorrected) {
81          this(new BlockRealMatrix(data), biasCorrected);
82      }
83      
84      /**
85       * Create a Covariance matrix from a rectangular array
86       * whose columns represent covariates.
87       * 
88       * <p>The input array must be rectangular with at least two columns
89       * and two rows</p>
90       * 
91       * @param data rectangular array with columns representing covariates
92       * @throws IllegalArgumentException if the input data array is not
93       * rectangular with at least two rows and two columns.
94       */
95      public Covariance(double[][] data) {
96          this(data, true);
97      }
98      
99      /**
100      * Create a covariance matrix from a matrix whose columns
101      * represent covariates.
102      * 
103      * <p>The <code>biasCorrected</code> parameter determines whether or not
104      * covariance estimates are bias-corrected.</p>
105      * 
106      * <p>The matrix must have at least two columns and two rows</p>
107      * 
108      * @param matrix matrix with columns representing covariates
109      * @param biasCorrected true means covariances are bias-corrected
110      * @throws IllegalArgumentException if the input matrix does not have
111      * at least two rows and two columns
112      */
113     public Covariance(RealMatrix matrix, boolean biasCorrected) {
114        checkSufficientData(matrix);
115        n = matrix.getRowDimension();
116        covarianceMatrix = computeCovarianceMatrix(matrix, biasCorrected);
117     }
118     
119     /**
120      * Create a covariance matrix from a matrix whose columns
121      * represent covariates.
122      * 
123      * <p>The matrix must have at least two columns and two rows</p>
124      * 
125      * @param matrix matrix with columns representing covariates
126      * @throws IllegalArgumentException if the input matrix does not have
127      * at least two rows and two columns
128      */
129     public Covariance(RealMatrix matrix) {
130         this(matrix, true);
131     }
132     
133     /**
134      * Returns the covariance matrix
135      * 
136      * @return covariance matrix
137      */
138     public RealMatrix getCovarianceMatrix() {
139         return covarianceMatrix;
140     }
141     
142     /**
143      * Returns the number of observations (length of covariate vectors)
144      * 
145      * @return number of observations
146      */
147     
148     public int getN() {
149         return n;
150     }
151     
152     /**
153      * Compute a covariance matrix from a matrix whose columns represent
154      * covariates.
155      * @param matrix input matrix (must have at least two columns and two rows)
156      * @param biasCorrected determines whether or not covariance estimates are bias-corrected
157      * @return covariance matrix
158      */
159     protected RealMatrix computeCovarianceMatrix(RealMatrix matrix, boolean biasCorrected) {
160         int dimension = matrix.getColumnDimension();
161         Variance variance = new Variance(biasCorrected);
162         RealMatrix outMatrix = new BlockRealMatrix(dimension, dimension);
163         for (int i = 0; i < dimension; i++) {
164             for (int j = 0; j < i; j++) {
165               double cov = covariance(matrix.getColumn(i), matrix.getColumn(j), biasCorrected);
166               outMatrix.setEntry(i, j, cov);
167               outMatrix.setEntry(j, i, cov);
168             }
169             outMatrix.setEntry(i, i, variance.evaluate(matrix.getColumn(i)));
170         }
171         return outMatrix;
172     }
173     
174     /**
175      * Create a covariance matrix from a matrix whose columns represent
176      * covariates. Covariances are computed using the bias-corrected formula.
177      * @param matrix input matrix (must have at least two columns and two rows)
178      * @return covariance matrix
179      * @see #Covariance
180      */
181     protected RealMatrix computeCovarianceMatrix(RealMatrix matrix) {
182         return computeCovarianceMatrix(matrix, true);
183     }
184     
185     /**
186      * Compute a covariance matrix from a rectangular array whose columns represent
187      * covariates.
188      * @param data input array (must have at least two columns and two rows)
189      * @param biasCorrected determines whether or not covariance estimates are bias-corrected
190      * @return covariance matrix
191      */
192     protected RealMatrix computeCovarianceMatrix(double[][] data, boolean biasCorrected) {
193         return computeCovarianceMatrix(new BlockRealMatrix(data), biasCorrected);
194     }
195     
196     /**
197      * Create a covariance matrix from a rectangual array whose columns represent
198      * covariates. Covariances are computed using the bias-corrected formula.
199      * @param data input array (must have at least two columns and two rows)
200      * @return covariance matrix
201      * @see #Covariance
202      */
203     protected RealMatrix computeCovarianceMatrix(double[][] data) {
204         return computeCovarianceMatrix(data, true);
205     }
206     
207     /**
208      * Computes the covariance between the two arrays.
209      * 
210      * <p>Array lengths must match and the common length must be at least 2.</p>
211      *
212      * @param xArray first data array
213      * @param yArray second data array
214      * @param biasCorrected if true, returned value will be bias-corrected 
215      * @return returns the covariance for the two arrays 
216      * @throws  IllegalArgumentException if the arrays lengths do not match or
217      * there is insufficient data
218      */
219     public double covariance(final double[] xArray, final double[] yArray, boolean biasCorrected) 
220         throws IllegalArgumentException {
221         Mean mean = new Mean();
222         double result = 0d;
223         int length = xArray.length;
224         if(length == yArray.length && length > 1) {
225             double xMean = mean.evaluate(xArray);
226             double yMean = mean.evaluate(yArray);
227             for (int i = 0; i < length; i++) {
228                 double xDev = xArray[i] - xMean;
229                 double yDev = yArray[i] - yMean;
230                 result += (xDev * yDev - result) / (i + 1);
231             }
232         }
233         else {
234             throw MathRuntimeException.createIllegalArgumentException(
235                "arrays must have the same length and both must have at " +
236                "least two elements. xArray has size {0}, yArray has {1} elements",
237                     length, yArray.length);
238         }
239         return biasCorrected ? result * ((double) length / (double)(length - 1)) : result;
240     }
241     
242     /**
243      * Computes the covariance between the two arrays, using the bias-corrected
244      * formula.
245      * 
246      * <p>Array lengths must match and the common length must be at least 2.</p>
247      *
248      * @param xArray first data array
249      * @param yArray second data array
250      * @return returns the covariance for the two arrays 
251      * @throws  IllegalArgumentException if the arrays lengths do not match or
252      * there is insufficient data
253      */
254     public double covariance(final double[] xArray, final double[] yArray) 
255         throws IllegalArgumentException {
256         return covariance(xArray, yArray, true);
257     }
258     
259     /**
260      * Throws IllegalArgumentException of the matrix does not have at least
261      * two columns and two rows
262      * @param matrix matrix to check
263      */
264     private void checkSufficientData(final RealMatrix matrix) {
265         int nRows = matrix.getRowDimension();
266         int nCols = matrix.getColumnDimension();
267         if (nRows < 2 || nCols < 2) {
268             throw MathRuntimeException.createIllegalArgumentException(
269                     "insufficient data: only {0} rows and {1} columns.",
270                     nRows, nCols);
271         }
272     }
273 }