View Javadoc

1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.commons.math.stat.correlation;
18  
19  import org.apache.commons.math.MathException;
20  import org.apache.commons.math.MathRuntimeException;
21  import org.apache.commons.math.distribution.TDistribution;
22  import org.apache.commons.math.distribution.TDistributionImpl;
23  import org.apache.commons.math.linear.RealMatrix;
24  import org.apache.commons.math.linear.BlockRealMatrix;
25  import org.apache.commons.math.stat.regression.SimpleRegression;
26  
27  /**
28   * Computes Pearson's product-moment correlation coefficients for pairs of arrays
29   * or columns of a matrix.
30   * 
31   * <p>The constructors that take <code>RealMatrix</code> or 
32   * <code>double[][]</code> arguments generate correlation matrices.  The
33   * columns of the input matrices are assumed to represent variable values.
34   * Correlations are given by the formula</p>
35   * <code>cor(X, Y) = &Sigma;[(x<sub>i</sub> - E(X))(y<sub>i</sub> - E(Y))] / [(n - 1)s(X)s(Y)]</code>
36   * where <code>E(X)</code> is the mean of <code>X</code>, <code>E(Y)</code>
37   * is the mean of the <code>Y</code> values and s(X), s(Y) are standard deviations.
38   * 
39   * @version $Revision: 799857 $ $Date: 2009-08-01 09:07:12 -0400 (Sat, 01 Aug 2009) $
40   * @since 2.0
41   */
42  public class PearsonsCorrelation {
43      
44      /** correlation matrix */
45      private final RealMatrix correlationMatrix;
46      
47      /** number of observations */
48      private final int nObs;
49      
50      /**
51       * Create a PearsonsCorrelation instance without data
52       */
53      public PearsonsCorrelation() {
54          super();
55          correlationMatrix = null;
56          nObs = 0;
57      }
58      
59      /**
60       * Create a PearsonsCorrelation from a rectangular array
61       * whose columns represent values of variables to be correlated.
62       * 
63       * @param data rectangular array with columns representing variables
64       * @throws IllegalArgumentException if the input data array is not
65       * rectangular with at least two rows and two columns.
66       */
67      public PearsonsCorrelation(double[][] data) {
68          this(new BlockRealMatrix(data));
69      }
70      
71      /**
72       * Create a PearsonsCorrelation from a RealMatrix whose columns
73       * represent variables to be correlated.
74       * 
75       * @param matrix matrix with columns representing variables to correlate
76       */
77      public PearsonsCorrelation(RealMatrix matrix) {
78          checkSufficientData(matrix);
79          nObs = matrix.getRowDimension();
80          correlationMatrix = computeCorrelationMatrix(matrix);
81      }
82      
83      /**
84       * Create a PearsonsCorrelation from a {@link Covariance}.  The correlation
85       * matrix is computed by scaling the Covariance's covariance matrix.
86       * The Covariance instance must have been created from a data matrix with
87       * columns representing variable values.
88       * 
89       * @param covariance Covariance instance
90       */
91      public PearsonsCorrelation(Covariance covariance) {
92          RealMatrix covarianceMatrix = covariance.getCovarianceMatrix();
93          if (covarianceMatrix == null) {
94              throw MathRuntimeException.createIllegalArgumentException("covariance matrix is null");
95          }
96          nObs = covariance.getN();
97          correlationMatrix = covarianceToCorrelation(covarianceMatrix);
98      }
99      
100     /**
101      * Create a PearsonsCorrelation from a covariance matrix.  The correlation
102      * matrix is computed by scaling the covariance matrix.
103      * 
104      * @param covarianceMatrix covariance matrix
105      * @param numberOfObservations the number of observations in the dataset used to compute
106      * the covariance matrix
107      */
108     public PearsonsCorrelation(RealMatrix covarianceMatrix, int numberOfObservations) {
109         nObs = numberOfObservations;
110         correlationMatrix = covarianceToCorrelation(covarianceMatrix);
111         
112     }
113     
114     /**
115      * Returns the correlation matrix
116      * 
117      * @return correlation matrix
118      */
119     public RealMatrix getCorrelationMatrix() {
120         return correlationMatrix;  
121     }
122     
123     /**
124      * Returns a matrix of standard errors associated with the estimates
125      * in the correlation matrix.<br/>
126      * <code>getCorrelationStandardErrors().getEntry(i,j)</code> is the standard
127      * error associated with <code>getCorrelationMatrix.getEntry(i,j)</code>
128      * <p>The formula used to compute the standard error is <br/>
129      * <code>SE<sub>r</sub> = ((1 - r<sup>2</sup>) / (n - 2))<sup>1/2</sup></code>
130      * where <code>r</code> is the estimated correlation coefficient and 
131      * <code>n</code> is the number of observations in the source dataset.</p>
132      * 
133      * @return matrix of correlation standard errors
134      */
135     public RealMatrix getCorrelationStandardErrors() {
136         int nVars = correlationMatrix.getColumnDimension();
137         double[][] out = new double[nVars][nVars];
138         for (int i = 0; i < nVars; i++) {
139             for (int j = 0; j < nVars; j++) {
140                 double r = correlationMatrix.getEntry(i, j);
141                 out[i][j] = Math.sqrt((1 - r * r) /(nObs - 2));
142             }
143         }
144         return new BlockRealMatrix(out);
145     }
146 
147     /**
148      * Returns a matrix of p-values associated with the (two-sided) null
149      * hypothesis that the corresponding correlation coefficient is zero.
150      * <p><code>getCorrelationPValues().getEntry(i,j)</code> is the probability
151      * that a random variable distributed as <code>t<sub>n-2</sub></code> takes
152      * a value with absolute value greater than or equal to <br>
153      * <code>|r|((n - 2) / (1 - r<sup>2</sup>))<sup>1/2</sup></code></p>
154      * <p>The values in the matrix are sometimes referred to as the 
155      * <i>significance</i> of the corresponding correlation coefficients.</p>
156      * 
157      * @return matrix of p-values
158      * @throws MathException if an error occurs estimating probabilities
159      */
160     public RealMatrix getCorrelationPValues() throws MathException {
161         TDistribution tDistribution = new TDistributionImpl(nObs - 2);
162         int nVars = correlationMatrix.getColumnDimension();
163         double[][] out = new double[nVars][nVars];
164         for (int i = 0; i < nVars; i++) {
165             for (int j = 0; j < nVars; j++) {
166                 if (i == j) {
167                     out[i][j] = 0d;
168                 } else {
169                     double r = correlationMatrix.getEntry(i, j);
170                     double t = Math.abs(r * Math.sqrt((nObs - 2)/(1 - r * r)));
171                     out[i][j] = 2 * (1 - tDistribution.cumulativeProbability(t));
172                 }
173             }
174         }
175         return new BlockRealMatrix(out);
176     }
177     
178     
179     /**
180      * Computes the correlation matrix for the columns of the
181      * input matrix.
182      * 
183      * @param matrix matrix with columns representing variables to correlate
184      * @return correlation matrix
185      */
186     public RealMatrix computeCorrelationMatrix(RealMatrix matrix) {
187         int nVars = matrix.getColumnDimension();
188         RealMatrix outMatrix = new BlockRealMatrix(nVars, nVars);
189         for (int i = 0; i < nVars; i++) {
190             for (int j = 0; j < i; j++) {
191               double corr = correlation(matrix.getColumn(i), matrix.getColumn(j));
192               outMatrix.setEntry(i, j, corr);
193               outMatrix.setEntry(j, i, corr);
194             }
195             outMatrix.setEntry(i, i, 1d);
196         }
197         return outMatrix;
198     }
199     
200     /**
201      * Computes the correlation matrix for the columns of the
202      * input rectangular array.  The colums of the array represent values
203      * of variables to be correlated.
204      * 
205      * @param data matrix with columns representing variables to correlate
206      * @return correlation matrix
207      */
208     public RealMatrix computeCorrelationMatrix(double[][] data) {
209        return computeCorrelationMatrix(new BlockRealMatrix(data));
210     }
211     
212     /**
213      * Computes the Pearson's product-moment correlation coefficient between the two arrays.
214      * 
215      * </p>Throws IllegalArgumentException if the arrays do not have the same length
216      * or their common length is less than 2</p>
217      *
218      * @param xArray first data array
219      * @param yArray second data array
220      * @return Returns Pearson's correlation coefficient for the two arrays 
221      * @throws  IllegalArgumentException if the arrays lengths do not match or
222      * there is insufficient data
223      */
224     public double correlation(final double[] xArray, final double[] yArray) throws IllegalArgumentException {
225         SimpleRegression regression = new SimpleRegression();
226         if(xArray.length == yArray.length && xArray.length > 1) {
227             for(int i=0; i<xArray.length; i++) {
228                 regression.addData(xArray[i], yArray[i]);
229             }
230             return regression.getR();
231         }
232         else {
233             throw MathRuntimeException.createIllegalArgumentException(
234                     "invalid array dimensions. xArray has size {0}; yArray has {1} elements",
235                     xArray.length, yArray.length);
236         }
237     }
238     
239     /**
240      * Derives a correlation matrix from a covariance matrix.
241      * 
242      * <p>Uses the formula <br/>
243      * <code>r(X,Y) = cov(X,Y)/s(X)s(Y)</code> where 
244      * <code>r(&middot,&middot;)</code> is the correlation coefficient and
245      * <code>s(&middot;)</code> means standard deviation.</p>
246      * 
247      * @param covarianceMatrix the covariance matrix
248      * @return correlation matrix
249      */
250     public RealMatrix covarianceToCorrelation(RealMatrix covarianceMatrix) {
251         int nVars = covarianceMatrix.getColumnDimension();
252         RealMatrix outMatrix = new BlockRealMatrix(nVars, nVars);
253         for (int i = 0; i < nVars; i++) {
254             double sigma = Math.sqrt(covarianceMatrix.getEntry(i, i));
255             outMatrix.setEntry(i, i, 1d);
256             for (int j = 0; j < i; j++) {
257                 double entry = covarianceMatrix.getEntry(i, j) / 
258                        (sigma * Math.sqrt(covarianceMatrix.getEntry(j, j)));
259                 outMatrix.setEntry(i, j, entry);
260                 outMatrix.setEntry(j, i, entry);
261             }
262         }
263         return outMatrix;
264     }
265     
266     /**
267      * Throws IllegalArgumentException of the matrix does not have at least
268      * two columns and two rows
269      * 
270      * @param matrix matrix to check for sufficiency
271      */
272     private void checkSufficientData(final RealMatrix matrix) {
273         int nRows = matrix.getRowDimension();
274         int nCols = matrix.getColumnDimension();
275         if (nRows < 2 || nCols < 2) {
276             throw MathRuntimeException.createIllegalArgumentException(
277                     "insufficient data: only {0} rows and {1} columns.",
278                     nRows, nCols);
279         }
280     }
281 }