001    /*
002     * Licensed to the Apache Software Foundation (ASF) under one or more
003     * contributor license agreements.  See the NOTICE file distributed with
004     * this work for additional information regarding copyright ownership.
005     * The ASF licenses this file to You under the Apache License, Version 2.0
006     * (the "License"); you may not use this file except in compliance with
007     * the License.  You may obtain a copy of the License at
008     *
009     *      http://www.apache.org/licenses/LICENSE-2.0
010     *
011     * Unless required by applicable law or agreed to in writing, software
012     * distributed under the License is distributed on an "AS IS" BASIS,
013     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014     * See the License for the specific language governing permissions and
015     * limitations under the License.
016     */
017    package org.apache.commons.math.stat.correlation;
018    
019    import org.apache.commons.math.TestUtils;
020    import org.apache.commons.math.linear.RealMatrix;
021    import org.apache.commons.math.linear.Array2DRowRealMatrix;
022    import org.apache.commons.math.stat.descriptive.moment.Variance;
023    
024    import junit.framework.TestCase;
025    
026    public class CovarianceTest extends TestCase {
027        
028        protected final double[] longleyData = new double[] {
029                60323,83.0,234289,2356,1590,107608,1947,
030                61122,88.5,259426,2325,1456,108632,1948,
031                60171,88.2,258054,3682,1616,109773,1949,
032                61187,89.5,284599,3351,1650,110929,1950,
033                63221,96.2,328975,2099,3099,112075,1951,
034                63639,98.1,346999,1932,3594,113270,1952,
035                64989,99.0,365385,1870,3547,115094,1953,
036                63761,100.0,363112,3578,3350,116219,1954,
037                66019,101.2,397469,2904,3048,117388,1955,
038                67857,104.6,419180,2822,2857,118734,1956,
039                68169,108.4,442769,2936,2798,120445,1957,
040                66513,110.8,444546,4681,2637,121950,1958,
041                68655,112.6,482704,3813,2552,123366,1959,
042                69564,114.2,502601,3931,2514,125368,1960,
043                69331,115.7,518173,4806,2572,127852,1961,
044                70551,116.9,554894,4007,2827,130081,1962
045            };
046        
047        protected final double[] swissData = new double[] {
048                80.2,17.0,15,12,9.96,
049                83.1,45.1,6,9,84.84,
050                92.5,39.7,5,5,93.40,
051                85.8,36.5,12,7,33.77,
052                76.9,43.5,17,15,5.16,
053                76.1,35.3,9,7,90.57,
054                83.8,70.2,16,7,92.85,
055                92.4,67.8,14,8,97.16,
056                82.4,53.3,12,7,97.67,
057                82.9,45.2,16,13,91.38,
058                87.1,64.5,14,6,98.61,
059                64.1,62.0,21,12,8.52,
060                66.9,67.5,14,7,2.27,
061                68.9,60.7,19,12,4.43,
062                61.7,69.3,22,5,2.82,
063                68.3,72.6,18,2,24.20,
064                71.7,34.0,17,8,3.30,
065                55.7,19.4,26,28,12.11,
066                54.3,15.2,31,20,2.15,
067                65.1,73.0,19,9,2.84,
068                65.5,59.8,22,10,5.23,
069                65.0,55.1,14,3,4.52,
070                56.6,50.9,22,12,15.14,
071                57.4,54.1,20,6,4.20,
072                72.5,71.2,12,1,2.40,
073                74.2,58.1,14,8,5.23,
074                72.0,63.5,6,3,2.56,
075                60.5,60.8,16,10,7.72,
076                58.3,26.8,25,19,18.46,
077                65.4,49.5,15,8,6.10,
078                75.5,85.9,3,2,99.71,
079                69.3,84.9,7,6,99.68,
080                77.3,89.7,5,2,100.00,
081                70.5,78.2,12,6,98.96,
082                79.4,64.9,7,3,98.22,
083                65.0,75.9,9,9,99.06,
084                92.2,84.6,3,3,99.46,
085                79.3,63.1,13,13,96.83,
086                70.4,38.4,26,12,5.62,
087                65.7,7.7,29,11,13.79,
088                72.7,16.7,22,13,11.22,
089                64.4,17.6,35,32,16.92,
090                77.6,37.6,15,7,4.97,
091                67.6,18.7,25,7,8.65,
092                35.0,1.2,37,53,42.34,
093                44.7,46.6,16,29,50.43,
094                42.8,27.7,22,29,58.33
095            };
096     
097        
098        /**
099         * Test Longley dataset against R.
100         * Data Source: J. Longley (1967) "An Appraisal of Least Squares
101         * Programs for the Electronic Computer from the Point of View of the User"
102         * Journal of the American Statistical Association, vol. 62. September,
103         * pp. 819-841.
104         * 
105         * Data are from NIST:
106         * http://www.itl.nist.gov/div898/strd/lls/data/LINKS/DATA/Longley.dat
107         */
108        public void testLongly() {  
109            RealMatrix matrix = createRealMatrix(longleyData, 16, 7);
110            RealMatrix covarianceMatrix = new Covariance(matrix).getCovarianceMatrix();
111            double[] rData = new double[] {
112             12333921.73333333246, 3.679666000000000e+04, 343330206.333333313,
113             1649102.666666666744, 1117681.066666666651, 23461965.733333334, 16240.93333333333248,
114             36796.66000000000, 1.164576250000000e+02, 1063604.115416667,
115             6258.666250000000, 3490.253750000000, 73503.000000000, 50.92333333333334,
116             343330206.33333331347, 1.063604115416667e+06, 9879353659.329166412,
117             56124369.854166664183, 30880428.345833335072, 685240944.600000024, 470977.90000000002328,
118             1649102.66666666674, 6.258666250000000e+03, 56124369.854166664,
119             873223.429166666698, -115378.762499999997, 4462741.533333333, 2973.03333333333330,
120             1117681.06666666665, 3.490253750000000e+03, 30880428.345833335,
121             -115378.762499999997, 484304.095833333326, 1764098.133333333, 1382.43333333333339,
122             23461965.73333333433, 7.350300000000000e+04, 685240944.600000024,
123             4462741.533333333209, 1764098.133333333302, 48387348.933333330, 32917.40000000000146,
124             16240.93333333333, 5.092333333333334e+01, 470977.900000000,
125             2973.033333333333, 1382.433333333333, 32917.40000000, 22.66666666666667
126            };
127            
128            TestUtils.assertEquals("covariance matrix", createRealMatrix(rData, 7, 7), covarianceMatrix, 10E-9);
129    
130        }
131        
132        /**
133         * Test R Swiss fertility dataset against R.
134         * Data Source: R datasets package
135         */
136        public void testSwissFertility() {
137             RealMatrix matrix = createRealMatrix(swissData, 47, 5);
138             RealMatrix covarianceMatrix = new Covariance(matrix).getCovarianceMatrix();
139             double[] rData = new double[] {
140               156.0424976873265, 100.1691489361702, -64.36692876965772, -79.7295097132285, 241.5632030527289,
141               100.169148936170251, 515.7994172062905, -124.39283071230344, -139.6574005550416, 379.9043755781684,
142               -64.3669287696577, -124.3928307123034, 63.64662349676226, 53.5758556891767, -190.5606105457909,
143               -79.7295097132285, -139.6574005550416, 53.57585568917669, 92.4560592044403, -61.6988297872340,
144                241.5632030527289, 379.9043755781684, -190.56061054579092, -61.6988297872340, 1739.2945371877890
145             };
146             
147             TestUtils.assertEquals("covariance matrix", createRealMatrix(rData, 5, 5), covarianceMatrix, 10E-13);
148        }
149        
150        /**
151         * Constant column
152         */
153        public void testConstant() {
154            double[] noVariance = new double[] {1, 1, 1, 1};
155            double[] values = new double[] {1, 2, 3, 4};
156            assertEquals(0d, new Covariance().covariance(noVariance, values, true), Double.MIN_VALUE);
157            assertEquals(0d, new Covariance().covariance(noVariance, noVariance, true), Double.MIN_VALUE);
158        }
159        
160        
161        /**
162         * Insufficient data
163         */
164        public void testInsufficientData() {
165            double[] one = new double[] {1};
166            double[] two = new double[] {2};
167            try {
168                new Covariance().covariance(one, two, false);
169                fail("Expecting IllegalArgumentException");
170            } catch (IllegalArgumentException ex) {
171                // Expected
172            }
173            RealMatrix matrix = new Array2DRowRealMatrix(new double[][] {{0},{1}});
174            try {
175                new Covariance(matrix);
176                fail("Expecting IllegalArgumentException");
177            } catch (IllegalArgumentException ex) {
178                // Expected
179            }
180        }
181        
182        /**
183         * Verify that diagonal entries are consistent with Variance computation and matrix matches
184         * column-by-column covariances
185         */
186        public void testConsistency() {
187            final RealMatrix matrix = createRealMatrix(swissData, 47, 5);
188            final RealMatrix covarianceMatrix = new Covariance(matrix).getCovarianceMatrix();
189            
190            // Variances on the diagonal
191            Variance variance = new Variance();
192            for (int i = 0; i < 5; i++) {
193                assertEquals(variance.evaluate(matrix.getColumn(i)), covarianceMatrix.getEntry(i,i), 10E-14);
194            }
195            
196            // Symmetry, column-consistency
197            assertEquals(covarianceMatrix.getEntry(2, 3), 
198                    new Covariance().covariance(matrix.getColumn(2), matrix.getColumn(3), true), 10E-14);
199            assertEquals(covarianceMatrix.getEntry(2, 3), covarianceMatrix.getEntry(3, 2), Double.MIN_VALUE);
200            
201            // All columns same -> all entries = column variance
202            RealMatrix repeatedColumns = new Array2DRowRealMatrix(47, 3);
203            for (int i = 0; i < 3; i++) {
204                repeatedColumns.setColumnMatrix(i, matrix.getColumnMatrix(0));
205            }
206            RealMatrix repeatedCovarianceMatrix = new Covariance(repeatedColumns).getCovarianceMatrix();
207            double columnVariance = variance.evaluate(matrix.getColumn(0));
208            for (int i = 0; i < 3; i++) {
209                for (int j = 0; j < 3; j++) {
210                    assertEquals(columnVariance, repeatedCovarianceMatrix.getEntry(i, j), 10E-14);
211                }
212            }
213            
214            // Check bias-correction defaults
215            double[][] data = matrix.getData();
216            TestUtils.assertEquals("Covariances", 
217                    covarianceMatrix, new Covariance().computeCovarianceMatrix(data),Double.MIN_VALUE);
218            TestUtils.assertEquals("Covariances", 
219                    covarianceMatrix, new Covariance().computeCovarianceMatrix(data, true),Double.MIN_VALUE);
220            
221            double[] x = data[0];
222            double[] y = data[1];
223            assertEquals(new Covariance().covariance(x, y), 
224                    new Covariance().covariance(x, y, true), Double.MIN_VALUE); 
225        }
226        
227        protected RealMatrix createRealMatrix(double[] data, int nRows, int nCols) {
228            double[][] matrixData = new double[nRows][nCols];
229            int ptr = 0;
230            for (int i = 0; i < nRows; i++) {
231                System.arraycopy(data, ptr, matrixData[i], 0, nCols);
232                ptr += nCols;
233            }
234            return new Array2DRowRealMatrix(matrixData); 
235        }
236    }