1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    * 
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   * 
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  
18  package org.apache.commons.math.stat.descriptive;
19  
20  import junit.framework.Test;
21  import junit.framework.TestCase;
22  import junit.framework.TestSuite;
23  
24  import java.util.Collection;
25  import java.util.ArrayList;
26  
27  import org.apache.commons.math.random.RandomData;
28  import org.apache.commons.math.random.RandomDataImpl;
29  import org.apache.commons.math.TestUtils;
30  
31  
32  /**
33   * Test cases for {@link AggregateSummaryStatistics}
34   *
35   */
36  public class AggregateSummaryStatisticsTest extends TestCase {
37      
38      /**
39       * Creates and returns a {@code Test} representing all the test cases in this
40       * class
41       *
42       * @return a {@code Test} representing all the test cases in this class
43       */
44      public static Test suite() {
45          TestSuite suite = new TestSuite(AggregateSummaryStatisticsTest.class);
46          suite.setName("AggregateSummaryStatistics tests");
47          return suite;
48      }
49      
50      /**
51       * Tests the standard aggregation behavior
52       */
53      public void testAggregation() {
54          AggregateSummaryStatistics aggregate = new AggregateSummaryStatistics();
55          SummaryStatistics setOneStats = aggregate.createContributingStatistics();
56          SummaryStatistics setTwoStats = aggregate.createContributingStatistics();
57          
58          assertNotNull("The set one contributing stats are null", setOneStats);
59          assertNotNull("The set two contributing stats are null", setTwoStats);
60          assertNotSame("Contributing stats objects are the same", setOneStats, setTwoStats);
61          
62          setOneStats.addValue(2);
63          setOneStats.addValue(3);
64          setOneStats.addValue(5);
65          setOneStats.addValue(7);
66          setOneStats.addValue(11);
67          assertEquals("Wrong number of set one values", 5, setOneStats.getN());
68          assertEquals("Wrong sum of set one values", 28.0, setOneStats.getSum());
69          
70          setTwoStats.addValue(2);
71          setTwoStats.addValue(4);
72          setTwoStats.addValue(8);
73          assertEquals("Wrong number of set two values", 3, setTwoStats.getN());
74          assertEquals("Wrong sum of set two values", 14.0, setTwoStats.getSum());
75          
76          assertEquals("Wrong number of aggregate values", 8, aggregate.getN());
77          assertEquals("Wrong aggregate sum", 42.0, aggregate.getSum());
78      }
79      
80      /**
81       * Verify that aggregating over a partition gives the same results
82       * as direct computation.
83       * 
84       *  1) Randomly generate a dataset of 10-100 values
85       *     from [-100, 100]
86       *  2) Divide the dataset it into 2-5 partitions
87       *  3) Create an AggregateSummaryStatistic and ContributingStatistics
88       *     for each partition 
89       *  4) Compare results from the AggregateSummaryStatistic with values
90       *     returned by a single SummaryStatistics instance that is provided 
91       *     the full dataset
92       */
93      public void testAggregationConsistency() throws Exception {
94          
95          // Generate a random sample and random partition
96          double[] totalSample = generateSample();
97          double[][] subSamples = generatePartition(totalSample);
98          int nSamples = subSamples.length;
99          
100         // Create aggregator and total stats for comparison
101         AggregateSummaryStatistics aggregate = new AggregateSummaryStatistics();
102         SummaryStatistics totalStats = new SummaryStatistics();
103         
104         // Create array of component stats
105         SummaryStatistics componentStats[] = new SummaryStatistics[nSamples];
106         
107         for (int i = 0; i < nSamples; i++) {
108             
109             // Make componentStats[i] a contributing statistic to aggregate
110             componentStats[i] = aggregate.createContributingStatistics();
111             
112             // Add values from subsample
113             for (int j = 0; j < subSamples[i].length; j++) {
114                 componentStats[i].addValue(subSamples[i][j]);
115             }
116         }
117         
118         // Compute totalStats directly
119         for (int i = 0; i < totalSample.length; i++) {
120             totalStats.addValue(totalSample[i]);
121         }
122         
123         /*
124          * Compare statistics in totalStats with aggregate.
125          * Note that guaranteed success of this comparison depends on the
126          * fact that <aggregate> gets values in exactly the same order
127          * as <totalStats>. 
128          *  
129          */
130         assertEquals(totalStats.getSummary(), aggregate.getSummary());  
131         
132     }
133     
134     /**
135      * Test aggregate function by randomly generating a dataset of 10-100 values
136      * from [-100, 100], dividing it into 2-5 partitions, computing stats for each
137      * partition and comparing the result of aggregate(...) applied to the collection
138      * of per-partition SummaryStatistics with a single SummaryStatistics computed
139      * over the full sample.
140      * 
141      * @throws Exception
142      */
143     public void testAggregate() throws Exception {
144         
145         // Generate a random sample and random partition
146         double[] totalSample = generateSample();
147         double[][] subSamples = generatePartition(totalSample);
148         int nSamples = subSamples.length;
149        
150         // Compute combined stats directly
151         SummaryStatistics totalStats = new SummaryStatistics();
152         for (int i = 0; i < totalSample.length; i++) {
153             totalStats.addValue(totalSample[i]);
154         }
155         
156         // Now compute subsample stats individually and aggregate
157         SummaryStatistics[] subSampleStats = new SummaryStatistics[nSamples];
158         for (int i = 0; i < nSamples; i++) {
159             subSampleStats[i] = new SummaryStatistics();
160         }
161         Collection<SummaryStatistics> aggregate = new ArrayList<SummaryStatistics>();
162         for (int i = 0; i < nSamples; i++) {
163             for (int j = 0; j < subSamples[i].length; j++) { 
164                 subSampleStats[i].addValue(subSamples[i][j]);
165             }
166             aggregate.add(subSampleStats[i]);
167         }
168         
169         // Compare values
170         StatisticalSummary aggregatedStats = AggregateSummaryStatistics.aggregate(aggregate);
171         assertEquals(totalStats.getSummary(), aggregatedStats, 10E-12);
172     }
173     
174     
175     public void testAggregateDegenerate() throws Exception {
176         double[] totalSample = {1, 2, 3, 4, 5};
177         double[][] subSamples = {{1}, {2}, {3}, {4}, {5}};
178         
179         // Compute combined stats directly
180         SummaryStatistics totalStats = new SummaryStatistics();
181         for (int i = 0; i < totalSample.length; i++) {
182             totalStats.addValue(totalSample[i]);
183         }
184         
185         // Now compute subsample stats individually and aggregate
186         SummaryStatistics[] subSampleStats = new SummaryStatistics[5];
187         for (int i = 0; i < 5; i++) {
188             subSampleStats[i] = new SummaryStatistics();
189         }
190         Collection<SummaryStatistics> aggregate = new ArrayList<SummaryStatistics>();
191         for (int i = 0; i < 5; i++) {
192             for (int j = 0; j < subSamples[i].length; j++) { 
193                 subSampleStats[i].addValue(subSamples[i][j]);
194             }
195             aggregate.add(subSampleStats[i]);
196         }
197         
198         // Compare values
199         StatisticalSummaryValues aggregatedStats = AggregateSummaryStatistics.aggregate(aggregate);
200         assertEquals(totalStats.getSummary(), aggregatedStats, 10E-12);
201     }
202     
203     public void testAggregateSpecialValues() throws Exception {
204         double[] totalSample = {Double.POSITIVE_INFINITY, 2, 3, Double.NaN, 5};
205         double[][] subSamples = {{Double.POSITIVE_INFINITY, 2}, {3}, {Double.NaN}, {5}};
206         
207         // Compute combined stats directly
208         SummaryStatistics totalStats = new SummaryStatistics();
209         for (int i = 0; i < totalSample.length; i++) {
210             totalStats.addValue(totalSample[i]);
211         }
212         
213         // Now compute subsample stats individually and aggregate
214         SummaryStatistics[] subSampleStats = new SummaryStatistics[5];
215         for (int i = 0; i < 4; i++) {
216             subSampleStats[i] = new SummaryStatistics();
217         }
218         Collection<SummaryStatistics> aggregate = new ArrayList<SummaryStatistics>();
219         for (int i = 0; i < 4; i++) {
220             for (int j = 0; j < subSamples[i].length; j++) { 
221                 subSampleStats[i].addValue(subSamples[i][j]);
222             }
223             aggregate.add(subSampleStats[i]);
224         }
225         
226         // Compare values
227         StatisticalSummaryValues aggregatedStats = AggregateSummaryStatistics.aggregate(aggregate);
228         assertEquals(totalStats.getSummary(), aggregatedStats, 10E-12);
229         
230     }
231     
232     /**
233      * Verifies that a StatisticalSummary and a StatisticalSummaryValues are equal up
234      * to delta, with NaNs, infinities returned in the same spots. For max, min, n, values
235      * have to agree exactly, delta is used only for sum, mean, variance, std dev.
236      */
237     protected static void assertEquals(StatisticalSummary expected, StatisticalSummary observed, double delta) {
238         TestUtils.assertEquals(expected.getMax(), observed.getMax(), 0);
239         TestUtils.assertEquals(expected.getMin(), observed.getMin(), 0);
240         assertEquals(expected.getN(), observed.getN());
241         TestUtils.assertEquals(expected.getSum(), observed.getSum(), delta);
242         TestUtils.assertEquals(expected.getMean(), observed.getMean(), delta);
243         TestUtils.assertEquals(expected.getStandardDeviation(), observed.getStandardDeviation(), delta);
244         TestUtils.assertEquals(expected.getVariance(), observed.getVariance(), delta);
245     }
246 
247     
248     /**
249      * Generates a random sample of double values.
250      * Sample size is random, between 10 and 100 and values are 
251      * uniformly distributed over [-100, 100].
252      * 
253      * @return array of random double values
254      */
255     private double[] generateSample() {
256         final RandomData randomData = new RandomDataImpl();
257         final int sampleSize = randomData.nextInt(10,100);
258         double[] out = new double[sampleSize];
259         for (int i = 0; i < out.length; i++) {
260             out[i] = randomData.nextUniform(-100, 100);
261         }
262         return out;     
263     }
264     
265     /**
266      * Generates a partition of <sample> into up to 5 sequentially selected
267      * subsamples with randomly selected partition points.
268      * 
269      * @param sample array to partition
270      * @return rectangular array with rows = subsamples
271      */
272     private double[][] generatePartition(double[] sample) {
273         final int length = sample.length;
274         final double[][] out = new double[5][];
275         final RandomData randomData = new RandomDataImpl();
276         int cur = 0;
277         int offset = 0;
278         int sampleCount = 0;
279         for (int i = 0; i < 5; i++) {
280             if (cur == length || offset == length) {
281                 break;
282             }
283             final int next = (i == 4 || cur == length - 1) ? length - 1 : randomData.nextInt(cur, length - 1);
284             final int subLength = next - cur + 1;
285             out[i] = new double[subLength];
286             System.arraycopy(sample, offset, out[i], 0, subLength);
287             cur = next + 1;
288             sampleCount++;
289             offset += subLength;
290         }
291         if (sampleCount < 5) {
292             double[][] out2 = new double[sampleCount][];
293             for (int j = 0; j < sampleCount; j++) {
294                 final int curSize = out[j].length;
295                 out2[j] = new double[curSize];
296                 System.arraycopy(out[j], 0, out2[j], 0, curSize);
297             }
298             return out2;
299         } else {
300             return out;
301         }
302     }
303     
304 }