001 /*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements. See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License. You may obtain a copy of the License at
008 *
009 * http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017
018 package org.apache.commons.math.stat.descriptive;
019
020 import java.io.Serializable;
021 import java.util.Collection;
022 import java.util.Iterator;
023
024 /**
025 * <p>
026 * An aggregator for {@code SummaryStatistics} from several data sets or
027 * data set partitions. In its simplest usage mode, the client creates an
028 * instance via the zero-argument constructor, then uses
029 * {@link #createContributingStatistics()} to obtain a {@code SummaryStatistics}
030 * for each individual data set / partition. The per-set statistics objects
031 * are used as normal, and at any time the aggregate statistics for all the
032 * contributors can be obtained from this object.
033 * </p><p>
034 * Clients with specialized requirements can use alternative constructors to
035 * control the statistics implementations and initial values used by the
036 * contributing and the internal aggregate {@code SummaryStatistics} objects.
037 * </p><p>
038 * A static {@link #aggregate(Collection)} method is also included that computes
039 * aggregate statistics directly from a Collection of SummaryStatistics instances.
040 * </p><p>
041 * When {@link #createContributingStatistics()} is used to create SummaryStatistics
042 * instances to be aggregated concurrently, the created instances'
043 * {@link SummaryStatistics#addValue(double)} methods must synchronize on the aggregating
044 * instance maintained by this class. In multithreaded environments, if the functionality
045 * provided by {@link #aggregate(Collection)} is adequate, that method should be used
046 * to avoid unnecessary computation and synchronization delays.</p>
047 *
048 * @since 2.0
049 * @version $Revision: 811833 $ $Date: 2009-09-06 18:27:50 +0200 (dim. 06 sept. 2009) $
050 *
051 */
052 public class AggregateSummaryStatistics implements StatisticalSummary,
053 Serializable {
054
055
056 /** Serializable version identifier */
057 private static final long serialVersionUID = -8207112444016386906L;
058
059 /**
060 * A SummaryStatistics serving as a prototype for creating SummaryStatistics
061 * contributing to this aggregate
062 */
063 private final SummaryStatistics statisticsPrototype;
064
065 /**
066 * The SummaryStatistics in which aggregate statistics are accumulated.
067 */
068 private final SummaryStatistics statistics;
069
070 /**
071 * Initializes a new AggregateSummaryStatistics with default statistics
072 * implementations.
073 *
074 */
075 public AggregateSummaryStatistics() {
076 this(new SummaryStatistics());
077 }
078
079 /**
080 * Initializes a new AggregateSummaryStatistics with the specified statistics
081 * object as a prototype for contributing statistics and for the internal
082 * aggregate statistics. This provides for customized statistics implementations
083 * to be used by contributing and aggregate statistics.
084 *
085 * @param prototypeStatistics a {@code SummaryStatistics} serving as a
086 * prototype both for the internal aggregate statistics and for
087 * contributing statistics obtained via the
088 * {@code createContributingStatistics()} method. Being a prototype
089 * means that other objects are initialized by copying this object's state.
090 * If {@code null}, a new, default statistics object is used. Any statistic
091 * values in the prototype are propagated to contributing statistics
092 * objects and (once) into these aggregate statistics.
093 * @see #createContributingStatistics()
094 */
095 public AggregateSummaryStatistics(SummaryStatistics prototypeStatistics) {
096 this(prototypeStatistics,
097 prototypeStatistics == null ? null : new SummaryStatistics(prototypeStatistics));
098 }
099
100 /**
101 * Initializes a new AggregateSummaryStatistics with the specified statistics
102 * object as a prototype for contributing statistics and for the internal
103 * aggregate statistics. This provides for different statistics implementations
104 * to be used by contributing and aggregate statistics and for an initial
105 * state to be supplied for the aggregate statistics.
106 *
107 * @param prototypeStatistics a {@code SummaryStatistics} serving as a
108 * prototype both for the internal aggregate statistics and for
109 * contributing statistics obtained via the
110 * {@code createContributingStatistics()} method. Being a prototype
111 * means that other objects are initialized by copying this object's state.
112 * If {@code null}, a new, default statistics object is used. Any statistic
113 * values in the prototype are propagated to contributing statistics
114 * objects, but not into these aggregate statistics.
115 * @param initialStatistics a {@code SummaryStatistics} to serve as the
116 * internal aggregate statistics object. If {@code null}, a new, default
117 * statistics object is used.
118 * @see #createContributingStatistics()
119 */
120 public AggregateSummaryStatistics(SummaryStatistics prototypeStatistics,
121 SummaryStatistics initialStatistics) {
122 this.statisticsPrototype =
123 (prototypeStatistics == null) ? new SummaryStatistics() : prototypeStatistics;
124 this.statistics =
125 (initialStatistics == null) ? new SummaryStatistics() : initialStatistics;
126 }
127
128 /**
129 * {@inheritDoc}. This version returns the maximum over all the aggregated
130 * data.
131 *
132 * @see StatisticalSummary#getMax()
133 */
134 public double getMax() {
135 synchronized (statistics) {
136 return statistics.getMax();
137 }
138 }
139
140 /**
141 * {@inheritDoc}. This version returns the mean of all the aggregated data.
142 *
143 * @see StatisticalSummary#getMean()
144 */
145 public double getMean() {
146 synchronized (statistics) {
147 return statistics.getMean();
148 }
149 }
150
151 /**
152 * {@inheritDoc}. This version returns the minimum over all the aggregated
153 * data.
154 *
155 * @see StatisticalSummary#getMin()
156 */
157 public double getMin() {
158 synchronized (statistics) {
159 return statistics.getMin();
160 }
161 }
162
163 /**
164 * {@inheritDoc}. This version returns a count of all the aggregated data.
165 *
166 * @see StatisticalSummary#getN()
167 */
168 public long getN() {
169 synchronized (statistics) {
170 return statistics.getN();
171 }
172 }
173
174 /**
175 * {@inheritDoc}. This version returns the standard deviation of all the
176 * aggregated data.
177 *
178 * @see StatisticalSummary#getStandardDeviation()
179 */
180 public double getStandardDeviation() {
181 synchronized (statistics) {
182 return statistics.getStandardDeviation();
183 }
184 }
185
186 /**
187 * {@inheritDoc}. This version returns a sum of all the aggregated data.
188 *
189 * @see StatisticalSummary#getSum()
190 */
191 public double getSum() {
192 synchronized (statistics) {
193 return statistics.getSum();
194 }
195 }
196
197 /**
198 * {@inheritDoc}. This version returns the variance of all the aggregated
199 * data.
200 *
201 * @see StatisticalSummary#getVariance()
202 */
203 public double getVariance() {
204 synchronized (statistics) {
205 return statistics.getVariance();
206 }
207 }
208
209 /**
210 * Returns the sum of the logs of all the aggregated data.
211 *
212 * @return the sum of logs
213 * @see SummaryStatistics#getSumOfLogs()
214 */
215 public double getSumOfLogs() {
216 synchronized (statistics) {
217 return statistics.getSumOfLogs();
218 }
219 }
220
221 /**
222 * Returns the geometric mean of all the aggregated data.
223 *
224 * @return the geometric mean
225 * @see SummaryStatistics#getGeometricMean()
226 */
227 public double getGeometricMean() {
228 synchronized (statistics) {
229 return statistics.getGeometricMean();
230 }
231 }
232
233 /**
234 * Returns the sum of the squares of all the aggregated data.
235 *
236 * @return The sum of squares
237 * @see SummaryStatistics#getSumsq()
238 */
239 public double getSumsq() {
240 synchronized (statistics) {
241 return statistics.getSumsq();
242 }
243 }
244
245 /**
246 * Returns a statistic related to the Second Central Moment. Specifically,
247 * what is returned is the sum of squared deviations from the sample mean
248 * among the all of the aggregated data.
249 *
250 * @return second central moment statistic
251 * @see SummaryStatistics#getSecondMoment()
252 */
253 public double getSecondMoment() {
254 synchronized (statistics) {
255 return statistics.getSecondMoment();
256 }
257 }
258
259 /**
260 * Return a {@link StatisticalSummaryValues} instance reporting current
261 * aggregate statistics.
262 *
263 * @return Current values of aggregate statistics
264 */
265 public StatisticalSummary getSummary() {
266 synchronized (statistics) {
267 return new StatisticalSummaryValues(getMean(), getVariance(), getN(),
268 getMax(), getMin(), getSum());
269 }
270 }
271
272 /**
273 * Creates and returns a {@code SummaryStatistics} whose data will be
274 * aggregated with those of this {@code AggregateSummaryStatistics}.
275 *
276 * @return a {@code SummaryStatistics} whose data will be aggregated with
277 * those of this {@code AggregateSummaryStatistics}. The initial state
278 * is a copy of the configured prototype statistics.
279 */
280 public SummaryStatistics createContributingStatistics() {
281 SummaryStatistics contributingStatistics
282 = new AggregatingSummaryStatistics(statistics);
283
284 SummaryStatistics.copy(statisticsPrototype, contributingStatistics);
285
286 return contributingStatistics;
287 }
288
289 /**
290 * Computes aggregate summary statistics. This method can be used to combine statistics
291 * computed over partitions or subsamples - i.e., the StatisticalSummaryValues returned
292 * should contain the same values that would have been obtained by computing a single
293 * StatisticalSummary over the combined dataset.
294 * <p>
295 * Returns null if the collection is empty or null.
296 * </p>
297 *
298 * @param statistics collection of SummaryStatistics to aggregate
299 * @return summary statistics for the combined dataset
300 */
301 public static StatisticalSummaryValues aggregate(Collection<SummaryStatistics> statistics) {
302 if (statistics == null) {
303 return null;
304 }
305 Iterator<SummaryStatistics> iterator = statistics.iterator();
306 if (!iterator.hasNext()) {
307 return null;
308 }
309 SummaryStatistics current = iterator.next();
310 long n = current.getN();
311 double min = current.getMin();
312 double sum = current.getSum();
313 double max = current.getMax();
314 double m2 = current.getSecondMoment();
315 double mean = current.getMean();
316 while (iterator.hasNext()) {
317 current = iterator.next();
318 if (current.getMin() < min || Double.isNaN(min)) {
319 min = current.getMin();
320 }
321 if (current.getMax() > max || Double.isNaN(max)) {
322 max = current.getMax();
323 }
324 sum += current.getSum();
325 final double oldN = n;
326 final double curN = current.getN();
327 n += curN;
328 final double meanDiff = current.getMean() - mean;
329 mean = sum / n;
330 m2 = m2 + current.getSecondMoment() + meanDiff * meanDiff * oldN * curN / n;
331 }
332 final double variance;
333 if (n == 0) {
334 variance = Double.NaN;
335 } else if (n == 1) {
336 variance = 0d;
337 } else {
338 variance = m2 / (n - 1);
339 }
340 return new StatisticalSummaryValues(mean, variance, n, max, min, sum);
341 }
342
343 /**
344 * A SummaryStatistics that also forwards all values added to it to a second
345 * {@code SummaryStatistics} for aggregation.
346 *
347 * @since 2.0
348 */
349 private static class AggregatingSummaryStatistics extends SummaryStatistics {
350
351 /**
352 * The serialization version of this class
353 */
354 private static final long serialVersionUID = 1L;
355
356 /**
357 * An additional SummaryStatistics into which values added to these
358 * statistics (and possibly others) are aggregated
359 */
360 private final SummaryStatistics aggregateStatistics;
361
362 /**
363 * Initializes a new AggregatingSummaryStatistics with the specified
364 * aggregate statistics object
365 *
366 * @param aggregateStatistics a {@code SummaryStatistics} into which
367 * values added to this statistics object should be aggregated
368 */
369 public AggregatingSummaryStatistics(SummaryStatistics aggregateStatistics) {
370 this.aggregateStatistics = aggregateStatistics;
371 }
372
373 /**
374 * {@inheritDoc}. This version adds the provided value to the configured
375 * aggregate after adding it to these statistics.
376 *
377 * @see SummaryStatistics#addValue(double)
378 */
379 @Override
380 public void addValue(double value) {
381 super.addValue(value);
382 synchronized (aggregateStatistics) {
383 aggregateStatistics.addValue(value);
384 }
385 }
386
387 /**
388 * Returns true iff <code>object</code> is a
389 * <code>SummaryStatistics</code> instance and all statistics have the
390 * same values as this.
391 * @param object the object to test equality against.
392 * @return true if object equals this
393 */
394 @Override
395 public boolean equals(Object object) {
396 if (object == this) {
397 return true;
398 }
399 if (object instanceof AggregatingSummaryStatistics == false) {
400 return false;
401 }
402 AggregatingSummaryStatistics stat = (AggregatingSummaryStatistics)object;
403 return super.equals(stat) &&
404 aggregateStatistics.equals(stat.aggregateStatistics);
405 }
406
407 /**
408 * Returns hash code based on values of statistics
409 * @return hash code
410 */
411 @Override
412 public int hashCode() {
413 return 123 + super.hashCode() + aggregateStatistics.hashCode();
414 }
415 }
416 }