001 /*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements. See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License. You may obtain a copy of the License at
008 *
009 * http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017
018 package org.apache.commons.math.optimization;
019
020 import org.apache.commons.math.FunctionEvaluationException;
021 import org.apache.commons.math.MathRuntimeException;
022 import org.apache.commons.math.analysis.MultivariateRealFunction;
023 import org.apache.commons.math.analysis.MultivariateVectorialFunction;
024 import org.apache.commons.math.exception.util.LocalizedFormats;
025 import org.apache.commons.math.linear.RealMatrix;
026
027 /** This class converts {@link MultivariateVectorialFunction vectorial
028 * objective functions} to {@link MultivariateRealFunction scalar objective functions}
029 * when the goal is to minimize them.
030 * <p>
031 * This class is mostly used when the vectorial objective function represents
032 * a theoretical result computed from a point set applied to a model and
033 * the models point must be adjusted to fit the theoretical result to some
034 * reference observations. The observations may be obtained for example from
035 * physical measurements whether the model is built from theoretical
036 * considerations.
037 * </p>
038 * <p>
039 * This class computes a possibly weighted squared sum of the residuals, which is
040 * a scalar value. The residuals are the difference between the theoretical model
041 * (i.e. the output of the vectorial objective function) and the observations. The
042 * class implements the {@link MultivariateRealFunction} interface and can therefore be
043 * minimized by any optimizer supporting scalar objectives functions.This is one way
044 * to perform a least square estimation. There are other ways to do this without using
045 * this converter, as some optimization algorithms directly support vectorial objective
046 * functions.
047 * </p>
048 * <p>
049 * This class support combination of residuals with or without weights and correlations.
050 * </p>
051 *
052 * @see MultivariateRealFunction
053 * @see MultivariateVectorialFunction
054 * @version $Revision: 1070725 $ $Date: 2011-02-15 02:31:12 +0100 (mar. 15 f??vr. 2011) $
055 * @since 2.0
056 */
057
058 public class LeastSquaresConverter implements MultivariateRealFunction {
059
060 /** Underlying vectorial function. */
061 private final MultivariateVectorialFunction function;
062
063 /** Observations to be compared to objective function to compute residuals. */
064 private final double[] observations;
065
066 /** Optional weights for the residuals. */
067 private final double[] weights;
068
069 /** Optional scaling matrix (weight and correlations) for the residuals. */
070 private final RealMatrix scale;
071
072 /** Build a simple converter for uncorrelated residuals with the same weight.
073 * @param function vectorial residuals function to wrap
074 * @param observations observations to be compared to objective function to compute residuals
075 */
076 public LeastSquaresConverter(final MultivariateVectorialFunction function,
077 final double[] observations) {
078 this.function = function;
079 this.observations = observations.clone();
080 this.weights = null;
081 this.scale = null;
082 }
083
084 /** Build a simple converter for uncorrelated residuals with the specific weights.
085 * <p>
086 * The scalar objective function value is computed as:
087 * <pre>
088 * objective = ∑weight<sub>i</sub>(observation<sub>i</sub>-objective<sub>i</sub>)<sup>2</sup>
089 * </pre>
090 * </p>
091 * <p>
092 * Weights can be used for example to combine residuals with different standard
093 * deviations. As an example, consider a residuals array in which even elements
094 * are angular measurements in degrees with a 0.01° standard deviation and
095 * odd elements are distance measurements in meters with a 15m standard deviation.
096 * In this case, the weights array should be initialized with value
097 * 1.0/(0.01<sup>2</sup>) in the even elements and 1.0/(15.0<sup>2</sup>) in the
098 * odd elements (i.e. reciprocals of variances).
099 * </p>
100 * <p>
101 * The array computed by the objective function, the observations array and the
102 * weights array must have consistent sizes or a {@link FunctionEvaluationException} will be
103 * triggered while computing the scalar objective.
104 * </p>
105 * @param function vectorial residuals function to wrap
106 * @param observations observations to be compared to objective function to compute residuals
107 * @param weights weights to apply to the residuals
108 * @exception IllegalArgumentException if the observations vector and the weights
109 * vector dimensions don't match (objective function dimension is checked only when
110 * the {@link #value(double[])} method is called)
111 */
112 public LeastSquaresConverter(final MultivariateVectorialFunction function,
113 final double[] observations, final double[] weights)
114 throws IllegalArgumentException {
115 if (observations.length != weights.length) {
116 throw MathRuntimeException.createIllegalArgumentException(
117 LocalizedFormats.DIMENSIONS_MISMATCH_SIMPLE,
118 observations.length, weights.length);
119 }
120 this.function = function;
121 this.observations = observations.clone();
122 this.weights = weights.clone();
123 this.scale = null;
124 }
125
126 /** Build a simple converter for correlated residuals with the specific weights.
127 * <p>
128 * The scalar objective function value is computed as:
129 * <pre>
130 * objective = y<sup>T</sup>y with y = scale×(observation-objective)
131 * </pre>
132 * </p>
133 * <p>
134 * The array computed by the objective function, the observations array and the
135 * the scaling matrix must have consistent sizes or a {@link FunctionEvaluationException}
136 * will be triggered while computing the scalar objective.
137 * </p>
138 * @param function vectorial residuals function to wrap
139 * @param observations observations to be compared to objective function to compute residuals
140 * @param scale scaling matrix
141 * @exception IllegalArgumentException if the observations vector and the scale
142 * matrix dimensions don't match (objective function dimension is checked only when
143 * the {@link #value(double[])} method is called)
144 */
145 public LeastSquaresConverter(final MultivariateVectorialFunction function,
146 final double[] observations, final RealMatrix scale)
147 throws IllegalArgumentException {
148 if (observations.length != scale.getColumnDimension()) {
149 throw MathRuntimeException.createIllegalArgumentException(
150 LocalizedFormats.DIMENSIONS_MISMATCH_SIMPLE,
151 observations.length, scale.getColumnDimension());
152 }
153 this.function = function;
154 this.observations = observations.clone();
155 this.weights = null;
156 this.scale = scale.copy();
157 }
158
159 /** {@inheritDoc} */
160 public double value(final double[] point) throws FunctionEvaluationException {
161
162 // compute residuals
163 final double[] residuals = function.value(point);
164 if (residuals.length != observations.length) {
165 throw new FunctionEvaluationException(point,LocalizedFormats.DIMENSIONS_MISMATCH_SIMPLE,
166 residuals.length, observations.length);
167 }
168 for (int i = 0; i < residuals.length; ++i) {
169 residuals[i] -= observations[i];
170 }
171
172 // compute sum of squares
173 double sumSquares = 0;
174 if (weights != null) {
175 for (int i = 0; i < residuals.length; ++i) {
176 final double ri = residuals[i];
177 sumSquares += weights[i] * ri * ri;
178 }
179 } else if (scale != null) {
180 for (final double yi : scale.operate(residuals)) {
181 sumSquares += yi * yi;
182 }
183 } else {
184 for (final double ri : residuals) {
185 sumSquares += ri * ri;
186 }
187 }
188
189 return sumSquares;
190
191 }
192
193 }