001 /*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements. See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License. You may obtain a copy of the License at
008 *
009 * http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017 package org.apache.commons.math.stat.inference;
018
019 import org.apache.commons.math.MathException;
020
021 /**
022 * An interface for Chi-Square tests.
023 * <p>This interface handles only known distributions. If the distribution is
024 * unknown and should be provided by a sample, then the {@link UnknownDistributionChiSquareTest
025 * UnknownDistributionChiSquareTest} extended interface should be used instead.</p>
026 * @version $Revision: 811685 $ $Date: 2009-09-05 19:36:48 +0200 (sam. 05 sept. 2009) $
027 */
028 public interface ChiSquareTest {
029
030 /**
031 * Computes the <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda35f.htm">
032 * Chi-Square statistic</a> comparing <code>observed</code> and <code>expected</code>
033 * frequency counts.
034 * <p>
035 * This statistic can be used to perform a Chi-Square test evaluating the null hypothesis that
036 * the observed counts follow the expected distribution.</p>
037 * <p>
038 * <strong>Preconditions</strong>: <ul>
039 * <li>Expected counts must all be positive.
040 * </li>
041 * <li>Observed counts must all be >= 0.
042 * </li>
043 * <li>The observed and expected arrays must have the same length and
044 * their common length must be at least 2.
045 * </li></ul></p><p>
046 * If any of the preconditions are not met, an
047 * <code>IllegalArgumentException</code> is thrown.</p>
048 *
049 * @param observed array of observed frequency counts
050 * @param expected array of expected frequency counts
051 * @return chiSquare statistic
052 * @throws IllegalArgumentException if preconditions are not met
053 */
054 double chiSquare(double[] expected, long[] observed)
055 throws IllegalArgumentException;
056
057 /**
058 * Returns the <i>observed significance level</i>, or <a href=
059 * "http://www.cas.lancs.ac.uk/glossary_v1.1/hyptest.html#pvalue">
060 * p-value</a>, associated with a
061 * <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda35f.htm">
062 * Chi-square goodness of fit test</a> comparing the <code>observed</code>
063 * frequency counts to those in the <code>expected</code> array.
064 * <p>
065 * The number returned is the smallest significance level at which one can reject
066 * the null hypothesis that the observed counts conform to the frequency distribution
067 * described by the expected counts.</p>
068 * <p>
069 * <strong>Preconditions</strong>: <ul>
070 * <li>Expected counts must all be positive.
071 * </li>
072 * <li>Observed counts must all be >= 0.
073 * </li>
074 * <li>The observed and expected arrays must have the same length and
075 * their common length must be at least 2.
076 * </li></ul></p><p>
077 * If any of the preconditions are not met, an
078 * <code>IllegalArgumentException</code> is thrown.</p>
079 *
080 * @param observed array of observed frequency counts
081 * @param expected array of expected frequency counts
082 * @return p-value
083 * @throws IllegalArgumentException if preconditions are not met
084 * @throws MathException if an error occurs computing the p-value
085 */
086 double chiSquareTest(double[] expected, long[] observed)
087 throws IllegalArgumentException, MathException;
088
089 /**
090 * Performs a <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda35f.htm">
091 * Chi-square goodness of fit test</a> evaluating the null hypothesis that the observed counts
092 * conform to the frequency distribution described by the expected counts, with
093 * significance level <code>alpha</code>. Returns true iff the null hypothesis can be rejected
094 * with 100 * (1 - alpha) percent confidence.
095 * <p>
096 * <strong>Example:</strong><br>
097 * To test the hypothesis that <code>observed</code> follows
098 * <code>expected</code> at the 99% level, use </p><p>
099 * <code>chiSquareTest(expected, observed, 0.01) </code></p>
100 * <p>
101 * <strong>Preconditions</strong>: <ul>
102 * <li>Expected counts must all be positive.
103 * </li>
104 * <li>Observed counts must all be >= 0.
105 * </li>
106 * <li>The observed and expected arrays must have the same length and
107 * their common length must be at least 2.
108 * <li> <code> 0 < alpha < 0.5 </code>
109 * </li></ul></p><p>
110 * If any of the preconditions are not met, an
111 * <code>IllegalArgumentException</code> is thrown.</p>
112 *
113 * @param observed array of observed frequency counts
114 * @param expected array of expected frequency counts
115 * @param alpha significance level of the test
116 * @return true iff null hypothesis can be rejected with confidence
117 * 1 - alpha
118 * @throws IllegalArgumentException if preconditions are not met
119 * @throws MathException if an error occurs performing the test
120 */
121 boolean chiSquareTest(double[] expected, long[] observed, double alpha)
122 throws IllegalArgumentException, MathException;
123
124 /**
125 * Computes the Chi-Square statistic associated with a
126 * <a href="http://www.itl.nist.gov/div898/handbook/prc/section4/prc45.htm">
127 * chi-square test of independence</a> based on the input <code>counts</code>
128 * array, viewed as a two-way table.
129 * <p>
130 * The rows of the 2-way table are
131 * <code>count[0], ... , count[count.length - 1] </code></p>
132 * <p>
133 * <strong>Preconditions</strong>: <ul>
134 * <li>All counts must be >= 0.
135 * </li>
136 * <li>The count array must be rectangular (i.e. all count[i] subarrays
137 * must have the same length).
138 * </li>
139 * <li>The 2-way table represented by <code>counts</code> must have at
140 * least 2 columns and at least 2 rows.
141 * </li>
142 * </li></ul></p><p>
143 * If any of the preconditions are not met, an
144 * <code>IllegalArgumentException</code> is thrown.</p>
145 *
146 * @param counts array representation of 2-way table
147 * @return chiSquare statistic
148 * @throws IllegalArgumentException if preconditions are not met
149 */
150 double chiSquare(long[][] counts)
151 throws IllegalArgumentException;
152
153 /**
154 * Returns the <i>observed significance level</i>, or <a href=
155 * "http://www.cas.lancs.ac.uk/glossary_v1.1/hyptest.html#pvalue">
156 * p-value</a>, associated with a
157 * <a href="http://www.itl.nist.gov/div898/handbook/prc/section4/prc45.htm">
158 * chi-square test of independence</a> based on the input <code>counts</code>
159 * array, viewed as a two-way table.
160 * <p>
161 * The rows of the 2-way table are
162 * <code>count[0], ... , count[count.length - 1] </code></p>
163 * <p>
164 * <strong>Preconditions</strong>: <ul>
165 * <li>All counts must be >= 0.
166 * </li>
167 * <li>The count array must be rectangular (i.e. all count[i] subarrays must have the same length).
168 * </li>
169 * <li>The 2-way table represented by <code>counts</code> must have at least 2 columns and
170 * at least 2 rows.
171 * </li>
172 * </li></ul></p><p>
173 * If any of the preconditions are not met, an
174 * <code>IllegalArgumentException</code> is thrown.</p>
175 *
176 * @param counts array representation of 2-way table
177 * @return p-value
178 * @throws IllegalArgumentException if preconditions are not met
179 * @throws MathException if an error occurs computing the p-value
180 */
181 double chiSquareTest(long[][] counts)
182 throws IllegalArgumentException, MathException;
183
184 /**
185 * Performs a <a href="http://www.itl.nist.gov/div898/handbook/prc/section4/prc45.htm">
186 * chi-square test of independence</a> evaluating the null hypothesis that the classifications
187 * represented by the counts in the columns of the input 2-way table are independent of the rows,
188 * with significance level <code>alpha</code>. Returns true iff the null hypothesis can be rejected
189 * with 100 * (1 - alpha) percent confidence.
190 * <p>
191 * The rows of the 2-way table are
192 * <code>count[0], ... , count[count.length - 1] </code></p>
193 * <p>
194 * <strong>Example:</strong><br>
195 * To test the null hypothesis that the counts in
196 * <code>count[0], ... , count[count.length - 1] </code>
197 * all correspond to the same underlying probability distribution at the 99% level, use </p><p>
198 * <code>chiSquareTest(counts, 0.01) </code></p>
199 * <p>
200 * <strong>Preconditions</strong>: <ul>
201 * <li>All counts must be >= 0.
202 * </li>
203 * <li>The count array must be rectangular (i.e. all count[i] subarrays must have the same length).
204 * </li>
205 * <li>The 2-way table represented by <code>counts</code> must have at least 2 columns and
206 * at least 2 rows.
207 * </li>
208 * </li></ul></p><p>
209 * If any of the preconditions are not met, an
210 * <code>IllegalArgumentException</code> is thrown.</p>
211 *
212 * @param counts array representation of 2-way table
213 * @param alpha significance level of the test
214 * @return true iff null hypothesis can be rejected with confidence
215 * 1 - alpha
216 * @throws IllegalArgumentException if preconditions are not met
217 * @throws MathException if an error occurs performing the test
218 */
219 boolean chiSquareTest(long[][] counts, double alpha)
220 throws IllegalArgumentException, MathException;
221
222 }