TukeyKramerConfidence result is wrong

Question

TukeyKramerConfidence result is wrong

fkurth opened this issue 10 years ago · comments

The result of the TukeyKramerConfidence test is wrong. We confirmed this with Statistica and SAS JMP.

The problem is in the calculation of the test statistic / standard error.
One should not use the totalVariance, but this term (the pooled within-treatment variance) instead:

1/(N-K) * SUM( Variance_i * (TreatmentCount_i - 1))

Also, there is no need to multiply the test statistic by an extra Sqrt(2), since it is already included in the calculation of the standard error.

Attached is a fixed version and a UnitTest from a Textbook example.
This example has been verified with Statistica and SAS JMP.

/*
 * File:                TukeyKramerConfidence.java
 * Authors:             Kevin R. Dixon
 * Company:             Sandia National Laboratories
 * Project:             Cognitive Foundry
 * 
 * Copyright May 16, 2011, Sandia Corporation.
 * Under the terms of Contract DE-AC04-94AL85000, there is a non-exclusive
 * license for use of this work by or on behalf of the U.S. Government.
 * Export of this program may require a license from the United States
 * Government. See CopyrightHistory.txt for complete details.
 * 
 */


package com.gf.ye.yes.service.plot.statistics;

import gov.sandia.cognition.annotation.PublicationReference;
import gov.sandia.cognition.annotation.PublicationType;
import gov.sandia.cognition.math.UnivariateStatisticsUtil;
import gov.sandia.cognition.math.matrix.Matrix;
import gov.sandia.cognition.math.matrix.MatrixFactory;
import gov.sandia.cognition.statistics.distribution.StudentizedRangeDistribution;
import gov.sandia.cognition.statistics.method.AbstractMultipleHypothesisComparison;
import gov.sandia.cognition.statistics.method.ConfidenceTestAssumptions;
import gov.sandia.cognition.statistics.method.TukeyKramerConfidence;
import gov.sandia.cognition.util.ObjectUtil;
import gov.sandia.cognition.util.Pair;

import java.util.ArrayList;
import java.util.Collection;
import java.util.List;

/**
 * Tukey-Kramer test is the multiple-comparison generalization of the unpaired
 * Student's t-test when conducting multiple comparisons.  The t-test and
 * Tukey's Range test are coincident when a single comparison is made.
 * Tukey's Range test is typically used as the post-hoc analysis technique
 * after detecting a difference using a 1-way ANOVA.  This class implements
 * Kramer's generalization to unequal subjects in different treatments.
 * @author Kevin R. Dixon
 * @since 3.1
 */
@ConfidenceTestAssumptions(
    name="Tukey-Kramer Range test",
    alsoKnownAs={
        "Tukey's Range test",
        "Tukey's Honestly Significant Difference test",
        "Tukey's HSD test"
    },
    description={
        "Tukey's test determines which treatment is statistically different from a multiple comparison.",
        "Tukey's test is a generalization of the paired Student's t-test for multiple comparisons using a population-correction factor."
    },
    assumptions={
        "All data came from same distribution, without considering treatment effects.",
        "The observations have equal variance.",
        "Measurements are independent and equivalent within a treatment.",
        "All observations are independent."
    },
    nullHypothesis="Each treatment has no effect on the mean outcome of the subjects",
    dataPaired=false,
    dataSameSize=false,
    distribution=StudentizedRangeDistribution.class,
    reference={
        @PublicationReference(
            author="Wikipedia",
            title="Tukey's range test",
            type=PublicationType.WebPage,
            year=2011,
            url="http://en.wikipedia.org/wiki/Tukey's_range_test"
        )
    }
)

public class TukeyKramerConfidenceN1 extends AbstractMultipleHypothesisComparison<Collection<? extends Number>, TukeyKramerConfidenceN1.Statistic> {

    private static final long serialVersionUID = 1L;

    /**
     * Creates a new instance of TukeyKramerConfidenceN1
     */
    public TukeyKramerConfidenceN1() {
        super();
    }

    /**
     * Clones this comparison.
     *
     * @return A copy of this object, typed as this class. (This class does not
     *         extend TukeyKramerConfidence, so casting the clone to that type
     *         cannot succeed.)
     */
    @Override
    public TukeyKramerConfidenceN1 clone() {
        return (TukeyKramerConfidenceN1) super.clone();
    }

    /**
     * Evaluates the pairwise null hypotheses between all treatments.
     * The per-treatment variances are pooled into the mean squared residual
     * {@code SUM(variance_i * (N_i - 1)) / (N - K)}, where N is the total
     * number of subjects and K the number of treatments, and that value is
     * handed to the resulting {@link Statistic}.
     *
     * @param data
     *            One collection of observations per treatment; treatments may
     *            have different sizes
     * @param uncompensatedAlpha
     *            Uncompensated alpha (p-value threshold) for the multiple comparison test
     * @return Statistic holding the pairwise test statistics and null-hypothesis probabilities
     */
    @Override
    public TukeyKramerConfidenceN1.Statistic evaluateNullHypotheses(Collection<? extends Collection<? extends Number>> data, double uncompensatedAlpha) {
        // There are "K" treatments
        final int K = data.size();

        // Each treatment can have a different number of subjects
        List<Integer> subjectCounts = new ArrayList<Integer>(K);
        List<Double> treatmentMeans = new ArrayList<Double>(K);

        // Accumulates variance_i * (N_i - 1) over all treatments
        double treatmentVariancesSum = 0;
        // This is the total subject count.
        int N = 0;
        for (Collection<? extends Number> treatment : data) {
            final int Ni = treatment.size();
            N += Ni;
            subjectCounts.add(Ni);
            Pair<Double,Double> meanAndVariance = UnivariateStatisticsUtil.computeMeanAndVariance(treatment);
            treatmentMeans.add(meanAndVariance.getFirst());
            treatmentVariancesSum += meanAndVariance.getSecond() * (Ni - 1);
        }

        // NOTE(review): when N == K (one subject per treatment) the divisor is
        // zero; callers are presumably expected to supply more subjects than
        // treatments -- confirm.
        final double meanSquaredResiduals = treatmentVariancesSum / (N - K);

        return new TukeyKramerConfidenceN1.Statistic(uncompensatedAlpha, subjectCounts, treatmentMeans, meanSquaredResiduals);
    }

    /**
     * Statistic from Tukey-Kramer's multiple comparison test
     */
    public static class Statistic extends AbstractMultipleHypothesisComparison.Statistic {

        private static final long serialVersionUID = 1L;

        /**
         * Number of subjects in each treatment
         */
        protected List<Integer> subjectCounts;

        /**
         * Mean for each treatment
         */
        protected List<Double> treatmentMeans;

        /**
         * Variance for each treatment. Not assigned anywhere in this class;
         * kept so that existing subclasses referencing it still compile.
         */
        protected List<Double> treatmentVariances;

        /**
         * Standard error used for each (i,j) treatment comparison; symmetric,
         * filled in by {@link #computeTestStatistics(List, List, double)}
         */
        protected Matrix standardErrors;

        /**
         * Pairwise mean differences. Not assigned anywhere in this class;
         * kept so that existing subclasses referencing it still compile.
         */
        protected Matrix meanDifferences;

        /**
         * Creates a new instance of Statistic and immediately computes the
         * test statistics and null-hypothesis probabilities.
         *
         * @param uncompensatedAlpha
         *            Uncompensated alpha (p-value threshold) for the multiple comparison test
         * @param subjectCounts
         *            Number of subjects in each treatment
         * @param treatmentMeans
         *            Mean for each treatment
         * @param meanSquaredResiduals
         *            Pooled within-treatment variance,
         *            {@code SUM(variance_i * (N_i - 1)) / (N - K)}
         */
        public Statistic(final double uncompensatedAlpha, final List<Integer> subjectCounts, final List<Double> treatmentMeans, final double meanSquaredResiduals) {
            this.treatmentCount = treatmentMeans.size();
            this.uncompensatedAlpha = uncompensatedAlpha;
            this.subjectCounts = subjectCounts;
            this.treatmentMeans = treatmentMeans;
            this.testStatistics = this.computeTestStatistics(subjectCounts, treatmentMeans, meanSquaredResiduals);
            this.nullHypothesisProbabilities = this.computeNullHypothesisProbabilities(subjectCounts, this.testStatistics);
        }

        /**
         * Computes the test statistic for all treatment pairs and, as a side
         * effect, replaces {@link #standardErrors} with a freshly computed matrix.
         * For each pair the standard error is
         * {@code sqrt(meanSquaredResiduals * 0.5 * (1/n_i + 1/n_j))} and the
         * statistic is {@code |mean_i - mean_j| / standardError}.
         *
         * @param subjectCounts
         *            Number of subjects in each treatment
         * @param treatmentMeans
         *            Mean for each treatment
         * @param meanSquaredResiduals
         *            Pooled within-treatment variance
         * @return Test statistics, where the (i,j) element compares treatment "i" to treatment "j", the statistic is symmetric
         */
        public Matrix computeTestStatistics(final List<Integer> subjectCounts, final List<Double> treatmentMeans, final double meanSquaredResiduals) {
            int K = treatmentMeans.size();
            Matrix Z = MatrixFactory.getDefault().createMatrix(K, K);
            this.standardErrors = MatrixFactory.getDefault().createMatrix(K, K);

            // Only the upper triangle is computed; each value is mirrored.
            // Diagonal entries are never set explicitly.
            for (int i = 0; i < K; i++) {
                final double yi = treatmentMeans.get(i);
                final int ni = subjectCounts.get(i);
                for (int j = i + 1; j < K; j++) {
                    final int nj = subjectCounts.get(j);
                    final double yj = treatmentMeans.get(j);
                    final double standardError = Math.sqrt(meanSquaredResiduals * 0.5 * ((1.0 / ni) + (1.0 / nj)));
                    final double zij = Math.abs(yi - yj) / standardError;
                    Z.setElement(i, j, zij);
                    Z.setElement(j, i, zij);
                    this.standardErrors.setElement(i, j, standardError);
                    this.standardErrors.setElement(j, i, standardError);
                }
            }
            return Z;
        }

        /**
         * Computes the null-hypothesis probability for every (i,j) treatment
         * comparison as {@code 1 - CDF(z_ij)} of a studentized range
         * distribution constructed with K treatments and N - K as its second
         * argument, where N is the sum of the subject counts.
         *
         * @param subjectCounts
         *            Number of subjects in each treatment
         * @param Z
         *            Test statistics, where the (i,j) element compares treatment "i" to treatment "j"
         * @return Null-hypothesis probabilities; symmetric, with 1.0 on the diagonal
         */
        public Matrix computeNullHypothesisProbabilities(final List<Integer> subjectCounts, final Matrix Z) {
            final int K = Z.getNumRows();
            final double N = UnivariateStatisticsUtil.computeSum(subjectCounts);

            Matrix P = MatrixFactory.getDefault().createMatrix(K, K);
            StudentizedRangeDistribution.CDF cdf = new StudentizedRangeDistribution.CDF(K, N - K);
            for (int i = 0; i < K; i++) {
                // A treatment is equal to itself.
                P.setElement(i, i, 1.0);
                for (int j = i + 1; j < K; j++) {
                    // The difference is symmetric
                    final double zij = Z.getElement(i, j);
                    // No extra sqrt(2) factor here: the 0.5 inside the standard
                    // error already accounts for it.
                    final double pij = 1.0 - cdf.evaluate(zij);
                    P.setElement(i, j, pij);
                    P.setElement(j, i, pij);
                }
            }

            return P;
        }

        /**
         * Clones this statistic, copying the lists and matrices this class
         * declares (except the never-assigned treatmentVariances) so the clone
         * does not share them with the original.
         *
         * @return Copy of this statistic
         */
        @Override
        public Statistic clone() {
            Statistic clone = (Statistic) super.clone();
            clone.treatmentMeans = ObjectUtil.cloneSmartElementsAsArrayList(this.getTreatmentMeans());
            clone.subjectCounts = ObjectUtil.cloneSmartElementsAsArrayList(this.getSubjectCounts());
            clone.standardErrors = ObjectUtil.cloneSafe(this.standardErrors);
            clone.meanDifferences = ObjectUtil.cloneSafe(this.meanDifferences);
            return clone;
        }

        /**
         * Getter for subjectCounts
         * 
         * @return Number of subjects in each treatment
         */
        public List<Integer> getSubjectCounts() {
            return this.subjectCounts;
        }

        /**
         * Getter for treatmentMeans
         * 
         * @return Mean for each treatment
         */
        public List<Double> getTreatmentMeans() {
            return this.treatmentMeans;
        }

        /**
         * Accepts the null hypothesis for the (i,j) comparison when its
         * null-hypothesis probability is at least the uncompensated alpha.
         *
         * @param i
         *            Index of the first treatment
         * @param j
         *            Index of the second treatment
         * @return True if the null hypothesis is accepted, false if rejected
         */
        @Override
        public boolean acceptNullHypothesis(final int i, final int j) {
            return this.getNullHypothesisProbability(i, j) >= this.getUncompensatedAlpha();
        }

        /**
         * Getter for standardErrors
         * 
         * @return Gets the standard errors in the experiment
         */
        public Matrix getStandardErrors() {
            return this.standardErrors;
        }

    }

}



/**
 * 
 */
package com.gf.ye.yes.service.plot;

import static org.junit.Assert.assertEquals;
import gov.sandia.cognition.math.UnivariateStatisticsUtil;

import java.util.List;

import org.junit.Test;

import com.gf.ye.yes.service.plot.statistics.TukeyKramerConfidenceN1;
import com.google.common.collect.ImmutableList;

/**
 * @author fkurth
 *
 */
public class TukeyTestTest {

    /** Tolerance used when comparing treatment means. */
    private static final double MEAN_DELTA = 0.005;

    /** Tolerance used for the diagonal (self-comparison) probabilities. */
    private static final double DIAGONAL_DELTA = 0.00005;

    /** Tolerance used for the off-diagonal probabilities. */
    private static final double PROBABILITY_DELTA = 0.000001;

    /**
     * Textbook example, five treatments (one of them with only five subjects).
     *
     * Source: Rasch, Herrendoerfer, Bock, Victor, Guiard,
     * "Verfahrensbibliothek. Band 1." (in German), ISBN 3-486-23146-4, page 851.
     *
     * Verified with Statistica
     */
    List<List<Double>> testData = ImmutableList.<List<Double>>of(
            ImmutableList.of( 529d, 508d, 501d, 534d, 510d, 504d ),
            ImmutableList.of( 505d, 521d, 560d, 516d, 598d, 552d ),
            ImmutableList.of( 537d, 569d, 499d, 501d, 506d, 600d ),
            ImmutableList.of( 619d, 632d, 644d, 638d, 623d ),
            ImmutableList.of( 565d, 596d, 631d, 667d, 613d, 580d )
            );

    final TukeyKramerConfidenceN1 t = new TukeyKramerConfidenceN1();

    /**
     * Checks that the probability for (first, second) and then for
     * (second, first) both match the expected value.
     */
    private static void assertPairProbability(final TukeyKramerConfidenceN1.Statistic stat, final int first, final int second, final double expected) {
        assertEquals(expected, stat.getNullHypothesisProbability(first, second), PROBABILITY_DELTA);
        assertEquals(expected, stat.getNullHypothesisProbability(second, first), PROBABILITY_DELTA);
    }

    /**
     * Test method for {@link com.gf.ye.yes.service.plot.statistics.TukeyKramerConfidenceN1#evaluateNullHypotheses(java.util.Collection, double)}.
     */
    @Test
    public final void testEvaluateNullHypothesesCollectionOfQextendsCollectionOfQextendsNumberDouble() {

        final TukeyKramerConfidenceN1.Statistic stat = t.evaluateNullHypotheses(testData);

        // Treatment count
        final int treatments = stat.getTreatmentCount();
        assertEquals(5, treatments);

        // Treatment means, in treatment order
        final double[] expectedMeans = { 514.33d, 542.00d, 535.33d, 631.20d, 608.67d };
        final List<Double> means = stat.getTreatmentMeans();
        for (int index = 0; index < expectedMeans.length; index++) {
            assertEquals(expectedMeans[index], means.get(index), MEAN_DELTA);
        }

        // Total subjects and the resulting degrees of freedom
        final int subjects = (int) UnivariateStatisticsUtil.computeSum(stat.getSubjectCounts());
        assertEquals(29, subjects);

        final int degOfFreedom = subjects - treatments;
        assertEquals(24, degOfFreedom);

        // diagonals
        for (int index = 0; index < 5; index++) {
            assertEquals(1d, stat.getNullHypothesisProbability(index, index), DIAGONAL_DELTA);
        }

        // Off-diagonal pairs, each checked in both orientations
        assertPairProbability(stat, 0, 1, 0.541176d);
        assertPairProbability(stat, 0, 2, 0.763884d);
        assertPairProbability(stat, 0, 3, 0.000145d);
        assertPairProbability(stat, 0, 4, 0.000300d);
        assertPairProbability(stat, 1, 2, 0.995624d);
        assertPairProbability(stat, 1, 3, 0.000766d);
        assertPairProbability(stat, 1, 4, 0.008328d);
        assertPairProbability(stat, 2, 3, 0.000391d);
        assertPairProbability(stat, 4, 3, 0.748831d);
    }

}

Justin Basilico · Answer 1 · Fri Sep 05 2014 13:22:29 GMT+0800 (China Standard Time)

Good catch. Kevin submitted a fix and added this to the unit tests.