1 /* PSPP - A program for statistical analysis. -*-c-*-
3 Copyright (C) 2004 Free Software Foundation, Inc.
4 Author: John Darrington 2004
6 This program is free software; you can redistribute it and/or
7 modify it under the terms of the GNU General Public License as
8 published by the Free Software Foundation; either version 2 of the
9 License, or (at your option) any later version.
11 This program is distributed in the hope that it will be useful, but
12 WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software
18 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
21 #include "factor_stats.h"
25 #include "algorithm.h"
/* metrics_precalc: set-up step for the metrics in FS.
   Only part of the body is visible in this excerpt; the one statement
   shown creates the hash table kept in FS->ordered_data.  That table is
   later probed by metrics_calc and read back by metrics_postcalc. */
36 metrics_precalc(struct metrics *fs)
/* The table compares entries with compare_values and hashes them with
   hash_value (both cast to the hsh callback types).  The leading 20 is
   presumably an initial size hint -- confirm against hsh_create.  The
   remaining arguments of the call are not visible here. */
44 fs->ordered_data = hsh_create(20,
45 (hsh_compare_func *) compare_values,
46 (hsh_hash_func *) hash_value,
/* metrics_calc: fold one observation VAL, carrying WEIGHT, into the
   running totals held in FS and look the value up in FS->ordered_data.
   Parts of the body (including the branch conditions around the hash
   lookup) are not visible in this excerpt, so the notes below cover only
   the statements shown. */
52 metrics_calc(struct metrics *fs, const union value *val, double weight)
56 struct weighted_value **wv;
/* The arithmetic below works on the f member of VAL. */
57 const double x = val->f;
/* Weighted sum of squares and weighted sum; metrics_postcalc derives the
   mean and variance from these two totals. */
60 fs->ssq += x * x * weight;
61 fs->sum += x * weight;
/* Keep the smallest and largest value seen so far. */
63 if ( x < fs->min) fs->min = x;
64 if ( x > fs->max) fs->max = x;
/* Probe the hash table for VAL.  The result is treated as a pointer to a
   slot that holds a struct weighted_value pointer. */
67 wv = (struct weighted_value **) hsh_probe (fs->ordered_data,(void *) val );
71 /* If this value has already been seen, then simply
72 increase its weight */
/* Sanity check: the entry found must carry the same numeric value. */
74 assert( (*wv)->v.f == val->f );
/* Presumably the not-yet-seen branch (its condition is not visible):
   allocate a fresh entry and store it through the probed slot. */
79 *wv = xmalloc( sizeof (struct weighted_value) );
/* NOTE(review): the new entry was already stored through the slot returned
   by hsh_probe, and is then also passed to hsh_insert.  Confirm against
   the hsh API that this second step is harmless or required. */
82 hsh_insert(fs->ordered_data,(void *) *wv);
/* metrics_postcalc: derive the summary statistics in FS from the totals
   accumulated earlier (sum, ssq, n) and from the entries collected in
   FS->ordered_data.  It fills in mean, var, stddev, stderr, the wv array
   (with ranks) and trimmed_mean.  Several lines of the body, including
   the declarations and updates of i, j, k1, k2, tc, n_data and
   sample_var, are not visible in this excerpt. */
88 metrics_postcalc(struct metrics *fs)
97 struct weighted_value **data;
/* Mean is the accumulated sum divided by n. */
102 fs->mean = fs->sum / fs->n;
/* Mean of squares minus squared mean. */
104 sample_var = ( fs->ssq / fs->n - fs->mean * fs->mean );
/* Rescale by n / (n - 1).
   NOTE(review): the denominator is zero when fs->n is 1 (and the divisions
   above divide by zero when fs->n is 0); no guard is visible here. */
106 fs->var = fs->n * sample_var / ( fs->n - 1) ;
107 fs->stddev = sqrt(fs->var);
110 /* FIXME: Check this is correct ???
111 Shouldn't we use the sample variance ??? */
/* Computed as the square root of var / n.
   NOTE(review): stderr is also a macro defined by <stdio.h>; if that
   header is in scope in this translation unit the member access below is
   subject to macro expansion -- confirm. */
112 fs->stderr = sqrt (fs->var / fs->n) ;
/* Fetch the entries stored in the hash table and their count. */
114 data = (struct weighted_value **) hsh_data(fs->ordered_data);
115 n_data = hsh_count(fs->ordered_data);
/* Copy every entry by value into a newly allocated flat array, fs->wv. */
117 fs->wv = xmalloc ( sizeof (struct weighted_value) * n_data);
119 for ( i = 0 ; i < n_data ; ++i )
120 fs->wv[i] = *(data[i]);
/* Sort the array with compare_values as the comparator.
   NOTE(review): the comparator is handed struct weighted_value elements;
   this presumably relies on the value being the first member of that
   struct, and the meaning of the trailing 0 is not visible -- confirm. */
122 sort (fs->wv, n_data, sizeof (struct weighted_value) ,
123 (algo_compare_func *) compare_values, 0);
/* Walk the sorted entries and assign each a rank of j plus half of
   (weight - 1).  How j is maintained is not visible in this excerpt. */
132 for ( i = 0 ; i < n_data ; ++i )
137 fs->wv[i].rank = j + (fs->wv[i].w - 1) / 2.0 ;
/* Scan from the last entry downwards, testing tc against n minus the cc
   member of each entry; the loop body is not visible here.
   NOTE(review): the i >= 0 condition only terminates if i has a signed
   type; the declaration of i is not visible -- confirm. */
147 for ( i = n_data -1 ; i >= 0; --i )
149 if ( tc > fs->n - fs->wv[i].cc)
/* Trimmed mean: add up value * weight for the entries strictly between
   index k1 + 1 and index k2 ... */
154 fs->trimmed_mean = 0;
155 for ( i = k1 + 2 ; i <= k2 - 1 ; ++i )
157 fs->trimmed_mean += fs->wv[i].v.f * fs->wv[i].w;
/* ... then add the partial contributions of the two boundary entries
   (index k2 and index k1 + 1), each scaled by an amount computed from the
   cc members, n and tc ... */
161 fs->trimmed_mean += (fs->n - fs->wv[k2 - 1].cc - tc) * fs->wv[k2].v.f ;
162 fs->trimmed_mean += (fs->wv[k1 + 1].cc - tc) * fs->wv[k1 + 1].v.f ;
/* ... and divide by 0.9 * n.  Presumably this corresponds to trimming 5%
   of the total weight from each tail; the definition of tc is not visible
   here -- confirm. */
163 fs->trimmed_mean /= 0.9 * fs->n ;
168 /* Functions for hashes */
/* free_factor_stats: takes a factor_statistics F and a WIDTH that is
   marked UNUSED.  Only the signature is visible in this excerpt; given the
   name and the "Functions for hashes" heading it is presumably the
   free callback for a hash table of factor_statistics -- confirm. */
171 free_factor_stats(struct factor_statistics *f, int width UNUSED)
/* compare_indep_values: compare two factor_statistics, F1 and F2, by
   their id members.  The result is whatever compare_values returns for
   the two ids and WIDTH.  (The line declaring the width parameter is not
   visible in this excerpt.) */
177 compare_indep_values(const struct factor_statistics *f1,
178 const struct factor_statistics *f2,
181 return compare_values(f1->id, f2->id, width);
/* hash_indep_value: hash a factor_statistics F by its id member.  The
   result is whatever hash_value returns for F->id and WIDTH. */
186 hash_indep_value(const struct factor_statistics *f, int width)
188 return hash_value(f->id, width);