From: John Darrington Date: Sat, 23 Aug 2008 06:08:28 +0000 (+0800) Subject: Rewrote the EXAMINE command. X-Git-Tag: v0.7.1~50^2~72 X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=bb611ecc6f1c33a0a7c691785927c545d18696f6;p=pspp-builds.git Rewrote the EXAMINE command. Made use of the new casegrouper construct instead of relying on hashes. Rewrote the percentiles calculations so as not to require dynamically allocating memory for all the data. Encapsulated many of the other statistics' calculations, so as to have similar interfaces. --- diff --git a/src/language/stats/examine.q b/src/language/stats/examine.q index 7f197ec3..94f36405 100644 --- a/src/language/stats/examine.q +++ b/src/language/stats/examine.q @@ -110,16 +110,16 @@ struct factor_metrics struct statistic *histogram; struct order_stats *np; - /* Three quartiles indexing into PTL */ + /* Three quartiles indexing into PTL */ struct percentile **quartiles; /* A reader sorted in ASCENDING order */ struct casereader *up_reader; - /* The minimum value of all the weights */ + /* The minimum value of all the weights */ double cmin; - /* Sum of all weights, including those for missing values */ + /* Sum of all weights, including those for missing values */ double n; double mean; @@ -148,13 +148,13 @@ struct factor_result struct xfactor { - /* We need to make a list of this structure */ + /* We need to make a list of this structure */ struct ll ll; - /* The independent variable */ + /* The independent variable */ const struct variable const* indep_var[2]; - /* A list of results for this factor */ + /* A list of results for this factor */ struct ll_list result_list ; }; @@ -197,7 +197,7 @@ factor_destroy (struct xfactor *fctr) static struct xfactor level0_factor; static struct ll_list factor_list = LL_INITIALIZER (factor_list); -/* Parse the clause specifying the factors */ +/* Parse the clause specifying 
the factors */ static int examine_parse_independent_vars (struct lexer *lexer, const struct dictionary *dict, struct cmd_examine *cmd); @@ -243,7 +243,7 @@ static void factor_to_string (const struct xfactor *fctr, /* Represent a factor as a string, so it can be printed in a human readable fashion, - but sacrificing some readablility for the sake of brevity */ + but sacrificing some readablility for the sake of brevity */ static void factor_to_string_concise (const struct xfactor *fctr, const struct factor_result *result, @@ -271,7 +271,7 @@ cmd_examine (struct lexer *lexer, struct dataset *ds) return CMD_FAILURE; } - /* If /MISSING=INCLUDE is set, then user missing values are ignored */ + /* If /MISSING=INCLUDE is set, then user missing values are ignored */ exclude_values = cmd.incl == XMN_INCLUDE ? MV_SYSTEM : MV_ANY; if ( cmd.st_n == SYSMIS ) @@ -280,7 +280,7 @@ cmd_examine (struct lexer *lexer, struct dataset *ds) if ( ! cmd.sbc_cinterval) cmd.n_cinterval[0] = 95.0; - /* If descriptives have been requested, make sure the + /* If descriptives have been requested, make sure the quartiles are calculated */ if ( cmd.a_statistics[XMN_ST_DESCRIPTIVES] ) { @@ -1020,7 +1020,7 @@ examine_group (struct cmd_examine *cmd, struct casereader *reader, int level, order_stats_accumulate (os, n_os, casereader_clone (metric->up_reader), - wv, dependent_vars[v], MV_ANY); + wv, dependent_vars[v]); free (os); } } @@ -1091,7 +1091,7 @@ examine_group (struct cmd_examine *cmd, struct casereader *reader, int level, order_stats_accumulate ((struct order_stats **) &metric->box_whisker, 1, casereader_clone (metric->up_reader), - wv, dependent_vars[v], MV_ANY); + wv, dependent_vars[v]); } } diff --git a/src/math/automake.mk b/src/math/automake.mk index 94a3a0a8..c9e1cddd 100644 --- a/src/math/automake.mk +++ b/src/math/automake.mk @@ -5,10 +5,14 @@ include $(top_srcdir)/src/math/ts/automake.mk noinst_LIBRARIES += src/math/libpspp_math.a 
src_math_libpspp_math_a_SOURCES = \ - src/math/box-whisker.c src/math/box-whiske.h \ - src/math/chart-geometry.c src/math/chart-geometry.h \ - src/math/coefficient.c src/math/coefficient.h \ - src/math/covariance-matrix.c src/math/covariance-matrix.h \ + src/math/chart-geometry.c \ + src/math/chart-geometry.h \ + src/math/box-whisker.c src/math/box-whisker.h \ + src/math/coefficient.c \ + src/math/coefficient.h \ + src/math/covariance-matrix.c \ + src/math/covariance-matrix.h \ + src/math/design-matrix.c src/math/design-matrix.h \ src/math/extrema.c src/math/extrema.h \ src/math/group.c src/math/group.h \ src/math/group-proc.h \ @@ -16,15 +20,12 @@ src_math_libpspp_math_a_SOURCES = \ src/math/interaction.c src/math/interaction.h \ src/math/levene.c src/math/levene.h \ src/math/linreg.c src/math/linreg.h \ - src/math/merge.c src/math/merge.h \ + src/math/merge.c src/math/merge.h \ src/math/moments.c src/math/moments.h \ src/math/np.c src/math/np.h \ src/math/order-stats.c src/math/order-stats.h \ src/math/percentiles.c src/math/percentiles.h \ - src/math/design-matrix.c src/math/design-matrix.h \ src/math/random.c src/math/random.h \ src/math/sort.c src/math/sort.h \ src/math/trimmed-mean.c src/math/trimmed-mean.h \ - src/math/tukey-hinges.c src/math/tukey-hinges.h - -EXTRA_DIST += src/math/OChangeLog + src/math/tukey-hinges.c src/math/tukey-hinges.h diff --git a/src/math/order-stats.c b/src/math/order-stats.c index ca4160f4..f5b6851a 100644 --- a/src/math/order-stats.c +++ b/src/math/order-stats.c @@ -23,36 +23,6 @@ #include #include -#if 0 - -#include - -static void -order_stats_dump_k1 (const struct order_stats *os) -{ - struct k *k = &os->k[0]; - printf ("K1: tc %g; c %g cc %g ccp %g\n", - k->tc, k->c, k->cc, k->cc_p1); - -} - -static void -order_stats_dump_k2 (const struct order_stats *os) -{ - struct k *k = &os->k[1]; - printf ("K2: tc %g; c %g cc %g ccp %g\n", - k->tc, k->c, k->cc, k->cc_p1); -} - - -void -order_stats_dump (const struct order_stats *os) -{ - 
order_stats_dump_k1 (os); - order_stats_dump_k2 (os); -} - -#endif static void update_k_lower (struct k *kk, @@ -85,7 +55,6 @@ update_k_values (const struct ccase *cx, double y_i, double c_i, double cc_i, struct order_stats **os, size_t n_os) { int j; - for (j = 0 ; j < n_os ; ++j) { int k; @@ -100,18 +69,15 @@ update_k_values (const struct ccase *cx, double y_i, double c_i, double cc_i, if ( stat->accumulate ) stat->accumulate (stat, cx, c_i, cc_i, y_i); - - tos->cc = cc_i; } } void order_stats_accumulate (struct order_stats **os, size_t nos, - struct casereader *reader, - const struct variable *wv, - const struct variable *var, - enum mv_class exclude) + struct casereader *reader, + const struct variable *wv, + const struct variable *var) { struct ccase cx; struct ccase prev_cx; @@ -130,9 +96,6 @@ order_stats_accumulate (struct order_stats **os, size_t nos, /* The casereader MUST be sorted */ assert (this_value >= prev_value); - if ( var_is_value_missing (var, case_data (&cx, var), exclude)) - continue; - case_destroy (&prev_cx); if ( prev_value == -DBL_MAX || prev_value == this_value) @@ -154,6 +117,3 @@ order_stats_accumulate (struct order_stats **os, size_t nos, casereader_destroy (reader); } - - - diff --git a/src/math/order-stats.h b/src/math/order-stats.h index cea50ed8..fc588950 100644 --- a/src/math/order-stats.h +++ b/src/math/order-stats.h @@ -29,11 +29,17 @@ struct variable; struct k { double tc; + double cc; + double cc_p1; + double c; + double c_p1; + double y; + double y_p1; }; @@ -43,18 +49,18 @@ struct order_stats struct statistic parent; int n_k; struct k *k; - - double cc; }; -enum mv_class; -void order_stats_dump (const struct order_stats *os); + +void dump_ptile_k1 (const struct order_stats *ptl); + +void dump_ptile_k2 (const struct order_stats *ptl); + void order_stats_accumulate (struct order_stats **ptl, size_t nos, - struct casereader *reader, - const struct variable *wv, - const struct variable *var, - enum mv_class exclude); + struct 
casereader *reader, + const struct variable *wv, + const struct variable *var); #endif diff --git a/src/math/percentiles.c b/src/math/percentiles.c index bf99de16..53d704d6 100644 --- a/src/math/percentiles.c +++ b/src/math/percentiles.c @@ -47,8 +47,6 @@ percentile_calculate (const struct percentile *ptl, enum pc_alg alg) struct percentile *mutable = (struct percentile *) ptl; const struct order_stats *os = &ptl->parent; - assert (os->cc == ptl->w); - if ( ptl->g1 == SYSMIS) mutable->g1 = (os->k[0].tc - os->k[0].cc) / os->k[0].c_p1; @@ -171,7 +169,6 @@ percentile_create (double p, double W) assert (p <= 1.0); ptl->ptile = p; - ptl->w = W; os->n_k = 2; os->k = xcalloc (sizeof (*os->k), 2); @@ -189,3 +186,15 @@ percentile_create (double p, double W) return os; } +#if 0 +void +percentile_dump (const struct percentile *ptl) +{ + printf ("Percentile %g:\n\tk1: ", ptl->ptile); + + dump_os_k1 ((const struct os *)ptl); + printf ("\tk2: "); + dump_os_k2 ((const struct os *)ptl); + printf ("\n"); +} +#endif diff --git a/src/math/percentiles.h b/src/math/percentiles.h index 0dd09820..93558463 100644 --- a/src/math/percentiles.h +++ b/src/math/percentiles.h @@ -39,7 +39,6 @@ struct percentile struct order_stats parent; double ptile; - double w; /* Mutable */ double g1; diff --git a/src/math/trimmed-mean.c b/src/math/trimmed-mean.c index da3d4240..fa205017 100644 --- a/src/math/trimmed-mean.c +++ b/src/math/trimmed-mean.c @@ -77,8 +77,6 @@ trimmed_mean_calculate (const struct trimmed_mean *tm) { const struct order_stats *os = (const struct order_stats *) tm; - assert (os->cc == tm->w); - return ( (os->k[0].cc_p1 - os->k[0].tc) * os->k[0].y_p1