X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Ffactor_stats.h;h=f6394a8a95aa1a8358b90e1812edb32d521bfc12;hb=4239c455e7b1061b7c960b793f9080e113123845;hp=67fd5f520ebcbf52ff54c5293959b38da312aa95;hpb=d2f8593a1f1d39a3264682af0da898a3d67b68cf;p=pspp-builds.git diff --git a/src/factor_stats.h b/src/factor_stats.h index 67fd5f52..f6394a8a 100644 --- a/src/factor_stats.h +++ b/src/factor_stats.h @@ -25,60 +25,137 @@ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA /* FIXME: These things should probably be amalgamated with the group_statistics struct */ +#include "hash.h" +#include "val.h" +#include <gsl/gsl_histogram.h> +#include "subclist.h" +#include "percentiles.h" + +struct moments1; struct metrics { double n; - - double ssq; - - double sum; + double n_missing; + double min; double max; double mean; - double stderr; + double se_mean; double var; double stddev; + + struct moments1 *moments; + + gsl_histogram *histogram; + + double skewness; + double kurtosis; + + double trimmed_mean; + + /* A hash of data for this factor. 
*/ + struct hsh_table *ordered_data; + + /* A Pointer to this hash table AFTER it has been SORTED and crunched */ + struct weighted_value **wvp; + + /* The number of values in the above array + (if all the weights are 1, then this will + be the same as n) */ + int n_data; + + /* Percentile stuff */ + + /* A hash of struct percentiles */ + struct hsh_table *ptile_hash; + + /* Algorithm to be used for calculating percentiles */ + enum pc_alg ptile_alg; + + /* Tukey's Hinges */ + double hinges[3]; + }; -struct factor_statistics { - /* The value of the independent variable for this factor */ - const union value *id; +void metrics_precalc(struct metrics *m); - /* An array of metrics indexed by dependent variable */ - struct metrics *stats; +void metrics_calc(struct metrics *m, const union value *f, double weight, + int case_no); +void metrics_postcalc(struct metrics *m); + + +/* Linked list of case nos */ +struct case_node +{ + int num; + struct case_node *next; }; +struct weighted_value +{ + union value v; + + /* The weight */ + double w; + /* The cumulative weight */ + double cc; + + /* The rank */ + double rank; + + /* Linked list of cases nos which have this value */ + struct case_node *case_nos; + +}; -void metrics_precalc(struct metrics *fs); -void metrics_calc(struct metrics *fs, double x, double weight); +struct weighted_value *weighted_value_create(void); -void metrics_postcalc(struct metrics *fs); +void weighted_value_free(struct weighted_value *wv); + + + +struct factor_statistics { + + /* The values of the independent variables */ + union value id[2]; + + /* The an array stats for this factor, one for each dependent var */ + struct metrics *m; + +}; +/* Create a factor statistics object with for N dependent vars + and ID as the value of the independent variable */ +struct factor_statistics * +create_factor_statistics (int n, union value *id0, union value *id1); -/* These functions are necessary for creating hashes */ +void factor_statistics_free(struct 
factor_statistics *f); -int compare_indep_values(const struct factor_statistics *f1, - const struct factor_statistics *f2, - int width); -unsigned hash_indep_value(const struct factor_statistics *f, int width) ; +/* Compare f0 and f1. + width is the width of the independent variable */ +int +factor_statistics_compare(const struct factor_statistics *f0, + const struct factor_statistics *f1, int width); -void free_factor_stats(struct factor_statistics *f, int width ); + +unsigned int +factor_statistics_hash(const struct factor_statistics *f, int width); #endif