X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Ffactor_stats.h;h=d29ca4d80a39ae530d6de7415ad70a38d18fb3c1;hb=5156fa5a8323a16f6b4bbc8950221cdc1d0e023d;hp=67fd5f520ebcbf52ff54c5293959b38da312aa95;hpb=d2f8593a1f1d39a3264682af0da898a3d67b68cf;p=pspp-builds.git diff --git a/src/factor_stats.h b/src/factor_stats.h index 67fd5f52..d29ca4d8 100644 --- a/src/factor_stats.h +++ b/src/factor_stats.h @@ -25,60 +25,125 @@ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA /* FIXME: These things should probably be amalgamated with the group_statistics struct */ +#include "hash.h" +#include "val.h" +#include <gsl/gsl_histogram.h> + +struct moments1; struct metrics { double n; - - double ssq; - - double sum; + double n_missing; + double min; double max; double mean; - double stderr; + double se_mean; double var; double stddev; + + struct moments1 *moments; + + gsl_histogram *histogram; + + double skewness; + double kurtosis; + + double trimmed_mean; + + /* A hash of data for this factor. + */ + struct hsh_table *ordered_data; + + /* A Pointer to this hash table AFTER it has been SORTED and crunched */ + struct weighted_value **wvp; + + + /* The number of values in the above array + (if all the weights are 1, then this will + be the same as n) */ + int n_data; }; -struct factor_statistics { - /* The value of the independent variable for this factor */ - const union value *id; +void metrics_precalc(struct metrics *m); + +void metrics_calc(struct metrics *m, const union value *f, double weight, + int case_no); + +void metrics_postcalc(struct metrics *m); - /* An array of metrics indexed by dependent variable */ - struct metrics *stats; +/* Linked list of case nos */ +struct case_node +{ + int num; + struct case_node *next; }; +struct weighted_value +{ + union value v; + + /* The weight */ + double w; + + /* The cumulative weight */ + double cc; + + /* The rank */ + double rank; + + /* Linked list of cases nos which have this value */ + struct case_node *case_nos; + +}; 
-void metrics_precalc(struct metrics *fs); +struct weighted_value *weighted_value_create(void); -void metrics_calc(struct metrics *fs, double x, double weight); +void weighted_value_free(struct weighted_value *wv); -void metrics_postcalc(struct metrics *fs); +struct factor_statistics { + + /* The values of the independent variables */ + union value id[2]; + + /* The an array stats for this factor, one for each dependent var */ + struct metrics *m; + +}; + + +/* Create a factor statistics object with for N dependent vars + and ID as the value of the independent variable */ +struct factor_statistics * +create_factor_statistics (int n, union value *id0, union value *id1); -/* These functions are necessary for creating hashes */ +void factor_statistics_free(struct factor_statistics *f); -int compare_indep_values(const struct factor_statistics *f1, - const struct factor_statistics *f2, - int width); -unsigned hash_indep_value(const struct factor_statistics *f, int width) ; +/* Compare f0 and f1. + width is the width of the independent variable */ +int +factor_statistics_compare(const struct factor_statistics *f0, + const struct factor_statistics *f1, int width); -void free_factor_stats(struct factor_statistics *f, int width ); + +unsigned int +factor_statistics_hash(const struct factor_statistics *f, int width); #endif