X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Ffactor_stats.h;h=259694e8fc5aafd2d982404b57f958a1f2e86d6e;hb=b7e33825d30a18360f24a18faf4b7d2e9efb8142;hp=c7f1216221f5401fa8444f4c0888c9483012ac54;hpb=9c01f251cf0e5b5eb3899fc7c62cc595f3d48511;p=pspp diff --git a/src/factor_stats.h b/src/factor_stats.h index c7f1216221..259694e8fc 100644 --- a/src/factor_stats.h +++ b/src/factor_stats.h @@ -27,84 +27,141 @@ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA #include "hash.h" #include "val.h" +#include +#include +#include "subclist.h" +#include "percentiles.h" -struct weighted_value -{ - union value v; - - /* The weight */ - double w; - - /* The cumulative weight */ - double cc; - - /* The rank */ - double rank; -}; - - +struct moments1; struct metrics { double n; - - double ssq; - - double sum; + double n_missing; + double min; double max; double mean; - double stderr; + double se_mean; double var; double stddev; + struct moments1 *moments; + + gsl_histogram *histogram; + + double skewness; + double kurtosis; + double trimmed_mean; - /* An ordered arary of data for this factor */ + /* A hash of data for this factor. */ struct hsh_table *ordered_data; - /* An SORTED array of weighted values */ - struct weighted_value *wv; + /* A Pointer to this hash table AFTER it has been SORTED and crunched */ + struct weighted_value **wvp; + + /* The number of values in the above array + (if all the weights are 1, then this will + be the same as n) */ + int n_data; + + /* Percentile stuff */ + + /* A hash of struct percentiles */ + struct hsh_table *ptile_hash; + + /* Algorithm to be used for calculating percentiles */ + enum pc_alg ptile_alg; + + /* Tukey's Hinges */ + double hinge[3]; + }; +struct metrics * metrics_create(void); -struct factor_statistics { +void metrics_precalc(struct metrics *m); + +void metrics_calc(struct metrics *m, const union value *f, double weight, + int case_no); + +void metrics_postcalc(struct metrics *m); + +void metrics_destroy(struct metrics *m); - /* The value of the independent variable for this factor */ - const union value *id; - /* An array of metrics indexed by dependent variable */ - struct metrics *stats; +/* Linked list of case nos */ +struct case_node +{ + int num; + struct case_node *next; }; +struct weighted_value +{ + union value v; + /* The weight */ + double w; -void metrics_precalc(struct metrics *fs); + /* The cumulative weight */ + double cc; -void metrics_calc(struct metrics *fs, const union value *f, double weight); + /* The rank */ + double rank; -void metrics_postcalc(struct metrics *fs); + /* Linked list of cases nos which have this value */ + struct case_node *case_nos; + +}; + + +struct weighted_value *weighted_value_create(void); + +void weighted_value_free(struct weighted_value *wv); + + + +struct factor_statistics { + + /* The values of the independent variables */ + union value id[2]; + + /* The an array stats for this factor, one for each dependent var */ + struct metrics *m; + + /* The number of dependent variables */ + int n_var; +}; +/* Create a factor statistics object with for N dependent vars + and ID as the value of the independent variable */ +struct factor_statistics * +create_factor_statistics (int n, union value *id0, union value *id1); -/* These functions are necessary for creating hashes */ +void factor_statistics_free(struct factor_statistics *f); -int compare_indep_values(const struct factor_statistics *f1, - const struct factor_statistics *f2, - int width); -unsigned hash_indep_value(const struct factor_statistics *f, int width) ; +/* Compare f0 and f1. + width is the width of the independent variable */ +int +factor_statistics_compare(const struct factor_statistics *f0, + const struct factor_statistics *f1, int width); -void free_factor_stats(struct factor_statistics *f, int width ); + +unsigned int +factor_statistics_hash(const struct factor_statistics *f, int width); #endif