From 9c01f251cf0e5b5eb3899fc7c62cc595f3d48511 Mon Sep 17 00:00:00 2001 From: John Darrington Date: Thu, 11 Nov 2004 13:23:44 +0000 Subject: [PATCH] Added npplot and detrended np plots --- src/ChangeLog | 6 ++ src/cartesian.c | 261 ++++++++++++++------------------------------- src/chart.c | 48 ++++++++- src/chart.h | 43 ++++++-- src/examine.q | 198 +++++++++++++++++++++++++++++----- src/factor_stats.c | 100 ++++++++++++++++- src/factor_stats.h | 28 ++++- src/histogram.c | 34 +----- 8 files changed, 469 insertions(+), 249 deletions(-) diff --git a/src/ChangeLog b/src/ChangeLog index bc9e29cf..bc6ee51e 100644 --- a/src/ChangeLog +++ b/src/ChangeLog @@ -1,3 +1,9 @@ +Thu Nov 11 21:01:31 WST 2004 John Darrington + + * examine.q cartesian.c chart.[ch] Added normal and detrended normal + plots. Changed the API of the cartesian plot to be a much lower level + thing. + Sun Nov 7 17:25:04 WST 2004 John Darrington * examine.q Added some of the parametric calculations diff --git a/src/cartesian.c b/src/cartesian.c index 7fc74678..a2396abc 100644 --- a/src/cartesian.c +++ b/src/cartesian.c @@ -24,186 +24,100 @@ -static const double y_min = 15; -static const double y_max = 120.0; -static const double y_tick = 20.0; - - - -static const double x_min = -11.0; -static const double x_max = 19.0; -static const double x_tick = 5.0; - - -struct datum -{ - double x; - double y; -}; - - -static const struct datum data1[]= - { - { -8.0, 29 }, - { -3.7, 45 }, - { -3.3, 67 }, - { -0.8, 89 }, - { -0.2, 93 }, - { 1.0, 100}, - { 2.3, 103}, - { 4.0, 103.4}, - { 5.2, 104}, - { 5.9, 106}, - { 10.3, 106}, - { 13.8, 108}, - { 15.8, 109}, - }; - - - - -static const struct datum data2[]= - { - { -9.1, 20 }, - { -8.2, 17 }, - { -5.0, 19 }, - { -3.7, 25 }, - { -1.6, 49 }, - { -1.3, 61 }, - { -1.1, 81 }, - { 3.5, 91}, - { 5.4, 93}, - { 9.3, 94}, - { 14.3, 92} - }; - - - - struct dataset { - const struct datum *data; int n_data; char *label; }; + #define DATASETS 2 static const struct dataset dataset[DATASETS] = { - {data1, 13, "male"}, - {data2, 11, "female"}, + { 13, "male"}, + { 11, "female"}, }; -typedef void (*plot_func) (struct chart *ch, const struct dataset *dataset); - - -void plot_line(struct chart *ch, const struct dataset *dataset); - -void plot_scatter(struct chart *ch, const struct dataset *dataset); - - static void write_legend(struct chart *chart, const char *heading, int n); -void draw_cartesian(struct chart *ch, const char *title, - const char *xlabel, const char *ylabel, plot_func pf); - -void -draw_scatterplot(struct chart *ch, const char *title, - const char *xlabel, const char *ylabel) +/* Write the abscissa label */ +void +chart_write_xlabel(struct chart *ch, const char *label) { - draw_cartesian(ch, title, xlabel, ylabel, plot_scatter); -} + pl_savestate_r(ch->lp); -void -draw_lineplot(struct chart *ch, const char *title, - const char *xlabel, const char *ylabel) -{ - draw_cartesian(ch, title, xlabel, ylabel, plot_scatter); -} + pl_move_r(ch->lp,ch->data_left, ch->abscissa_top); + pl_alabel_r(ch->lp,0,'t',label); + pl_restorestate_r(ch->lp); -void -draw_cartesian(struct chart *ch, const char *title, - const char *xlabel, const char *ylabel, plot_func pf) +} + +/* Set the scale for the abscissa */ +void +chart_write_xscale(struct chart *ch, double min, double max, double tick) { double x; - double y; - - - int d; - - const double ordinate_scale = - fabs(ch->data_top - ch->data_bottom) - / fabs(y_max - y_min) ; + ch->x_max = ceil( max / tick ) * tick ; + ch->x_min = floor ( min / tick ) * tick ; + ch->abscissa_scale = fabs(ch->data_right - ch->data_left) / + fabs(ch->x_max - ch->x_min); - const double abscissa_scale = - fabs(ch->data_right - ch->data_left) - / - fabs(x_max - x_min); + for(x = ch->x_min ; x <= ch->x_max; x += tick ) + draw_tick (ch, TICK_ABSCISSA, (x - ch->x_min) * ch->abscissa_scale, "%g", x); - - /* Move to data bottom-left */ - pl_move_r(ch->lp, ch->data_left, ch->data_bottom); - - pl_savestate_r(ch->lp); +} - for(x = x_tick * ceil(x_min / x_tick ) ; - x < x_max; - x += x_tick ) - draw_tick (ch, TICK_ABSCISSA, (x - x_min) * abscissa_scale, "%g", x); +/* Set the scale for the ordinate */ +void +chart_write_yscale(struct chart *ch, double min, double max, double tick) +{ + double y; - for(y = y_tick * ceil(y_min / y_tick ) ; - y < y_max; - y += y_tick ) - draw_tick (ch, TICK_ORDINATE, (y - y_min) * ordinate_scale, "%g", y); + ch->y_max = ceil( max / tick ) * tick ; + ch->y_min = floor ( min / tick ) * tick ; - pl_savestate_r(ch->lp); + ch->ordinate_scale = + fabs(ch->data_top - ch->data_bottom) / fabs(ch->y_max - ch->y_min) ; - for (d = 0 ; d < DATASETS ; ++d ) + for(y = ch->y_min ; y <= ch->y_max; y += tick ) { - pl_pencolorname_r(ch->lp,data_colour[d]); - pf(ch, &dataset[d]); + draw_tick (ch, TICK_ORDINATE, + (y - ch->y_min) * ch->ordinate_scale, "%g", y); } - - pl_restorestate_r(ch->lp); - /* Write the abscissa label */ - pl_move_r(ch->lp,ch->data_left, ch->abscissa_top); - pl_alabel_r(ch->lp,0,'t',xlabel); +} - - /* Write the ordinate label */ - pl_savestate_r(ch->lp); - pl_move_r(ch->lp,ch->data_bottom, ch->ordinate_right); - pl_textangle_r(ch->lp,90); - pl_alabel_r(ch->lp,0,0,ylabel); - pl_restorestate_r(ch->lp); - chart_write_title(ch, title); +/* Write the ordinate label */ +void +chart_write_ylabel(struct chart *ch, const char *label) +{ + pl_savestate_r(ch->lp); - write_legend(ch,"Key:",DATASETS); + pl_move_r(ch->lp, ch->data_bottom, ch->ordinate_right); + pl_textangle_r(ch->lp, 90); + pl_alabel_r(ch->lp, 0, 0, label); pl_restorestate_r(ch->lp); - } - static void write_legend(struct chart *chart, const char *heading, int n) @@ -244,70 +158,61 @@ write_legend(struct chart *chart, const char *heading, } - +/* Plot a data point */ void -plot_line(struct chart *ch, const struct dataset *dataset) +chart_datum(struct chart *ch, int dataset, double x, double y) { - int i; - - const struct datum *data = dataset->data; - - const double ordinate_scale = - fabs(ch->data_top - ch->data_bottom) - / fabs(y_max - y_min) ; + const double x_pos = + (x - ch->x_min) * ch->abscissa_scale + ch->data_left ; + const double y_pos = + (y - ch->y_min) * ch->ordinate_scale + ch->data_bottom ; - const double abscissa_scale = - fabs(ch->data_right - ch->data_left) - / - fabs(x_max - x_min); + pl_savestate_r(ch->lp); + + pl_fmarker_r(ch->lp, x_pos, y_pos, 6, 15); - for( i = 0 ; i < dataset->n_data ; ++i ) - { - const double x = - (data[i].x - x_min) * abscissa_scale + ch->data_left ; - const double y = - (data[i].y - y_min) * ordinate_scale + ch->data_bottom; - - if (i == 0 ) - pl_move_r(ch->lp, x, y ); - else - pl_fcont_r(ch->lp, x, y); - } - pl_endpath_r(ch->lp); + pl_restorestate_r(ch->lp); } - - - +/* Draw a line with slope SLOPE and intercept INTERCEPT. + between the points limit1 and limit2. + If lim_dim is CHART_DIM_Y then the limit{1,2} are on the + y axis otherwise the x axis +*/ void -plot_scatter(struct chart *ch, const struct dataset *dataset) +chart_line(struct chart *ch, double slope, double intercept, + double limit1, double limit2, enum CHART_DIM lim_dim) { - int i; - - const struct datum *data = dataset->data; - - const double ordinate_scale = - fabs(ch->data_top - ch->data_bottom) - / fabs(y_max - y_min) ; + double x1, y1; + double x2, y2 ; + if ( lim_dim == CHART_DIM_Y ) + { + x1 = ( limit1 - intercept ) / slope ; + x2 = ( limit2 - intercept ) / slope ; + y1 = limit1; + y2 = limit2; + } + else + { + x1 = limit1; + x2 = limit2; + y1 = slope * x1 + intercept; + y2 = slope * x2 + intercept; + } - const double abscissa_scale = - fabs(ch->data_right - ch->data_left) - / - fabs(x_max - x_min); + y1 = (y1 - ch->y_min) * ch->ordinate_scale + ch->data_bottom ; + y2 = (y2 - ch->y_min) * ch->ordinate_scale + ch->data_bottom ; + x1 = (x1 - ch->x_min) * ch->abscissa_scale + ch->data_left ; + x2 = (x2 - ch->x_min) * ch->abscissa_scale + ch->data_left ; + pl_savestate_r(ch->lp); - for( i = 0 ; i < dataset->n_data ; ++i ) - { - const double x = - (data[i].x - x_min) * abscissa_scale + ch->data_left ; - const double y = - (data[i].y - y_min) * ordinate_scale + ch->data_bottom; + pl_fline_r(ch->lp, x1, y1, x2, y2); - pl_fmarker_r(ch->lp, x, y, 6, 15); - } + pl_restorestate_r(ch->lp); } diff --git a/src/chart.c b/src/chart.c index abf4e70b..c550edf1 100644 --- a/src/chart.c +++ b/src/chart.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include @@ -141,14 +142,22 @@ draw_tick(struct chart *chart, +/* Write the title on a chart*/ void -chart_write_title(struct chart *chart, const char *title) +chart_write_title(struct chart *chart, const char *title, ...) { - /* Write the title */ + va_list ap; + char buf[100]; + pl_savestate_r(chart->lp); pl_ffontsize_r(chart->lp,chart->font_size * 1.5); pl_move_r(chart->lp,chart->data_left, chart->title_bottom); - pl_alabel_r(chart->lp,0,0,title); + + va_start(ap,title); + vsnprintf(buf,100,title,ap); + pl_alabel_r(chart->lp,0,0,buf); + va_end(ap); + pl_restorestate_r(chart->lp); } @@ -171,3 +180,36 @@ chart_finalise(struct chart *chart) } + + + +/* Adjust tick to be a sensible value + ie: ... 0.1,0.2,0.5, 1,2,5, 10,20,50 ... */ +double +chart_rounded_tick(double tick) +{ + + int i; + + double diff = DBL_MAX; + double t = tick; + + static const double standard_ticks[] = {1, 2, 5, 10}; + + const double factor = pow(10,ceil(log10(standard_ticks[0] / tick))) ; + + for (i = 3 ; i >= 0 ; --i) + { + const double d = fabs( tick - standard_ticks[i] / factor ) ; + + if ( d < diff ) + { + diff = d; + t = standard_ticks[i] / factor ; + } + } + + return t; + +} + diff --git a/src/chart.h b/src/chart.h index 47c6c7d8..aaf7abc8 100644 --- a/src/chart.h +++ b/src/chart.h @@ -59,6 +59,14 @@ struct chart { char fill_colour[10]; + /* Stuff Particular to Cartesians */ + double ordinate_scale; + double abscissa_scale; + double x_min; + double x_max; + double y_min; + double y_max; + }; @@ -68,8 +76,10 @@ void chart_finalise(struct chart *ch); -void chart_write_title(struct chart *ch, - const char *title); +void chart_write_xlabel(struct chart *ch, const char *label); +void chart_write_ylabel(struct chart *ch, const char *label); + +void chart_write_title(struct chart *ch, const char *title, ...); enum tick_orientation { TICK_ABSCISSA=0, @@ -110,16 +120,37 @@ void draw_histogram(struct chart *ch, int show_normal); +double chart_rounded_tick(double tick); + void draw_piechart(struct chart *ch, const struct variable *v); -void draw_scatterplot(struct chart *ch, const char *title, - const char *xlabel, const char *ylabel); +void draw_scatterplot(struct chart *ch); + + +void draw_lineplot(struct chart *ch); + + +void chart_write_xscale(struct chart *ch, + double min, double max, double tick); + +void chart_write_yscale(struct chart *ch, + double min, double max, double tick); + + +void chart_datum(struct chart *ch, int dataset, double x, double y); + + +enum CHART_DIM + { + CHART_DIM_X, + CHART_DIM_Y + }; -void draw_lineplot(struct chart *ch, const char *title, - const char *xlabel, const char *ylabel); +void chart_line(struct chart *ch, double slope, double intercept, + double limit1, double limit2, enum CHART_DIM limit_d); #endif diff --git a/src/examine.q b/src/examine.q index 91855e42..844542ad 100644 --- a/src/examine.q +++ b/src/examine.q @@ -40,6 +40,7 @@ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA #include "hash.h" #include "casefile.h" #include "factor_stats.h" +#include "chart.h" /* (specification) "EXAMINE" (xmn_): @@ -67,7 +68,7 @@ static struct variable **dependent_vars; static int n_dependent_vars; -static struct hsh_table *hash_table_factors; +static struct hsh_table *hash_table_factors=0; @@ -119,6 +120,10 @@ static void show_extremes(struct variable **dependent_var, int n_extremities); +void np_plot(const struct metrics *m, const char *varname); + + + /* Per Split function */ static void run_examine(const struct casefile *cf, void *cmd_); @@ -178,8 +183,24 @@ output_examine(void) if ( cmd.a_statistics[XMN_ST_EXTREME]) show_extremes(dependent_vars, n_dependent_vars, 0, cmd.st_n); } + + if ( cmd.sbc_plot) + { + if ( cmd.a_plot[XMN_PLT_NPPLOT] ) + { + int v; + + for ( v = 0 ; v < n_dependent_vars; ++v ) + { + np_plot(&totals->stats[v], var_to_string(dependent_vars[v])); + } + + } + } + } + /* Show grouped statistics if appropriate */ if ( hash_table_factors && 0 != hsh_count (hash_table_factors)) { @@ -200,10 +221,34 @@ output_examine(void) if ( cmd.a_statistics[XMN_ST_EXTREME]) show_extremes(dependent_vars, n_dependent_vars, f, cmd.st_n); } + + + if ( cmd.sbc_plot) + { + if ( cmd.a_plot[XMN_PLT_NPPLOT] ) + { + struct hsh_iterator h2; + struct factor_statistics *foo ; + for (foo = hsh_first(f->hash_table_val,&h2); + foo != 0 ; + foo = hsh_next(f->hash_table_val,&h2)) + { + int v; + for ( v = 0 ; v < n_dependent_vars; ++ v) + { + char buf[100]; + sprintf(buf, "%s (%s = %s)", + var_to_string(dependent_vars[v]), + var_to_string(f->indep_var), + value_to_string(foo->id,f->indep_var)); + np_plot(&foo->stats[v], buf); + } + } + } + } } } - } @@ -630,6 +675,12 @@ populate_descriptives(struct tab_table *tbl, int col, int row, TAB_LEFT | TAT_TITLE, _("5% Trimmed Mean")); + tab_float (tbl, col + 2, + row + 3, + TAB_CENTER, + m->trimmed_mean, + 8,2); + tab_text (tbl, col, row + 4, TAB_LEFT | TAT_TITLE, @@ -922,7 +973,7 @@ run_examine(const struct casefile *cf, void *cmd_) for(r = casefile_get_reader (cf); casereader_read (r, &c) ; - case_destroy (&c)) + case_destroy (&c) ) { const double weight = dict_get_case_weight(default_dict, &c, &bad_weight_warn); @@ -932,7 +983,7 @@ run_examine(const struct casefile *cf, void *cmd_) const struct variable *var = dependent_vars[v]; const union value *val = case_data (&c, var->fv); - metrics_calc(&totals->stats[v], val->f, weight); + metrics_calc(&totals->stats[v], val, weight); } if ( hash_table_factors ) @@ -965,7 +1016,7 @@ run_examine(const struct casefile *cf, void *cmd_) const struct variable *var = dependent_vars[v]; const union value *val = case_data (&c, var->fv); - metrics_calc( &(*foo)->stats[v], val->f, weight ); + metrics_calc( &(*foo)->stats[v], val, weight ); } if ( fctr->subfactor ) @@ -998,7 +1049,7 @@ run_examine(const struct casefile *cf, void *cmd_) const struct variable *var = dependent_vars[v]; const union value *val = case_data (&c, var->fv); - metrics_calc( &(*bar)->stats[v], val->f, weight ); + metrics_calc( &(*bar)->stats[v], val, weight ); } } } @@ -1008,32 +1059,35 @@ run_examine(const struct casefile *cf, void *cmd_) for ( v = 0 ; v < n_dependent_vars ; ++v) { - for ( fctr = hsh_first(hash_table_factors, &hi); - fctr != 0; - fctr = hsh_next (hash_table_factors, &hi) ) + if ( hash_table_factors ) { - struct hsh_iterator h2; - struct factor_statistics *fs; - - for ( fs = hsh_first(fctr->hash_table_val,&h2); - fs != 0; - fs = hsh_next(fctr->hash_table_val,&h2)) + for ( fctr = hsh_first(hash_table_factors, &hi); + fctr != 0; + fctr = hsh_next (hash_table_factors, &hi) ) + { + struct hsh_iterator h2; + struct factor_statistics *fs; + + for ( fs = hsh_first(fctr->hash_table_val,&h2); + fs != 0; + fs = hsh_next(fctr->hash_table_val,&h2)) { metrics_postcalc( &fs->stats[v] ); } - if ( fctr->subfactor) - { - struct hsh_iterator hsf; - struct factor_statistics *fss; - - for ( fss = hsh_first(fctr->subfactor->hash_table_val,&hsf); - fss != 0; - fss = hsh_next(fctr->subfactor->hash_table_val,&hsf)) - { - metrics_postcalc( &fss->stats[v] ); - } - } + if ( fctr->subfactor) + { + struct hsh_iterator hsf; + struct factor_statistics *fss; + + for ( fss = hsh_first(fctr->subfactor->hash_table_val,&hsf); + fss != 0; + fss = hsh_next(fctr->subfactor->hash_table_val,&hsf)) + { + metrics_postcalc( &fss->stats[v] ); + } + } + } } metrics_postcalc(&totals->stats[v]); @@ -1270,3 +1324,93 @@ populate_extremities(struct tab_table *t, int col, int row, int n) } + +/* Plot the normal and detrended normal plots for m + Label the plots with factorname */ +void +np_plot(const struct metrics *m, const char *factorname) +{ + int i; + double yfirst=0, ylast=0; + + /* Normal Plot */ + struct chart np_chart; + + /* Detrended Normal Plot */ + struct chart dnp_chart; + + const struct weighted_value *wv = m->wv; + const int n_data = hsh_count(m->ordered_data) ; + + /* The slope and intercept of the ideal normal probability line */ + const double slope = 1.0 / m->stddev; + const double intercept = - m->mean / m->stddev; + + chart_initialise(&np_chart); + chart_write_title(&np_chart, _("Normal Q-Q Plot of %s"), factorname); + chart_write_xlabel(&np_chart, _("Observed Value")); + chart_write_ylabel(&np_chart, _("Expected Normal")); + + chart_initialise(&dnp_chart); + chart_write_title(&dnp_chart, _("Detrended Normal Q-Q Plot of %s"), + factorname); + chart_write_xlabel(&dnp_chart, _("Observed Value")); + chart_write_ylabel(&dnp_chart, _("Dev from Normal")); + + yfirst = gsl_cdf_ugaussian_Pinv (wv[0].rank / ( m->n + 1)); + ylast = gsl_cdf_ugaussian_Pinv (wv[n_data-1].rank / ( m->n + 1)); + + { + /* Need to make sure that both the scatter plot and the ideal fit into the + plot */ + double x_lower = min(m->min, (yfirst - intercept) / slope) ; + double x_upper = max(m->max, (ylast - intercept) / slope) ; + double slack = (x_upper - x_lower) * 0.05 ; + + chart_write_xscale(&np_chart, x_lower - slack, x_upper + slack, + chart_rounded_tick((m->max - m->min) / 5.0)); + + + chart_write_xscale(&dnp_chart, m->min, m->max, + chart_rounded_tick((m->max - m->min) / 5.0)); + + } + + chart_write_yscale(&np_chart, yfirst, ylast, + chart_rounded_tick((ylast - yfirst)/5.0) ); + + { + /* We have to cache the detrended data, beacause we need to + find its limits before we can plot it */ + double *d_data; + d_data = xmalloc (n_data * sizeof(double)); + double d_max = -DBL_MAX; + double d_min = DBL_MAX; + for ( i = 0 ; i < n_data; ++i ) + { + const double ns = gsl_cdf_ugaussian_Pinv (wv[i].rank / ( m->n + 1)); + + chart_datum(&np_chart, 0, wv[i].v.f, ns); + + d_data[i] = (wv[i].v.f - m->mean) / m->stddev - ns; + + if ( d_data[i] < d_min ) d_min = d_data[i]; + if ( d_data[i] > d_max ) d_max = d_data[i]; + } + + chart_write_yscale(&dnp_chart, d_min, d_max, + chart_rounded_tick((d_max - d_min) / 5.0)); + + for ( i = 0 ; i < n_data; ++i ) + chart_datum(&dnp_chart, 0, wv[i].v.f, d_data[i]); + + free(d_data); + } + + chart_line(&np_chart, slope, intercept, yfirst, ylast , CHART_DIM_Y); + chart_line(&dnp_chart, 0, 0, m->min, m->max , CHART_DIM_X); + + chart_finalise(&np_chart); + chart_finalise(&dnp_chart); + +} diff --git a/src/factor_stats.c b/src/factor_stats.c index fe259ff4..cb2197ad 100644 --- a/src/factor_stats.c +++ b/src/factor_stats.c @@ -21,10 +21,16 @@ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA #include "factor_stats.h" #include "config.h" #include "val.h" +#include "hash.h" +#include "algorithm.h" +#include "alloc.h" #include #include #include +#include + + void metrics_precalc(struct metrics *fs) @@ -34,11 +40,22 @@ metrics_precalc(struct metrics *fs) fs->sum = 0; fs->min = DBL_MAX; fs->max = -DBL_MAX; + + fs->ordered_data = hsh_create(20, + (hsh_compare_func *) compare_values, + (hsh_hash_func *) hash_value, + 0, + (void *) 0); } void -metrics_calc(struct metrics *fs, double x, double weight) +metrics_calc(struct metrics *fs, const union value *val, double weight) { + + + struct weighted_value **wv; + const double x = val->f; + fs->n += weight; fs->ssq += x * x * weight; fs->sum += x * weight; @@ -46,12 +63,42 @@ metrics_calc(struct metrics *fs, double x, double weight) if ( x < fs->min) fs->min = x; if ( x > fs->max) fs->max = x; + + wv = (struct weighted_value **) hsh_probe (fs->ordered_data,(void *) val ); + + if ( *wv ) + { + /* If this value has already been seen, then simply + increase its weight */ + + assert( (*wv)->v.f == val->f ); + (*wv)->w += weight; + } + else + { + *wv = xmalloc( sizeof (struct weighted_value) ); + (*wv)->v = *val; + (*wv)->w = weight; + hsh_insert(fs->ordered_data,(void *) *wv); + } + } void metrics_postcalc(struct metrics *fs) { double sample_var; + double cc = 0.0; + double tc ; + int k1, k2 ; + int i; + int j = 1; + + struct weighted_value **data; + + + int n_data; + fs->mean = fs->sum / fs->n; sample_var = ( fs->ssq / fs->n - fs->mean * fs->mean ); @@ -64,6 +111,57 @@ metrics_postcalc(struct metrics *fs) Shouldn't we use the sample variance ??? */ fs->stderr = sqrt (fs->var / fs->n) ; + data = (struct weighted_value **) hsh_data(fs->ordered_data); + n_data = hsh_count(fs->ordered_data); + + fs->wv = xmalloc ( sizeof (struct weighted_value) * n_data); + + for ( i = 0 ; i < n_data ; ++i ) + fs->wv[i] = *(data[i]); + + sort (fs->wv, n_data, sizeof (struct weighted_value) , + (algo_compare_func *) compare_values, 0); + + + + tc = fs->n * 0.05 ; + k1 = -1; + k2 = -1; + + + for ( i = 0 ; i < n_data ; ++i ) + { + cc += fs->wv[i].w; + fs->wv[i].cc = cc; + + fs->wv[i].rank = j + (fs->wv[i].w - 1) / 2.0 ; + + j += fs->wv[i].w; + + if ( cc < tc ) + k1 = i; + + } + + k2 = n_data; + for ( i = n_data -1 ; i >= 0; --i ) + { + if ( tc > fs->n - fs->wv[i].cc) + k2 = i; + } + + + fs->trimmed_mean = 0; + for ( i = k1 + 2 ; i <= k2 - 1 ; ++i ) + { + fs->trimmed_mean += fs->wv[i].v.f * fs->wv[i].w; + } + + + fs->trimmed_mean += (fs->n - fs->wv[k2 - 1].cc - tc) * fs->wv[k2].v.f ; + fs->trimmed_mean += (fs->wv[k1 + 1].cc - tc) * fs->wv[k1 + 1].v.f ; + fs->trimmed_mean /= 0.9 * fs->n ; + } diff --git a/src/factor_stats.h b/src/factor_stats.h index 67fd5f52..c7f12162 100644 --- a/src/factor_stats.h +++ b/src/factor_stats.h @@ -25,6 +25,24 @@ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA /* FIXME: These things should probably be amalgamated with the group_statistics struct */ +#include "hash.h" +#include "val.h" + +struct weighted_value +{ + union value v; + + /* The weight */ + double w; + + /* The cumulative weight */ + double cc; + + /* The rank */ + double rank; +}; + + struct metrics { @@ -45,6 +63,14 @@ struct metrics double var; double stddev; + + double trimmed_mean; + + /* An ordered arary of data for this factor */ + struct hsh_table *ordered_data; + + /* An SORTED array of weighted values */ + struct weighted_value *wv; }; @@ -63,7 +89,7 @@ struct factor_statistics { void metrics_precalc(struct metrics *fs); -void metrics_calc(struct metrics *fs, double x, double weight); +void metrics_calc(struct metrics *fs, const union value *f, double weight); void metrics_postcalc(struct metrics *fs); diff --git a/src/histogram.c b/src/histogram.c index d4403416..27e7b4b1 100644 --- a/src/histogram.c +++ b/src/histogram.c @@ -46,9 +46,6 @@ gaussian(double x, double mu, double sigma ) } -/* Adjust tick to be a sensible value */ -void adjust_tick(double *tick); - /* Write the legend of the chart */ static void @@ -142,9 +139,8 @@ draw_histogram(struct chart *ch, if ( y < y_min ) y_min = y; } - y_tick = ( y_max - y_min ) / (double) (YTICKS - 1) ; + y_tick = chart_rounded_tick( ( y_max - y_min ) / (double) (YTICKS - 1)); - adjust_tick(&y_tick); y_min = floor( y_min / y_tick ) * y_tick ; y_max = ceil( y_max / y_tick ) * y_tick ; @@ -228,31 +224,3 @@ draw_histogram(struct chart *ch, } - -double -log10(double x) -{ - return log(x) / log(10.0) ; -} - - -/* Adjust tick to be a sensible value */ -void -adjust_tick(double *tick) -{ - int i; - const double standard_ticks[] = {1, 2, 5}; - - const double factor = pow(10,ceil(log10(standard_ticks[0] / *tick))) ; - - for (i = 2 ; i >=0 ; --i) - { - if ( *tick > standard_ticks[i] / factor ) - { - *tick = standard_ticks[i] / factor ; - break; - } - } - - } - -- 2.30.2