X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Ffrequencies.q;h=5754be7e61e6de277491488c814ba99af96c2057;hb=5ff91bd55867848d448c2f09bc7057cc1fb77b18;hp=8393819c0a0d2bda5a33eb722ac8267a1e3945b0;hpb=b321086267ad1014dc5d09886396cde30f094437;p=pspp diff --git a/src/frequencies.q b/src/frequencies.q index 8393819c0a..5754be7e61 100644 --- a/src/frequencies.q +++ b/src/frequencies.q @@ -14,8 +14,8 @@ You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software - Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA - 02111-1307, USA. */ + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. */ /* TODO: @@ -27,6 +27,8 @@ #include "error.h" #include #include +#include + #include "alloc.h" #include "bitvector.h" #include "case.h" @@ -49,6 +51,11 @@ #include "vfm.h" #include "settings.h" #include "chart.h" + +#include "gettext.h" +#define _(msgid) gettext (msgid) +#define N_(msgid) msgid + /* (headers) */ #include "debug-print.h" @@ -181,13 +188,56 @@ static double scale, incr; /* FIXME */ static int normal; /* FIXME */ /* Variables for which to calculate statistics. */ -static int n_variables; +static size_t n_variables; static struct variable **v_variables; /* Arenas used to store semi-permanent storage. */ static struct pool *int_pool; /* Integer mode. */ static struct pool *gen_pool; /* General mode. */ +/* Frequency tables. */ + +/* Frequency table entry. */ +struct freq + { + union value v; /* The value. */ + double c; /* The number of occurrences of the value. */ + }; + +/* Types of frequency tables. */ +enum + { + FRQM_GENERAL, + FRQM_INTEGER + }; + +/* Entire frequency table. */ +struct freq_tab + { + int mode; /* FRQM_GENERAL or FRQM_INTEGER. */ + + /* General mode. */ + struct hsh_table *data; /* Undifferentiated data. */ + + /* Integer mode. */ + double *vector; /* Frequencies proper. */ + int min, max; /* The boundaries of the table. */ + double out_of_range; /* Sum of weights of out-of-range values. */ + double sysmis; /* Sum of weights of SYSMIS values. */ + + /* All modes. */ + struct freq *valid; /* Valid freqs. */ + int n_valid; /* Number of total freqs. */ + + struct freq *missing; /* Missing freqs. */ + int n_missing; /* Number of missing freqs. */ + + /* Statistics. */ + double total_cases; /* Sum of weights of all cases. */ + double valid_cases; /* Sum of weights of valid cases. */ + }; + + /* Per-variable frequency data. */ struct var_freqs { @@ -229,6 +279,15 @@ static hsh_compare_func compare_value_numeric_a, compare_value_alpha_a; static hsh_compare_func compare_value_numeric_d, compare_value_alpha_d; static hsh_compare_func compare_freq_numeric_a, compare_freq_alpha_a; static hsh_compare_func compare_freq_numeric_d, compare_freq_alpha_d; + + +static void do_piechart(const struct variable *var, + const struct freq_tab *frq_tab); + +gsl_histogram * +freq_tab_to_hist(const struct freq_tab *ft, const struct variable *var); + + /* Parser and outline. */ @@ -319,6 +378,8 @@ internal_cmd_frequencies (void) /* Do it! */ procedure_with_splits (precalc, calc, postcalc, NULL); + free_frequencies(&cmd); + return CMD_SUCCESS; } @@ -432,7 +493,7 @@ static int calc (struct ccase *c, void *aux UNUSED) { double weight; - int i; + size_t i; int bad_warn = 1; weight = dict_get_case_weight (default_dict, c, &bad_warn); @@ -486,7 +547,7 @@ calc (struct ccase *c, void *aux UNUSED) static void precalc (void *aux UNUSED) { - int i; + size_t i; pool_destroy (gen_pool); gen_pool = pool_create (); @@ -530,7 +591,7 @@ precalc (void *aux UNUSED) static void postcalc (void *aux UNUSED) { - int i; + size_t i; for (i = 0; i < n_variables; i++) { @@ -571,36 +632,35 @@ postcalc (void *aux UNUSED) dump_statistics (v, !dumped_freq_tab); + if ( chart == GFT_HIST) { - struct chart ch; double d[frq_n_stats]; - struct normal_curve norm; - norm.N = vf->tab.total_cases; + gsl_histogram *hist ; + + + norm.N = vf->tab.valid_cases; calc_stats(v,d); norm.mean = d[frq_mean]; norm.stddev = d[frq_stddev]; - chart_initialise(&ch); - draw_histogram(&ch, v_variables[i], ft, "HISTOGRAM",&norm,normal); - chart_finalise(&ch); + hist = freq_tab_to_hist(ft,v); + + histogram_plot(hist, var_to_string(v), &norm, normal); + + gsl_histogram_free(hist); } if ( chart == GFT_PIE) { - struct chart ch; - - chart_initialise(&ch); - - draw_piechart(&ch, v_variables[i], ft); - - chart_finalise(&ch); + do_piechart(v_variables[i], ft); } + cleanup_freq_tab (v); } @@ -637,7 +697,7 @@ not_missing (const void *f_, void *v_) const struct freq *f = f_; struct variable *v = v_; - return !is_missing (&f->v, v); + return !mv_is_value_missing (&v->miss, &f->v); } /* Summarizes the frequency table data for variable V. */ @@ -647,7 +707,7 @@ postprocess_freq_tab (struct variable *v) hsh_compare_func *compare; struct freq_tab *ft; size_t count; - void **data; + void *const *data; struct freq *freqs, *f; size_t i; @@ -660,7 +720,7 @@ postprocess_freq_tab (struct variable *v) data = hsh_data (ft->data); /* Copy dereferenced data into freqs. */ - freqs = xmalloc (count * sizeof *freqs); + freqs = xnmalloc (count, sizeof *freqs); for (i = 0; i < count; i++) { struct freq *f = data[i]; @@ -713,8 +773,8 @@ frq_custom_variables (struct cmd_frequencies *cmd UNUSED) int mode; int min = 0, max = 0; - int old_n_variables = n_variables; - int i; + size_t old_n_variables = n_variables; + size_t i; lex_match ('='); if (token != T_ALL && (token != T_ID @@ -775,8 +835,8 @@ frq_custom_variables (struct cmd_frequencies *cmd UNUSED) { vf->tab.min = min; vf->tab.max = max; - vf->tab.vector = pool_alloc (int_pool, - sizeof (struct freq) * (max - min + 1)); + vf->tab.vector = pool_nalloc (int_pool, + max - min + 1, sizeof *vf->tab.vector); } else vf->tab.vector = NULL; @@ -796,14 +856,14 @@ frq_custom_grouped (struct cmd_frequencies *cmd UNUSED) || token == T_ID) for (;;) { - int i; + size_t i; /* Max, current size of list; list itself. */ int nl, ml; double *dl; /* Variable list. */ - int n; + size_t n; struct variable **v; if (!parse_variables (default_dict, &v, &n, @@ -813,12 +873,12 @@ frq_custom_grouped (struct cmd_frequencies *cmd UNUSED) { nl = ml = 0; dl = NULL; - while (token == T_NUM) + while (lex_integer ()) { if (nl >= ml) { ml += 16; - dl = pool_realloc (int_pool, dl, ml * sizeof (double)); + dl = pool_nrealloc (int_pool, dl, ml, sizeof *dl); } dl[nl++] = tokval; lex_get (); @@ -889,9 +949,8 @@ add_percentile (double x) if (i >= n_percentiles || tokval != percentiles[i].p) { - percentiles - = pool_realloc (int_pool, percentiles, - (n_percentiles + 1) * sizeof (struct percentile )); + percentiles = pool_nrealloc (int_pool, percentiles, + n_percentiles + 1, sizeof *percentiles); if (i < n_percentiles) memmove (&percentiles[i + 1], &percentiles[i], @@ -1484,6 +1543,96 @@ dump_statistics (struct variable *v, int show_varname) tab_submit (t); } + + +/* Create a gsl_histogram from a freq_tab */ +gsl_histogram * +freq_tab_to_hist(const struct freq_tab *ft, const struct variable *var) +{ + int i; + double x_min = DBL_MAX; + double x_max = -DBL_MAX; + + gsl_histogram *hist; + const double bins = 11; + + struct hsh_iterator hi; + struct hsh_table *fh = ft->data; + struct freq *frq; + + /* Find out the extremes of the x value */ + for ( frq = hsh_first(fh, &hi); frq != 0; frq = hsh_next(fh, &hi) ) + { + if ( mv_is_value_missing(&var->miss, &frq->v)) + continue; + + if ( frq->v.f < x_min ) x_min = frq->v.f ; + if ( frq->v.f > x_max ) x_max = frq->v.f ; + } + + hist = histogram_create(bins, x_min, x_max); + + for( i = 0 ; i < ft->n_valid ; ++i ) + { + frq = &ft->valid[i]; + gsl_histogram_accumulate(hist, frq->v.f, frq->c); + } + + return hist; +} + + +static struct slice * +freq_tab_to_slice_array(const struct freq_tab *frq_tab, + const struct variable *var, + int *n_slices); + + +/* Allocate an array of slices and fill them from the data in frq_tab + n_slices will contain the number of slices allocated. + The caller is responsible for freeing slices +*/ +static struct slice * +freq_tab_to_slice_array(const struct freq_tab *frq_tab, + const struct variable *var, + int *n_slices) +{ + int i; + struct slice *slices; + + *n_slices = frq_tab->n_valid; + + slices = xnmalloc (*n_slices, sizeof *slices); + + for (i = 0 ; i < *n_slices ; ++i ) + { + const struct freq *frq = &frq_tab->valid[i]; + + slices[i].label = value_to_string(&frq->v, var); + + slices[i].magnetude = frq->c; + } + + return slices; +} + + + + +static void +do_piechart(const struct variable *var, const struct freq_tab *frq_tab) +{ + struct slice *slices; + int n_slices; + + slices = freq_tab_to_slice_array(frq_tab, var, &n_slices); + + piechart_plot(var_to_string(var), slices, n_slices); + + free(slices); +} + + /* Local Variables: mode: c