projects
/
pspp
/ blobdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
|
commitdiff
|
tree
raw
|
inline
| side by side
parsing works, at least the basics
[pspp]
/
src
/
language
/
stats
/
frequencies.c
diff --git
a/src/language/stats/frequencies.c
b/src/language/stats/frequencies.c
index e5462d083d2fd44d6e24e2e23960d360c6ad8f6f..307a869ba2d1e17d723c80443f19b8ba7a7a7240 100644
(file)
--- a/
src/language/stats/frequencies.c
+++ b/
src/language/stats/frequencies.c
@@
-45,7
+45,6
@@
#include "libpspp/hmap.h"
#include "libpspp/message.h"
#include "libpspp/misc.h"
#include "libpspp/hmap.h"
#include "libpspp/message.h"
#include "libpspp/misc.h"
-#include "libpspp/pool.h"
#include "math/histogram.h"
#include "math/moments.h"
#include "math/histogram.h"
#include "math/moments.h"
@@
-69,7
+68,6
@@
struct percentile
{
double p; /* the %ile to be calculated */
struct percentile
{
double p; /* the %ile to be calculated */
- double value; /* the %ile's value */
bool show; /* True to show this percentile in the statistics box. */
};
bool show; /* True to show this percentile in the statistics box. */
};
@@
-190,12
+188,9
@@
struct var_freqs
/* Frequency table. */
struct freq_tab tab; /* Frequencies table to use. */
/* Frequency table. */
struct freq_tab tab; /* Frequencies table to use. */
- /* Percentiles. */
- int n_groups; /* Number of groups. */
- double *groups; /* Groups. */
-
/* Statistics. */
double stat[FRQ_ST_count];
/* Statistics. */
double stat[FRQ_ST_count];
+ double *percentiles;
/* Variable attributes. */
int width;
/* Variable attributes. */
int width;
@@
-203,15
+198,13
@@
struct var_freqs
struct frq_proc
{
struct frq_proc
{
- struct pool *pool;
-
struct var_freqs *vars;
size_t n_vars;
/* Percentiles to calculate and possibly display. */
struct percentile *percentiles;
struct var_freqs *vars;
size_t n_vars;
/* Percentiles to calculate and possibly display. */
struct percentile *percentiles;
-
const struct percentile *median
;
-
in
t n_percentiles;
+
size_t median_idx
;
+
size_
t n_percentiles;
/* Frequency table display. */
long int max_categories; /* Maximum categories to show. */
/* Frequency table display. */
long int max_categories; /* Maximum categories to show. */
@@
-272,9
+265,8
@@
compare_freq (const void *a_, const void *b_, const void *aux_)
}
/* Create a gsl_histogram from a freq_tab */
}
/* Create a gsl_histogram from a freq_tab */
-static struct histogram *
-freq_tab_to_hist (const struct frq_proc *frq, const struct freq_tab *ft,
- const struct variable *var);
+static struct histogram *freq_tab_to_hist (const struct frq_proc *,
+ const struct var_freqs *);
static void
put_freq_row (struct pivot_table *table, int var_idx,
static void
put_freq_row (struct pivot_table *table, int var_idx,
@@
-362,20
+354,24
@@
calc_percentile (double p, double valid_cases, double x1, double x2)
/* Calculates all of the percentiles for VF within FRQ. */
static void
/* Calculates all of the percentiles for VF within FRQ. */
static void
-calc_percentiles (const struct frq_proc *frq,
const
struct var_freqs *vf)
+calc_percentiles (const struct frq_proc *frq, struct var_freqs *vf)
{
{
+ if (!frq->n_percentiles)
+ return;
+
+ vf->percentiles = xnmalloc (frq->n_percentiles, sizeof *vf->percentiles);
+
const struct freq_tab *ft = &vf->tab;
const struct freq_tab *ft = &vf->tab;
- double W = ft->valid_cases;
- const struct freq *f;
- int percentile_idx = 0;
- double rank = 0;
+ const double W = ft->valid_cases;
+ size_t idx = 0;
- for (f = ft->valid; f < ft->missing; f++)
+ double rank = 0;
+ for (const struct freq *f = ft->valid; f < ft->missing; f++)
{
rank += f->count;
{
rank += f->count;
- for (;
percentile_idx < frq->n_percentiles; percentile_
idx++)
+ for (;
idx < frq->n_percentiles;
idx++)
{
{
- struct percentile *pc = &frq->percentiles[
percentile_
idx];
+ struct percentile *pc = &frq->percentiles[idx];
double tp;
tp = (settings_get_algorithm () == ENHANCED
double tp;
tp = (settings_get_algorithm () == ENHANCED
@@
-386,18
+382,16
@@
calc_percentiles (const struct frq_proc *frq, const struct var_freqs *vf)
break;
if (tp + 1 < rank || f + 1 >= ft->missing)
break;
if (tp + 1 < rank || f + 1 >= ft->missing)
-
pc->value
= f->values[0].f;
+
vf->percentiles[idx]
= f->values[0].f;
else
else
- pc->value = calc_percentile (pc->p, W, f->values[0].f, f[1].values[0].f);
+ vf->percentiles[idx] = calc_percentile (pc->p, W, f->values[0].f,
+ f[1].values[0].f);
}
}
}
}
- for (; percentile_idx < frq->n_percentiles; percentile_idx++)
- {
- struct percentile *pc = &frq->percentiles[percentile_idx];
- pc->value = (ft->n_valid > 0
- ? ft->valid[ft->n_valid - 1].values[0].f
- : SYSMIS);
- }
+ for (; idx < frq->n_percentiles; idx++)
+ vf->percentiles[idx] = (ft->n_valid > 0
+ ? ft->valid[ft->n_valid - 1].values[0].f
+ : SYSMIS);
}
/* Returns true iff the value in struct freq F is non-missing
}
/* Returns true iff the value in struct freq F is non-missing
@@
-408,7
+402,7
@@
not_missing (const void *f_, const void *v_)
const struct freq *f = f_;
const struct variable *v = v_;
const struct freq *f = f_;
const struct variable *v = v_;
- return !var_is_value_missing (v, f->values
, MV_ANY
);
+ return !var_is_value_missing (v, f->values);
}
}
@@
-542,7
+536,7
@@
postcalc (struct frq_proc *frq, const struct dataset *ds)
calc_stats (frq, vf, d);
calc_stats (frq, vf, d);
- histogram = freq_tab_to_hist (frq,
&vf->tab, vf->var
);
+ histogram = freq_tab_to_hist (frq,
vf
);
if (histogram)
{
if (histogram)
{
@@
-593,7
+587,6
@@
cmd_frequencies (struct lexer *lexer, struct dataset *ds)
int hi_pcnt = INT_MIN;
int hi_norm = FRQ_NONORMAL;
int hi_pcnt = INT_MIN;
int hi_norm = FRQ_NONORMAL;
- frq.pool = pool_create ();
frq.sort = FRQ_AVALUE;
frq.vars = NULL;
frq.sort = FRQ_AVALUE;
frq.vars = NULL;
@@
-1208,11
+1201,11
@@
cmd_frequencies (struct lexer *lexer, struct dataset *ds)
frq.n_percentiles = o;
frq.n_percentiles = o;
- frq.median
= NULL
;
+ frq.median
_idx = SIZE_MAX
;
for (i = 0; i < frq.n_percentiles; i++)
if (frq.percentiles[i].p == 0.5)
{
for (i = 0; i < frq.n_percentiles; i++)
if (frq.percentiles[i].p == 0.5)
{
- frq.median
= &frq.percentiles[i]
;
+ frq.median
_idx = i
;
break;
}
}
break;
}
}
@@
-1237,14
+1230,14
@@
cmd_frequencies (struct lexer *lexer, struct dataset *ds)
ok = proc_commit (ds) && ok;
}
ok = proc_commit (ds) && ok;
}
-
free (vars);
free (vars);
+ for (size_t i = 0; i < frq.n_vars; i++)
+ free (frq.vars[i].percentiles);
free (frq.vars);
free (frq.bar);
free (frq.pie);
free (frq.hist);
free (frq.percentiles);
free (frq.vars);
free (frq.bar);
free (frq.pie);
free (frq.hist);
free (frq.percentiles);
- pool_destroy (frq.pool);
return CMD_SUCCESS;
return CMD_SUCCESS;
@@
-1252,17
+1245,18
@@
cmd_frequencies (struct lexer *lexer, struct dataset *ds)
free (vars);
free (frq.vars);
free (vars);
free (frq.vars);
+ for (size_t i = 0; i < frq.n_vars; i++)
+ free (frq.vars[i].percentiles);
free (frq.bar);
free (frq.pie);
free (frq.hist);
free (frq.percentiles);
free (frq.bar);
free (frq.pie);
free (frq.hist);
free (frq.percentiles);
- pool_destroy (frq.pool);
return CMD_FAILURE;
}
static double
return CMD_FAILURE;
}
static double
-calculate_iqr (const struct frq_proc *frq)
+calculate_iqr (const struct frq_proc *frq
, const struct var_freqs *vf
)
{
double q1 = SYSMIS;
double q3 = SYSMIS;
{
double q1 = SYSMIS;
double q3 = SYSMIS;
@@
-1275,9
+1269,9
@@
calculate_iqr (const struct frq_proc *frq)
struct percentile *pc = &frq->percentiles[i];
if (fabs (0.25 - pc->p) < DBL_EPSILON)
struct percentile *pc = &frq->percentiles[i];
if (fabs (0.25 - pc->p) < DBL_EPSILON)
- q1 =
pc->value
;
+ q1 =
vf->percentiles[i]
;
else if (fabs (0.75 - pc->p) < DBL_EPSILON)
else if (fabs (0.75 - pc->p) < DBL_EPSILON)
- q3 =
pc->value
;
+ q3 =
vf->percentiles[i]
;
}
return q1 == SYSMIS || q3 == SYSMIS ? SYSMIS : q3 - q1;
}
return q1 == SYSMIS || q3 == SYSMIS ? SYSMIS : q3 - q1;
@@
-1288,7
+1282,7
@@
chart_includes_value (const struct frq_chart *chart,
const struct variable *var,
const union value *value)
{
const struct variable *var,
const union value *value)
{
- if (!chart->include_missing && var_is_value_missing (var, value
, MV_ANY
))
+ if (!chart->include_missing && var_is_value_missing (var, value))
return false;
if (var_is_numeric (var)
return false;
if (var_is_numeric (var)
@@
-1301,24
+1295,17
@@
chart_includes_value (const struct frq_chart *chart,
/* Create a gsl_histogram from a freq_tab */
static struct histogram *
/* Create a gsl_histogram from a freq_tab */
static struct histogram *
-freq_tab_to_hist (const struct frq_proc *frq, const struct freq_tab *ft,
- const struct variable *var)
+freq_tab_to_hist (const struct frq_proc *frq, const struct var_freqs *vf)
{
{
- double x_min, x_max, valid_freq;
- int i;
- double bin_width;
- struct histogram *histogram;
- double iqr;
-
/* Find out the extremes of the x value, within the range to be included in
the histogram, and sum the total frequency of those values. */
/* Find out the extremes of the x value, within the range to be included in
the histogram, and sum the total frequency of those values. */
- x_min = DBL_MAX;
- x_max = -DBL_MAX;
- valid_freq = 0;
- for (i
= 0; i < ft->
n_valid; i++)
+
double
x_min = DBL_MAX;
+
double
x_max = -DBL_MAX;
+
double
valid_freq = 0;
+ for (i
nt i = 0; i < vf->tab.
n_valid; i++)
{
{
- const struct freq *f = &
ft->
valid[i];
- if (chart_includes_value (frq->hist, var, f->values))
+ const struct freq *f = &
vf->tab.
valid[i];
+ if (chart_includes_value (frq->hist, v
f->v
ar, f->values))
{
x_min = MIN (x_min, f->values[0].f);
x_max = MAX (x_max, f->values[0].f);
{
x_min = MIN (x_min, f->values[0].f);
x_max = MAX (x_max, f->values[0].f);
@@
-1329,25
+1316,21
@@
freq_tab_to_hist (const struct frq_proc *frq, const struct freq_tab *ft,
if (valid_freq <= 0)
return NULL;
if (valid_freq <= 0)
return NULL;
- iqr = calculate_iqr (frq);
-
- if (iqr > 0)
- /* Freedman-Diaconis' choice of bin width. */
- bin_width = 2 * iqr / pow (valid_freq, 1.0 / 3.0);
-
- else
- /* Sturges Rule */
- bin_width = (x_max - x_min) / (1 + log2 (valid_freq));
+ double iqr = calculate_iqr (frq, vf);
- histogram = histogram_create (bin_width, x_min, x_max);
+ double bin_width =
+ (iqr > 0
+ ? 2 * iqr / pow (valid_freq, 1.0 / 3.0) /* Freedman-Diaconis. */
+ : (x_max - x_min) / (1 + log2 (valid_freq))); /* Sturges */
+ struct histogram *histogram = histogram_create (bin_width, x_min, x_max);
if (histogram == NULL)
return NULL;
if (histogram == NULL)
return NULL;
- for (i
= 0; i < ft->
n_valid; i++)
+ for (i
nt i = 0; i < vf->tab.
n_valid; i++)
{
{
- const struct freq *f = &
ft->
valid[i];
- if (chart_includes_value (frq->hist, var, f->values))
+ const struct freq *f = &
vf->tab.
valid[i];
+ if (chart_includes_value (frq->hist, v
f->v
ar, f->values))
histogram_add (histogram, f->values[0].f, f->count);
}
histogram_add (histogram, f->values[0].f, f->count);
}
@@
-1549,7
+1532,9
@@
calc_stats (const struct frq_proc *frq, const struct var_freqs *vf,
d[FRQ_ST_SEMEAN] = d[FRQ_ST_STDDEV] / sqrt (W);
d[FRQ_ST_SESKEWNESS] = calc_seskew (W);
d[FRQ_ST_SEKURTOSIS] = calc_sekurt (W);
d[FRQ_ST_SEMEAN] = d[FRQ_ST_STDDEV] / sqrt (W);
d[FRQ_ST_SESKEWNESS] = calc_seskew (W);
d[FRQ_ST_SEKURTOSIS] = calc_sekurt (W);
- d[FRQ_ST_MEDIAN] = frq->median ? frq->median->value : SYSMIS;
+ d[FRQ_ST_MEDIAN] = (frq->median_idx != SIZE_MAX
+ ? vf->percentiles[frq->median_idx]
+ : SYSMIS);
}
static bool
}
static bool
@@
-1640,7
+1625,9
@@
dump_statistics (const struct frq_proc *frq, const struct variable *wv)
if (!pc->show)
continue;
if (!pc->show)
continue;
- union value v = { .f = vf->tab.n_valid ? pc->value : SYSMIS };
+ union value v = {
+ .f = vf->tab.n_valid ? vf->percentiles[j] : SYSMIS
+ };
pivot_table_put2 (table, var_idx, row++,
pivot_value_new_var_value (vf->var, &v));
}
pivot_table_put2 (table, var_idx, row++,
pivot_value_new_var_value (vf->var, &v));
}