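FREQUENCIES stored the computed percentile values, and therefore the
median, in a single area shared by every variable, but each variable
needs its own area for its results. This commit fixes the problem by
keeping a separate array of percentile values for each variable.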
Bug #60728.
Thanks to Elias Tsolis for reporting the problem.
struct percentile
{
double p; /* the %ile to be calculated */
struct percentile
{
double p; /* the %ile to be calculated */
- double value; /* the %ile's value */
bool show; /* True to show this percentile in the statistics box. */
};
bool show; /* True to show this percentile in the statistics box. */
};
/* Statistics. */
double stat[FRQ_ST_count];
/* Statistics. */
double stat[FRQ_ST_count];
/* Variable attributes. */
int width;
/* Variable attributes. */
int width;
struct var_freqs *vars;
size_t n_vars;
/* Percentiles to calculate and possibly display. */
struct percentile *percentiles;
struct var_freqs *vars;
size_t n_vars;
/* Percentiles to calculate and possibly display. */
struct percentile *percentiles;
- const struct percentile *median;
- int n_percentiles;
+ size_t median_idx;
+ size_t n_percentiles;
/* Frequency table display. */
long int max_categories; /* Maximum categories to show. */
/* Frequency table display. */
long int max_categories; /* Maximum categories to show. */
}
/* Create a gsl_histogram from a freq_tab */
}
/* Create a gsl_histogram from a freq_tab */
-static struct histogram *
-freq_tab_to_hist (const struct frq_proc *frq, const struct freq_tab *ft,
- const struct variable *var);
+static struct histogram *freq_tab_to_hist (const struct frq_proc *,
+ const struct var_freqs *);
static void
put_freq_row (struct pivot_table *table, int var_idx,
static void
put_freq_row (struct pivot_table *table, int var_idx,
/* Calculates all of the percentiles for VF within FRQ. */
static void
/* Calculates all of the percentiles for VF within FRQ. */
static void
-calc_percentiles (const struct frq_proc *frq, const struct var_freqs *vf)
+calc_percentiles (const struct frq_proc *frq, struct var_freqs *vf)
+ if (!frq->n_percentiles)
+ return;
+
+ vf->percentiles = xnmalloc (frq->n_percentiles, sizeof *vf->percentiles);
+
const struct freq_tab *ft = &vf->tab;
const struct freq_tab *ft = &vf->tab;
- double W = ft->valid_cases;
- const struct freq *f;
- int percentile_idx = 0;
- double rank = 0;
+ const double W = ft->valid_cases;
+ size_t idx = 0;
- for (f = ft->valid; f < ft->missing; f++)
+ double rank = 0;
+ for (const struct freq *f = ft->valid; f < ft->missing; f++)
- for (; percentile_idx < frq->n_percentiles; percentile_idx++)
+ for (; idx < frq->n_percentiles; idx++)
- struct percentile *pc = &frq->percentiles[percentile_idx];
+ struct percentile *pc = &frq->percentiles[idx];
double tp;
tp = (settings_get_algorithm () == ENHANCED
double tp;
tp = (settings_get_algorithm () == ENHANCED
break;
if (tp + 1 < rank || f + 1 >= ft->missing)
break;
if (tp + 1 < rank || f + 1 >= ft->missing)
- pc->value = f->values[0].f;
+ vf->percentiles[idx] = f->values[0].f;
- pc->value = calc_percentile (pc->p, W, f->values[0].f, f[1].values[0].f);
+ vf->percentiles[idx] = calc_percentile (pc->p, W, f->values[0].f,
+ f[1].values[0].f);
- for (; percentile_idx < frq->n_percentiles; percentile_idx++)
- {
- struct percentile *pc = &frq->percentiles[percentile_idx];
- pc->value = (ft->n_valid > 0
- ? ft->valid[ft->n_valid - 1].values[0].f
- : SYSMIS);
- }
+ for (; idx < frq->n_percentiles; idx++)
+ vf->percentiles[idx] = (ft->n_valid > 0
+ ? ft->valid[ft->n_valid - 1].values[0].f
+ : SYSMIS);
}
/* Returns true iff the value in struct freq F is non-missing
}
/* Returns true iff the value in struct freq F is non-missing
- histogram = freq_tab_to_hist (frq, &vf->tab, vf->var);
+ histogram = freq_tab_to_hist (frq, vf);
+ frq.median_idx = SIZE_MAX;
for (i = 0; i < frq.n_percentiles; i++)
if (frq.percentiles[i].p == 0.5)
{
for (i = 0; i < frq.n_percentiles; i++)
if (frq.percentiles[i].p == 0.5)
{
- frq.median = &frq.percentiles[i];
ok = proc_commit (ds) && ok;
}
ok = proc_commit (ds) && ok;
}
+ for (size_t i = 0; i < frq.n_vars; i++)
+ free (frq.vars[i].percentiles);
free (frq.vars);
free (frq.bar);
free (frq.pie);
free (frq.vars);
free (frq.bar);
free (frq.pie);
free (vars);
free (frq.vars);
free (vars);
free (frq.vars);
+ for (size_t i = 0; i < frq.n_vars; i++)
+ free (frq.vars[i].percentiles);
free (frq.bar);
free (frq.pie);
free (frq.hist);
free (frq.bar);
free (frq.pie);
free (frq.hist);
-calculate_iqr (const struct frq_proc *frq)
+calculate_iqr (const struct frq_proc *frq, const struct var_freqs *vf)
{
double q1 = SYSMIS;
double q3 = SYSMIS;
{
double q1 = SYSMIS;
double q3 = SYSMIS;
struct percentile *pc = &frq->percentiles[i];
if (fabs (0.25 - pc->p) < DBL_EPSILON)
struct percentile *pc = &frq->percentiles[i];
if (fabs (0.25 - pc->p) < DBL_EPSILON)
+ q1 = vf->percentiles[i];
else if (fabs (0.75 - pc->p) < DBL_EPSILON)
else if (fabs (0.75 - pc->p) < DBL_EPSILON)
+ q3 = vf->percentiles[i];
}
return q1 == SYSMIS || q3 == SYSMIS ? SYSMIS : q3 - q1;
}
return q1 == SYSMIS || q3 == SYSMIS ? SYSMIS : q3 - q1;
/* Create a gsl_histogram from a freq_tab */
static struct histogram *
/* Create a gsl_histogram from a freq_tab */
static struct histogram *
-freq_tab_to_hist (const struct frq_proc *frq, const struct freq_tab *ft,
- const struct variable *var)
+freq_tab_to_hist (const struct frq_proc *frq, const struct var_freqs *vf)
- double x_min, x_max, valid_freq;
- int i;
- double bin_width;
- struct histogram *histogram;
- double iqr;
-
/* Find out the extremes of the x value, within the range to be included in
the histogram, and sum the total frequency of those values. */
/* Find out the extremes of the x value, within the range to be included in
the histogram, and sum the total frequency of those values. */
- x_min = DBL_MAX;
- x_max = -DBL_MAX;
- valid_freq = 0;
- for (i = 0; i < ft->n_valid; i++)
+ double x_min = DBL_MAX;
+ double x_max = -DBL_MAX;
+ double valid_freq = 0;
+ for (int i = 0; i < vf->tab.n_valid; i++)
- const struct freq *f = &ft->valid[i];
- if (chart_includes_value (frq->hist, var, f->values))
+ const struct freq *f = &vf->tab.valid[i];
+ if (chart_includes_value (frq->hist, vf->var, f->values))
{
x_min = MIN (x_min, f->values[0].f);
x_max = MAX (x_max, f->values[0].f);
{
x_min = MIN (x_min, f->values[0].f);
x_max = MAX (x_max, f->values[0].f);
if (valid_freq <= 0)
return NULL;
if (valid_freq <= 0)
return NULL;
- iqr = calculate_iqr (frq);
-
- if (iqr > 0)
- /* Freedman-Diaconis' choice of bin width. */
- bin_width = 2 * iqr / pow (valid_freq, 1.0 / 3.0);
-
- else
- /* Sturges Rule */
- bin_width = (x_max - x_min) / (1 + log2 (valid_freq));
+ double iqr = calculate_iqr (frq, vf);
- histogram = histogram_create (bin_width, x_min, x_max);
+ double bin_width =
+ (iqr > 0
+ ? 2 * iqr / pow (valid_freq, 1.0 / 3.0) /* Freedman-Diaconis. */
+ : (x_max - x_min) / (1 + log2 (valid_freq))); /* Sturges */
+ struct histogram *histogram = histogram_create (bin_width, x_min, x_max);
if (histogram == NULL)
return NULL;
if (histogram == NULL)
return NULL;
- for (i = 0; i < ft->n_valid; i++)
+ for (int i = 0; i < vf->tab.n_valid; i++)
- const struct freq *f = &ft->valid[i];
- if (chart_includes_value (frq->hist, var, f->values))
+ const struct freq *f = &vf->tab.valid[i];
+ if (chart_includes_value (frq->hist, vf->var, f->values))
histogram_add (histogram, f->values[0].f, f->count);
}
histogram_add (histogram, f->values[0].f, f->count);
}
d[FRQ_ST_SEMEAN] = d[FRQ_ST_STDDEV] / sqrt (W);
d[FRQ_ST_SESKEWNESS] = calc_seskew (W);
d[FRQ_ST_SEKURTOSIS] = calc_sekurt (W);
d[FRQ_ST_SEMEAN] = d[FRQ_ST_STDDEV] / sqrt (W);
d[FRQ_ST_SESKEWNESS] = calc_seskew (W);
d[FRQ_ST_SEKURTOSIS] = calc_sekurt (W);
- d[FRQ_ST_MEDIAN] = frq->median ? frq->median->value : SYSMIS;
+ d[FRQ_ST_MEDIAN] = (frq->median_idx != SIZE_MAX
+ ? vf->percentiles[frq->median_idx]
+ : SYSMIS);
- union value v = { .f = vf->tab.n_valid ? pc->value : SYSMIS };
+ union value v = {
+ .f = vf->tab.n_valid ? vf->percentiles[j] : SYSMIS
+ };
pivot_table_put2 (table, var_idx, row++,
pivot_value_new_var_value (vf->var, &v));
}
pivot_table_put2 (table, var_idx, row++,
pivot_value_new_var_value (vf->var, &v));
}
m4_define([FREQUENCIES_NTILES_OUTPUT], [dnl
Table: Statistics
m4_define([FREQUENCIES_NTILES_OUTPUT], [dnl
Table: Statistics
-,,x
-N,Valid,5
-,Missing,0
-Mean,,3.00
-Std Dev,,1.58
-Minimum,,1.00
-Maximum,,5.00
-Percentiles,0,1.00
-,25,2.00
-,33,2.33
-,50,3.00
-,67,3.67
-,75,4.00
-,100,5.00
+,,x,y
+N,Valid,5,5
+,Missing,0,0
+Mean,,3.00,30.00
+Std Dev,,1.58,15.81
+Minimum,,1.00,10.00
+Maximum,,5.00,50.00
+Percentiles,0,1.00,10.00
+,25,2.00,20.00
+,33,2.33,23.33
+,50,3.00,30.00
+,67,3.67,36.67
+,75,4.00,40.00
+,100,5.00,50.00
])
AT_SETUP([FREQUENCIES basic percentiles])
AT_DATA([frequencies.sps],
])
AT_SETUP([FREQUENCIES basic percentiles])
AT_DATA([frequencies.sps],
- [DATA LIST LIST notable /x * .
+ [DATA LIST LIST notable /x y.
+1 10
+2 20
+3 30
+4 40
+5 50
/FORMAT=NOTABLE
/PERCENTILES = 0 25 33.333 50 66.666 75 100.
])
/FORMAT=NOTABLE
/PERCENTILES = 0 25 33.333 50 66.666 75 100.
])
AT_SETUP([FREQUENCIES basic n-tiles])
AT_DATA([frequencies.sps],
AT_SETUP([FREQUENCIES basic n-tiles])
AT_DATA([frequencies.sps],
- [DATA LIST LIST notable /x * .
+ [DATA LIST LIST notable /x y.
+1 10
+2 20
+3 30
+4 40
+5 50
/FORMAT=NOTABLE
/NTILES = 3
/NTILES = 4.
/FORMAT=NOTABLE
/NTILES = 3
/NTILES = 4.