-/* PSPP - EXAMINE data for normality . -*-c-*-
+/* PSPP - a program for statistical analysis.
+ Copyright (C) 2004 Free Software Foundation, Inc.
-Copyright (C) 2004 Free Software Foundation, Inc.
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
-This program is free software; you can redistribute it and/or
-modify it under the terms of the GNU General Public License as
-published by the Free Software Foundation; either version 2 of the
-License, or (at your option) any later version.
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
-This program is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
-02110-1301, USA. */
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
#include <config.h>
#include <stdlib.h>
#include <data/case.h>
-#include <data/casefile.h>
+#include <data/casegrouper.h>
+#include <data/casereader.h>
#include <data/dictionary.h>
#include <data/procedure.h>
#include <data/value-labels.h>
#include <language/command.h>
#include <language/dictionary/split-file.h>
#include <language/lexer/lexer.h>
-#include <libpspp/alloc.h>
#include <libpspp/compiler.h>
#include <libpspp/hash.h>
-#include <libpspp/magic.h>
#include <libpspp/message.h>
#include <libpspp/misc.h>
#include <libpspp/str.h>
#include <output/table.h>
#include "minmax.h"
+#include "xalloc.h"
#include "gettext.h"
#define _(msgid) gettext (msgid)
static struct cmd_examine cmd;
-static struct variable **dependent_vars;
+static const struct variable **dependent_vars;
static size_t n_dependent_vars;
/* Output functions */
-static void show_summary (struct variable **dependent_var, int n_dep_var,
+static void show_summary (const struct variable **dependent_var, int n_dep_var,
const struct factor *f);
-static void show_extremes (struct variable **dependent_var,
+static void show_extremes (const struct variable **dependent_var,
int n_dep_var,
const struct factor *factor,
int n_extremities);
-static void show_descriptives (struct variable **dependent_var,
+static void show_descriptives (const struct variable **dependent_var,
int n_dep_var,
struct factor *factor);
-static void show_percentiles (struct variable **dependent_var,
+static void show_percentiles (const struct variable **dependent_var,
int n_dep_var,
struct factor *factor);
/* Per Split function */
-static bool run_examine (const struct ccase *,
- const struct casefile *cf, void *cmd_, const struct dataset *);
+static void run_examine (struct cmd_examine *, struct casereader *,
+ struct dataset *);
static void output_examine (void);
/* Represent a factor as a string, so it can be
printed in a human readable fashion */
-const char * factor_to_string (const struct factor *fctr,
+static void factor_to_string (const struct factor *fctr,
const struct factor_statistics *fs,
- const struct variable *var);
-
+ const struct variable *var,
+ struct string *str
+ );
/* Represent a factor as a string, so it can be
printed in a human readable fashion,
but sacrificing some readability for the sake of brevity */
-const char *factor_to_string_concise (const struct factor *fctr,
- struct factor_statistics *fs);
+static void factor_to_string_concise (const struct factor *fctr,
+ const struct factor_statistics *fs,
+ struct string *);
-/* Function to use for testing for missing values */
-static var_is_missing_func *value_is_missing;
-
+/* Categories of missing values to exclude. */
+static enum mv_class exclude_values;
/* PERCENTILES */
int
cmd_examine (struct lexer *lexer, struct dataset *ds)
{
+ struct casegrouper *grouper;
+ struct casereader *group;
bool ok;
subc_list_double_create (&percentile_list);
}
/* If /MISSING=INCLUDE is set, user-missing values are treated as valid data: only system-missing values are excluded */
- if (cmd.incl == XMN_INCLUDE )
- value_is_missing = var_is_value_system_missing;
- else
- value_is_missing = var_is_value_missing;
+ exclude_values = cmd.incl == XMN_INCLUDE ? MV_SYSTEM : MV_ANY;
if ( cmd.st_n == SYSMIS )
cmd.st_n = 5;
subc_list_double_push (&percentile_list, 75);
}
- ok = multipass_procedure_with_splits (ds, run_examine, &cmd);
+ grouper = casegrouper_create_splits (proc_open (ds), dataset_dict (ds));
+ while (casegrouper_get_next_group (grouper, &group))
+ run_examine (&cmd, group, ds);
+ ok = casegrouper_destroy (grouper);
+ ok = proc_commit (ds) && ok;
if ( totals )
{
for ( fs = fctr->fs ; *fs ; ++fs )
{
- const char *s = factor_to_string (fctr, *fs, dependent_vars[v]);
+ struct string str;
+ ds_init_empty (&str);
+ factor_to_string (fctr, *fs, dependent_vars[v], &str);
if ( cmd.a_plot[XMN_PLT_NPPLOT] )
- np_plot (& (*fs)->m[v], s);
+ np_plot (& (*fs)->m[v], ds_cstr (&str));
if ( cmd.a_plot[XMN_PLT_HISTOGRAM] )
{
normal.stddev = (*fs)->m[v].stddev;
histogram_plot ((*fs)->m[v].histogram,
- s, &normal, 0);
+ ds_cstr (&str) , &normal, 0);
}
+ ds_destroy (&str);
+
} /* for ( fs .... */
} /* for ( v = 0 ..... */
return 2;
}
- if (!parse_variables (lexer, dict, &dependent_vars, &n_dependent_vars,
+ if (!parse_variables_const (lexer, dict, &dependent_vars, &n_dependent_vars,
PV_NO_DUPLICATE | PV_NUMERIC | PV_NO_SCRATCH) )
{
free (dependent_vars);
-static bool bad_weight_warn = true;
-
-
/* Perform calculations for the sub factors */
void
factor_calc (const struct ccase *c, int case_no, double weight,
var_get_width (var)
);
- if ( value_is_missing (var, val) || case_missing )
+ if (case_missing || var_is_value_missing (var, val, exclude_values))
{
free (val);
continue;
}
}
-static bool
-run_examine (const struct ccase *first, const struct casefile *cf,
- void *cmd_, const struct dataset *ds)
+static void
+run_examine (struct cmd_examine *cmd, struct casereader *input,
+ struct dataset *ds)
{
struct dictionary *dict = dataset_dict (ds);
- struct casereader *r;
+ casenumber case_no;
struct ccase c;
int v;
-
- const struct cmd_examine *cmd = (struct cmd_examine *) cmd_;
+ bool ok;
struct factor *fctr;
- output_split_file_values (ds, first);
+ if (!casereader_peek (input, 0, &c))
+ {
+ casereader_destroy (input);
+ return;
+ }
+ output_split_file_values (ds, &c);
+ case_destroy (&c);
+
+ input = casereader_create_filter_weight (input, dict, NULL, NULL);
+ input = casereader_create_counter (input, &case_no, 0);
/* Make sure we haven't got rubbish left over from a
- previous split */
+ previous split. */
fctr = factors;
while (fctr)
{
for ( v = 0 ; v < n_dependent_vars ; ++v )
metrics_precalc (&totals[v]);
- for (r = casefile_get_reader (cf, NULL);
- casereader_read (r, &c) ;
- case_destroy (&c) )
+ for (; casereader_read (input, &c); case_destroy (&c))
{
- int case_missing=0;
- const int case_no = casereader_cnum (r);
-
- const double weight =
- dict_get_case_weight (dict, &c, &bad_weight_warn);
+ int case_missing = 0;
+ const double weight = dict_get_case_weight (dict, &c, NULL);
if ( cmd->miss == XMN_LISTWISE )
{
var_get_width (var)
);
- if ( value_is_missing (var, val))
+ if ( var_is_value_missing (var, val, exclude_values))
case_missing = 1;
free (val);
var_get_width (var)
);
- if ( value_is_missing (var, val) || case_missing )
+ if ( var_is_value_missing (var, val, exclude_values)
+ || case_missing )
{
free (val) ;
continue ;
factor_calc (&c, case_no, weight, case_missing);
}
+ ok = casereader_destroy (input);
for ( v = 0 ; v < n_dependent_vars ; ++v)
{
fctr = fctr->next;
}
- output_examine ();
+ if (ok)
+ output_examine ();
if ( totals )
metrics_destroy (&totals[i]);
}
}
-
- return true;
}
static void
-show_summary (struct variable **dependent_var, int n_dep_var,
+show_summary (const struct variable **dependent_var, int n_dep_var,
const struct factor *fctr)
{
static const char *subtitle[]=
0 != compare_values (prev, (*fs)->id[0],
var_get_width (fctr->indep_var[0])))
{
+ struct string vstr;
+ ds_init_empty (&vstr);
+ var_append_value_name (fctr->indep_var[0],
+ (*fs)->id[0], &vstr);
+
tab_text (tbl,
1,
(i * n_factors ) + count +
heading_rows,
TAB_LEFT | TAT_TITLE,
- var_get_value_name (fctr->indep_var[0],
- (*fs)->id[0])
+ ds_cstr (&vstr)
);
+ ds_destroy (&vstr);
+
if (fctr->indep_var[1] && count > 0 )
tab_hline (tbl, TAL_1, 1, n_cols - 1,
(i * n_factors ) + count + heading_rows);
prev = (*fs)->id[0];
-
if ( fctr->indep_var[1])
+ {
+ struct string vstr;
+ ds_init_empty (&vstr);
+ var_append_value_name (fctr->indep_var[1],
+ (*fs)->id[1], &vstr);
tab_text (tbl,
2,
(i * n_factors ) + count +
heading_rows,
TAB_LEFT | TAT_TITLE,
- var_get_value_name (fctr->indep_var[1], (*fs)->id[1])
+ ds_cstr (&vstr)
);
+ ds_destroy (&vstr);
+ }
populate_summary (tbl, heading_columns,
(i * n_factors) + count
static void
-show_extremes (struct variable **dependent_var, int n_dep_var,
+show_extremes (const struct variable **dependent_var, int n_dep_var,
const struct factor *fctr, int n_extremities)
{
int i;
if ( !prev || 0 != compare_values (prev, (*fs)->id[0],
var_get_width (fctr->indep_var[0])))
{
+ struct string vstr;
+ ds_init_empty (&vstr);
+ var_append_value_name (fctr->indep_var[0],
+ (*fs)->id[0], &vstr);
if ( count > 0 )
tab_hline (tbl, TAL_1, 1, n_cols - 1, row);
tab_text (tbl,
1, row,
TAB_LEFT | TAT_TITLE,
- var_get_value_name (fctr->indep_var[0],
- (*fs)->id[0])
+ ds_cstr (&vstr)
);
+
+ ds_destroy (&vstr);
}
prev = (*fs)->id[0];
tab_hline (tbl, TAL_1, 2, n_cols - 1, row);
if ( fctr->indep_var[1])
+ {
+ struct string vstr;
+ ds_init_empty (&vstr);
+ var_append_value_name (fctr->indep_var[1], (*fs)->id[1], &vstr);
+
tab_text (tbl, 2, row,
TAB_LEFT | TAT_TITLE,
- var_get_value_name (fctr->indep_var[1], (*fs)->id[1])
+ ds_cstr (&vstr)
);
+ ds_destroy (&vstr);
+ }
+
populate_extremes (tbl, heading_columns - 2,
row, n_extremities,
& (*fs)->m[i]);
/* Show the descriptives table */
void
-show_descriptives (struct variable **dependent_var,
+show_descriptives (const struct variable **dependent_var,
int n_dep_var,
struct factor *fctr)
{
if ( !prev || 0 != compare_values (prev, (*fs)->id[0],
var_get_width (fctr->indep_var[0])))
{
+ struct string vstr;
+ ds_init_empty (&vstr);
+ var_append_value_name (fctr->indep_var[0],
+ (*fs)->id[0], &vstr);
if ( count > 0 )
tab_hline (tbl, TAL_1, 1, n_cols - 1, row);
tab_text (tbl,
1, row,
TAB_LEFT | TAT_TITLE,
- var_get_value_name (fctr->indep_var[0],
- (*fs)->id[0])
+ ds_cstr (&vstr)
);
+
+ ds_destroy (&vstr);
}
prev = (*fs)->id[0];
tab_hline (tbl, TAL_1, 2, n_cols - 1, row);
if ( fctr->indep_var[1])
+ {
+ struct string vstr;
+ ds_init_empty (&vstr);
+ var_append_value_name (fctr->indep_var[1], (*fs)->id[1], &vstr);
+
tab_text (tbl, 2, row,
TAB_LEFT | TAT_TITLE,
- var_get_value_name (fctr->indep_var[1], (*fs)->id[1])
+ ds_cstr (&vstr)
);
+ ds_destroy (&vstr);
+ }
+
populate_descriptives (tbl, heading_columns - 2,
row, & (*fs)->m[i]);
}
-
-
/* Fill in the descriptives data */
void
populate_descriptives (struct tab_table *tbl, int col, int row,
const struct metrics *m)
{
-
- const double t = gsl_cdf_tdist_Qinv (1 - cmd.n_cinterval[0]/100.0/2.0, \
+ const double t = gsl_cdf_tdist_Qinv ((1 - cmd.n_cinterval[0] / 100.0)/2.0,
m->n -1);
-
tab_text (tbl, col,
row,
TAB_LEFT | TAT_TITLE,
for ( fs = fctr->fs ; *fs ; ++fs )
{
+ struct string str;
double y_min = DBL_MAX;
double y_max = -DBL_MAX;
struct chart *ch = chart_create ();
- const char *s = factor_to_string (fctr, *fs, 0 );
+ ds_init_empty (&str);
+ factor_to_string (fctr, *fs, 0, &str );
- chart_write_title (ch, s);
+ chart_write_title (ch, ds_cstr (&str));
for ( i = 0 ; i < n_vars ; ++i )
{
}
chart_submit (ch);
-
+ ds_destroy (&str);
}
}
for ( fs = fctr->fs ; *fs ; ++fs )
{
-
- const char *s = factor_to_string_concise (fctr, *fs);
-
+ struct string str;
const double box_width = (ch->data_right - ch->data_left)
/ (n_factors * 2.0 ) ;
const double box_centre = ( f++ * 2 + 1) * box_width
+ ch->data_left;
+ ds_init_empty (&str);
+ factor_to_string_concise (fctr, *fs, &str);
+
boxplot_draw_boxplot (ch,
box_centre, box_width,
& (*fs)->m[i],
- s);
+ ds_cstr (&str));
+ ds_destroy (&str);
}
}
else if ( ch )
/* Show the percentiles */
void
-show_percentiles (struct variable **dependent_var,
+show_percentiles (const struct variable **dependent_var,
int n_dep_var,
struct factor *fctr)
{
if ( !prev || 0 != compare_values (prev, (*fs)->id[0],
var_get_width (fctr->indep_var[0])))
{
+ struct string vstr;
+ ds_init_empty (&vstr);
+ var_append_value_name (fctr->indep_var[0],
+ (*fs)->id[0], &vstr);
+
if ( count > 0 )
tab_hline (tbl, TAL_1, 1, n_cols - 1, row);
tab_text (tbl,
1, row,
TAB_LEFT | TAT_TITLE,
- var_get_value_name (fctr->indep_var[0],
- (*fs)->id[0])
+ ds_cstr (&vstr)
);
-
+ ds_destroy (&vstr);
}
prev = (*fs)->id[0];
tab_hline (tbl, TAL_1, 2, n_cols - 1, row);
if ( fctr->indep_var[1])
+ {
+ struct string vstr;
+ ds_init_empty (&vstr);
+ var_append_value_name (fctr->indep_var[1], (*fs)->id[1], &vstr);
+
tab_text (tbl, 2, row,
TAB_LEFT | TAT_TITLE,
- var_get_value_name (fctr->indep_var[1], (*fs)->id[1])
+ ds_cstr (&vstr)
);
+ ds_destroy (&vstr);
+ }
+
populate_percentiles (tbl, n_heading_columns - 1,
row, & (*fs)->m[i]);
}
-
-
-const char *
+/* Appends to STR a human readable description of the factor cell FS
+ of FCTR. With one factor the text is "F0 = v0"; with two factors
+ it is "F0 = v0; F1 = v1", where Fn is var_to_string() of the n-th
+ independent variable and vn is the value name appended by
+ var_append_value_name().
+
+ If VAR is non-null, the text is prefixed with var_to_string (VAR)
+ and the factor description is wrapped in parentheses, e.g.
+ "VAR (F0 = v0; F1 = v1)". If VAR is null no parentheses are
+ written at all.
+
+ STR must already be initialized; the callers in this file use
+ ds_init_empty() before the call and ds_destroy() afterwards. */
+static void
factor_to_string (const struct factor *fctr,
const struct factor_statistics *fs,
- const struct variable *var)
+ const struct variable *var,
+ struct string *str
+ )
{
-
- static char buf1[100];
- char buf2[100];
-
- strcpy (buf1,"");
-
if (var)
- sprintf (buf1, "%s (",var_to_string (var) );
+ ds_put_format (str, "%s (",var_to_string (var) );
- snprintf (buf2, 100, "%s = %s",
- var_to_string (fctr->indep_var[0]),
- var_get_value_name (fctr->indep_var[0], fs->id[0]));
+ ds_put_format (str, "%s = ",
+ var_to_string (fctr->indep_var[0]));
- strcat (buf1, buf2);
+ var_append_value_name (fctr->indep_var[0], fs->id[0], str);
if ( fctr->indep_var[1] )
{
- sprintf (buf2, "; %s = %s)",
- var_to_string (fctr->indep_var[1]),
- var_get_value_name (fctr->indep_var[1], fs->id[1]));
- strcat (buf1, buf2);
+ ds_put_format (str, "; %s = ",
+ var_to_string (fctr->indep_var[1]));
+
+ var_append_value_name (fctr->indep_var[1], fs->id[1], str);
+
+ /* The value name has to precede the closing parenthesis, so the
+ ")" cannot be part of the format string above. It is only
+ written when VAR caused a "(" to be opened. */
+ if ( var )
+ ds_put_cstr (str, ")");
}
else
{
if ( var )
- strcat (buf1, ")");
+ ds_put_cstr (str, ")");
}
-
- return buf1;
}
-
-const char *
+/* Appends to STR a short description of the factor cell FS of FCTR:
+ the value name of the first independent variable and, if FCTR has
+ a second independent variable, a "," followed by that variable's
+ value name and a ")".
+
+ Unlike factor_to_string(), the variables' own names are not
+ written. STR is only appended to, never initialized or destroyed
+ here; the caller in this file uses ds_init_empty() beforehand.
+
+ NOTE(review): the trailing ")" has no matching "(". This mirrors
+ the ",%s)" format of the code being replaced -- confirm whether
+ the unbalanced parenthesis is intended. */
+static void
factor_to_string_concise (const struct factor *fctr,
- struct factor_statistics *fs)
+ const struct factor_statistics *fs,
+ struct string *str
+ )
{
-
- static char buf[100];
-
- char buf2[100];
-
- snprintf (buf, 100, "%s",
- var_get_value_name (fctr->indep_var[0], fs->id[0]));
+ var_append_value_name (fctr->indep_var[0], fs->id[0], str);
if ( fctr->indep_var[1] )
{
- sprintf (buf2, ",%s)", var_get_value_name (fctr->indep_var[1],
- fs->id[1]) );
- strcat (buf, buf2);
- }
+ ds_put_cstr (str, ",");
+ var_append_value_name (fctr->indep_var[1],fs->id[1], str);
- return buf;
+ ds_put_cstr (str, ")");
+ }
}
+
+/*
+ Local Variables:
+ mode: c
+ End:
+*/