-/* PSPP - computes sample statistics.
- Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
+/* PSPP - a program for statistical analysis.
+ Copyright (C) 1997-9, 2000, 2009 Free Software Foundation, Inc.
- This program is free software; you can redistribute it and/or
- modify it under the terms of the GNU General Public License as
- published by the Free Software Foundation; either version 2 of the
- License, or (at your option) any later version.
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
- This program is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
- along with this program; if not, write to the Free Software
- Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
- 02110-1301, USA. */
-
-/* FIXME: Many possible optimizations. */
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
#include <data/dictionary.h>
#include <data/procedure.h>
#include <data/transformations.h>
#include <data/dictionary.h>
#include <data/procedure.h>
#include <data/transformations.h>
#include <language/dictionary/split-file.h>
#include <language/lexer/lexer.h>
#include <language/lexer/variable-parser.h>
#include <language/dictionary/split-file.h>
#include <language/lexer/lexer.h>
#include <language/lexer/variable-parser.h>
#include <libpspp/message.h>
#include <libpspp/assertion.h>
#include <math/moments.h>
#include <output/manager.h>
#include <output/table.h>
#include <libpspp/message.h>
#include <libpspp/assertion.h>
#include <math/moments.h>
#include <output/manager.h>
#include <output/table.h>
struct variable *z_var; /* New z-score variable. */
double mean; /* Distribution mean. */
double std_dev; /* Distribution standard deviation. */
struct variable *z_var; /* New z-score variable. */
double mean; /* Distribution mean. */
double std_dev; /* Distribution standard deviation. */
size_t var_cnt; /* Number of variables. */
enum dsc_missing_type missing_type; /* Treatment of missing values. */
size_t var_cnt; /* Number of variables. */
enum dsc_missing_type missing_type; /* Treatment of missing values. */
- struct variable *v; /* Variable to calculate on. */
- char z_name[LONG_NAME_LEN + 1]; /* Name for z-score variable. */
+ const struct variable *v; /* Variable to calculate on. */
+ char z_name[VAR_NAME_LEN + 1]; /* Name for z-score variable. */
double valid, missing; /* Valid, missing counts. */
struct moments *moments; /* Moments. */
double min, max; /* Minimum and maximum values. */
double valid, missing; /* Valid, missing counts. */
struct moments *moments; /* Moments. */
double min, max; /* Minimum and maximum values. */
{
DSC_LINE, /* Abbreviated format. */
DSC_SERIAL /* Long format. */
};
/* A DESCRIPTIVES procedure. */
{
DSC_LINE, /* Abbreviated format. */
DSC_SERIAL /* Long format. */
};
/* A DESCRIPTIVES procedure. */
int show_var_labels; /* Nonzero to show variable labels. */
int show_index; /* Nonzero to show variable index. */
enum dsc_format format; /* Output format. */
int show_var_labels; /* Nonzero to show variable labels. */
int show_index; /* Nonzero to show variable index. */
enum dsc_format format; /* Output format. */
static void dump_z_table (struct dsc_proc *);
static void setup_z_trns (struct dsc_proc *, struct dataset *);
/* Procedure execution functions. */
static void dump_z_table (struct dsc_proc *);
static void setup_z_trns (struct dsc_proc *, struct dataset *);
/* Procedure execution functions. */
-static bool calc_descriptives (const struct ccase *first,
- const struct casefile *, void *dsc_,
- const struct dataset *);
+static void calc_descriptives (struct dsc_proc *, struct casereader *,
+ struct dataset *);
/* Create and initialize dsc. */
dsc = xmalloc (sizeof *dsc);
dsc->vars = NULL;
dsc->var_cnt = 0;
dsc->missing_type = DSC_VARIABLE;
/* Create and initialize dsc. */
dsc = xmalloc (sizeof *dsc);
dsc->vars = NULL;
dsc->var_cnt = 0;
dsc->missing_type = DSC_VARIABLE;
{
if (lex_match_id (lexer, "VARIABLE"))
dsc->missing_type = DSC_VARIABLE;
else if (lex_match_id (lexer, "LISTWISE"))
dsc->missing_type = DSC_LISTWISE;
else if (lex_match_id (lexer, "INCLUDE"))
{
if (lex_match_id (lexer, "VARIABLE"))
dsc->missing_type = DSC_VARIABLE;
else if (lex_match_id (lexer, "LISTWISE"))
dsc->missing_type = DSC_LISTWISE;
else if (lex_match_id (lexer, "INCLUDE"))
{
if (lex_match_id (lexer, "LABELS"))
dsc->show_var_labels = 1;
{
if (lex_match_id (lexer, "LABELS"))
dsc->show_var_labels = 1;
dsc->show_stats |= (1ul << DSC_N_STATS) - 1;
else if (lex_match_id (lexer, "DEFAULT"))
dsc->show_stats |= DEFAULT_STATS;
dsc->show_stats |= (1ul << DSC_N_STATS) - 1;
else if (lex_match_id (lexer, "DEFAULT"))
dsc->show_stats |= DEFAULT_STATS;
{
lex_match (lexer, '=');
if (lex_match_id (lexer, "NAME"))
dsc->sort_by_stat = DSC_NAME;
{
lex_match (lexer, '=');
if (lex_match_id (lexer, "NAME"))
dsc->sort_by_stat = DSC_NAME;
{
dsc->sort_by_stat = match_statistic (lexer);
if (dsc->sort_by_stat == DSC_NONE )
dsc->sort_by_stat = DSC_MEAN;
}
{
dsc->sort_by_stat = match_statistic (lexer);
if (dsc->sort_by_stat == DSC_NONE )
dsc->sort_by_stat = DSC_MEAN;
}
-
- if (!parse_variables (lexer, dataset_dict (ds), &vars, &var_cnt,
+
+ if (!parse_variables_const (lexer, dict, &vars, &var_cnt,
/* Construct z-score varnames, show translation table. */
if (z_cnt || save_z_scores)
{
/* Construct z-score varnames, show translation table. */
if (z_cnt || save_z_scores)
{
{
if (!generate_z_varname (dict, dsc, dsc->vars[i].z_name,
var_get_name (dsc->vars[i].v),
&gen_cnt))
goto error;
z_cnt++;
{
if (!generate_z_varname (dict, dsc, dsc->vars[i].z_name,
var_get_name (dsc->vars[i].v),
&gen_cnt))
goto error;
z_cnt++;
if (dsc->calc_stats & (1ul << i) && dsc_info[i].moment > dsc->max_moment)
dsc->max_moment = dsc_info[i].moment;
if (dsc->max_moment != MOMENT_NONE)
if (dsc->calc_stats & (1ul << i) && dsc_info[i].moment > dsc->max_moment)
dsc->max_moment = dsc_info[i].moment;
if (dsc->max_moment != MOMENT_NONE)
- ok = multipass_procedure_with_splits (ds, calc_descriptives, dsc);
+ grouper = casegrouper_create_splits (proc_open (ds), dict);
+ while (casegrouper_get_next_group (grouper, &group))
+ calc_descriptives (dsc, group, ds);
+ ok = casegrouper_destroy (grouper);
+ ok = proc_commit (ds) && ok;
/* Returns false if NAME is a duplicate of any existing variable name or
of any previously-declared z-var name; otherwise returns true. */
static bool
/* Returns false if NAME is a duplicate of any existing variable name or
of any previously-declared z-var name; otherwise returns true. */
static bool
copies the new name into Z_NAME. On failure, returns false. */
static bool
generate_z_varname (const struct dictionary *dict, struct dsc_proc *dsc, char *z_name,
copies the new name into Z_NAME. On failure, returns false. */
static bool
generate_z_varname (const struct dictionary *dict, struct dsc_proc *dsc, char *z_name,
t = tab_create (2, cnt + 1, 0);
tab_title (t, _("Mapping of variables to corresponding Z-scores."));
tab_columns (t, SOM_COL_DOWN, 1);
t = tab_create (2, cnt + 1, 0);
tab_title (t, _("Mapping of variables to corresponding Z-scores."));
tab_columns (t, SOM_COL_DOWN, 1);
tab_hline (t, TAL_2, 0, 1, 1);
tab_text (t, 0, 0, TAB_CENTER | TAT_TITLE, _("Source"));
tab_text (t, 1, 0, TAB_CENTER | TAT_TITLE, _("Target"));
tab_hline (t, TAL_2, 0, 1, 1);
tab_text (t, 0, 0, TAB_CENTER | TAT_TITLE, _("Source"));
tab_text (t, 1, 0, TAB_CENTER | TAT_TITLE, _("Target"));
for (i = 0, y = 1; i < dsc->var_cnt; i++)
if (dsc->vars[i].z_name[0] != '\0')
{
for (i = 0, y = 1; i < dsc->var_cnt; i++)
if (dsc->vars[i].z_name[0] != '\0')
{
tab_text (t, 1, y++, TAB_LEFT, dsc->vars[i].z_name);
}
}
tab_text (t, 1, y++, TAB_LEFT, dsc->vars[i].z_name);
}
}
- double score = case_num (c, *vars);
- if ( score == SYSMIS
- || (!t->include_user_missing
- && var_is_num_user_missing (*vars, score)))
+ double score = case_num (*c, *vars);
+ if (var_is_num_missing (*vars, score, t->exclude))
- double input = case_num (c, z->src_var);
- double *output = &case_data_rw (c, z->z_var)->f;
+ double input = case_num (*c, z->src_var);
+ double *output = &case_data_rw (*c, z->z_var)->f;
- if (z->mean == SYSMIS || z->std_dev == SYSMIS
- || all_sysmis || input == SYSMIS
- || (!t->include_user_missing
- && var_is_num_user_missing (z->src_var, input)))
+ if (z->mean == SYSMIS || z->std_dev == SYSMIS || all_sysmis
+ || var_is_num_missing (z->src_var, input, t->exclude))
t->z_scores = xnmalloc (cnt, sizeof *t->z_scores);
t->z_score_cnt = cnt;
t->missing_type = dsc->missing_type;
t->z_scores = xnmalloc (cnt, sizeof *t->z_scores);
t->z_score_cnt = cnt;
t->missing_type = dsc->missing_type;
-static bool
-calc_descriptives (const struct ccase *first,
- const struct casefile *cf, void *dsc_,
- const struct dataset *ds)
+static void
+calc_descriptives (struct dsc_proc *dsc, struct casereader *group,
+ struct dataset *ds)
- output_split_file_values (ds, first);
+ c = casereader_peek (group, 0);
+ if (c == NULL)
+ {
+ casereader_destroy (group);
+ return;
+ }
+ output_split_file_values (ds, c);
+ case_unref (c);
+
+ group = casereader_create_filter_weight (group, dataset_dict (ds),
+ NULL, NULL);
+
+ pass1 = group;
+ pass2 = dsc->max_moment <= MOMENT_MEAN ? NULL : casereader_clone (pass1);
- double x = case_num (&c, dv->v);
-
- if (dsc->missing_type != DSC_LISTWISE
- && (x == SYSMIS
- || (!dsc->include_user_missing
- && var_is_num_user_missing (dv->v, x))))
+ double x = case_num (c, dv->v);
+
+ if (var_is_num_missing (dv->v, x, dsc->exclude))
- double x = case_num (&c, dv->v);
-
- if (dsc->missing_type != DSC_LISTWISE
- && (x == SYSMIS
- || (!dsc->include_user_missing
- && var_is_num_user_missing (dv->v, x))))
+ double x = case_num (c, dv->v);
+
+ if (var_is_num_missing (dv->v, x, dsc->exclude))
if (dsc->calc_stats & (1ul << DSC_STDDEV)
&& dv->stats[DSC_VARIANCE] != SYSMIS)
dv->stats[DSC_STDDEV] = sqrt (dv->stats[DSC_VARIANCE]);
if (dsc->calc_stats & (1ul << DSC_STDDEV)
&& dv->stats[DSC_VARIANCE] != SYSMIS)
dv->stats[DSC_STDDEV] = sqrt (dv->stats[DSC_VARIANCE]);
if (dv->stats[DSC_KURTOSIS] != SYSMIS)
dv->stats[DSC_SEKURT] = calc_sekurt (W);
if (dsc->calc_stats & (1ul << DSC_SESKEW)
if (dv->stats[DSC_KURTOSIS] != SYSMIS)
dv->stats[DSC_SEKURT] = calc_sekurt (W);
if (dsc->calc_stats & (1ul << DSC_SESKEW)
}
/* Returns true if any of the descriptives variables in DSC's
variable list have missing values in case C, false otherwise. */
static bool
}
/* Returns true if any of the descriptives variables in DSC's
variable list have missing values in case C, false otherwise. */
static bool
struct dsc_var *dv = &dsc->vars[i];
double x = case_num (c, dv->v);
struct dsc_var *dv = &dsc->vars[i];
double x = case_num (c, dv->v);
- if (x == SYSMIS
- || (!dsc->include_user_missing
- && var_is_num_user_missing (dv->v, x)))
+ if (var_is_num_missing (dv->v, x, dsc->exclude))
tab_box (t, -1, -1, -1, TAL_1, 1, 0, nc - 1, dsc->var_cnt);
tab_hline (t, TAL_2, 0, nc - 1, 1);
tab_vline (t, TAL_2, 1, 0, dsc->var_cnt);
tab_box (t, -1, -1, -1, TAL_1, 1, 0, nc - 1, dsc->var_cnt);
tab_hline (t, TAL_2, 0, nc - 1, 1);
tab_vline (t, TAL_2, 1, 0, dsc->var_cnt);
nc = 0;
tab_text (t, nc++, 0, TAB_LEFT | TAT_TITLE, _("Variable"));
nc = 0;
tab_text (t, nc++, 0, TAB_LEFT | TAT_TITLE, _("Variable"));
nc = 0;
tab_text (t, nc++, i + 1, TAB_LEFT, var_get_name (dv->v));
nc = 0;
tab_text (t, nc++, i + 1, TAB_LEFT, var_get_name (dv->v));
- tab_text (t, nc++, i + 1, TAT_PRINTF, "%g", dv->valid);
+ tab_text_format (t, nc++, i + 1, 0, "%g", dv->valid);
- tab_text (t, nc++, i + 1, TAT_PRINTF, "%g", dv->missing);
+ tab_text_format (t, nc++, i + 1, 0, "%g", dv->missing);
+
- tab_float (t, nc++, i + 1, TAB_NONE, dv->stats[j], 10, 3);
+ tab_double (t, nc++, i + 1, TAB_NONE, dv->stats[j], NULL);
if (dsc->sort_by_stat == DSC_NAME)
result = strcasecmp (var_get_name (a->v), var_get_name (b->v));
if (dsc->sort_by_stat == DSC_NAME)
result = strcasecmp (var_get_name (a->v), var_get_name (b->v));