1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 1997-2000, 2009-2014 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
24 #include "data/casegrouper.h"
25 #include "data/casereader.h"
26 #include "data/casewriter.h"
27 #include "data/dataset.h"
28 #include "data/dictionary.h"
29 #include "data/transformations.h"
30 #include "data/variable.h"
31 #include "language/command.h"
32 #include "language/dictionary/split-file.h"
33 #include "language/lexer/lexer.h"
34 #include "language/lexer/variable-parser.h"
35 #include "libpspp/array.h"
36 #include "libpspp/assertion.h"
37 #include "libpspp/compiler.h"
38 #include "libpspp/i18n.h"
39 #include "libpspp/message.h"
40 #include "math/moments.h"
41 #include "output/pivot-table.h"
43 #include "gl/xalloc.h"
46 #define _(msgid) gettext (msgid)
47 #define N_(msgid) msgid
49 /* DESCRIPTIVES private data. */
53 /* Handling of missing values. */
56 DSC_VARIABLE, /* Handle missing values on a per-variable basis. */
57 DSC_LISTWISE /* Discard entire case if any variable is missing. */
60 /* Describes properties of a distribution for the purpose of
61 calculating a Z-score. */
64 const struct variable *src_var; /* Variable on which z-score is based. */
65 struct variable *z_var; /* New z-score variable. */
66 double mean; /* Distribution mean. */
67 double std_dev; /* Distribution standard deviation. */
70 /* DESCRIPTIVES transformation (for calculating Z-scores). */
73 struct dsc_z_score *z_scores; /* Array of Z-scores. */
74 int n_z_scores; /* Number of Z-scores. */
75 const struct variable **vars; /* Variables for listwise missing checks. */
76 size_t n_vars; /* Number of variables. */
77 enum dsc_missing_type missing_type; /* Treatment of missing values. */
78 enum mv_class exclude; /* Classes of missing values to exclude. */
79 const struct variable *filter; /* Dictionary FILTER BY variable. */
80 struct casereader *z_reader; /* Reader for count, mean, stddev. */
81 casenumber count; /* Number left in this SPLIT FILE group.*/
85 /* Statistics. Used as bit indexes, so must be 32 or fewer. */
88 DSC_MEAN = 0, DSC_SEMEAN, DSC_STDDEV, DSC_VARIANCE, DSC_KURTOSIS,
89 DSC_SEKURT, DSC_SKEWNESS, DSC_SESKEW, DSC_RANGE, DSC_MIN,
90 DSC_MAX, DSC_SUM, DSC_N_STATS,
92 /* Only valid as sort criteria. */
93 DSC_NAME = -2, /* Sort by name. */
94 DSC_NONE = -1 /* Unsorted. */
97 /* Describes one statistic. */
98 struct dsc_statistic_info
100 const char *identifier; /* Identifier. */
101 const char *name; /* Full name. */
102 enum moment moment; /* Highest moment needed to calculate. */
105 /* Table of statistics, indexed by DSC_*. */
106 static const struct dsc_statistic_info dsc_info[DSC_N_STATS] =
108 {"MEAN", N_("Mean"), MOMENT_MEAN},
109 {"SEMEAN", N_("S.E. Mean"), MOMENT_VARIANCE},
110 {"STDDEV", N_("Std Dev"), MOMENT_VARIANCE},
111 {"VARIANCE", N_("Variance"), MOMENT_VARIANCE},
112 {"KURTOSIS", N_("Kurtosis"), MOMENT_KURTOSIS},
113 {"SEKURTOSIS", N_("S.E. Kurt"), MOMENT_NONE},
114 {"SKEWNESS", N_("Skewness"), MOMENT_SKEWNESS},
115 {"SESKEWNESS", N_("S.E. Skew"), MOMENT_NONE},
116 {"RANGE", N_("Range"), MOMENT_NONE},
117 {"MINIMUM", N_("Minimum"), MOMENT_NONE},
118 {"MAXIMUM", N_("Maximum"), MOMENT_NONE},
119 {"SUM", N_("Sum"), MOMENT_MEAN},
122 /* Statistics calculated by default if none are explicitly
124 #define DEFAULT_STATS \
125 ((1ul << DSC_MEAN) | (1ul << DSC_STDDEV) | (1ul << DSC_MIN) \
128 /* A variable specified on DESCRIPTIVES. */
131 const struct variable *v; /* Variable to calculate on. */
132 char *z_name; /* Name for z-score variable. */
133 double valid, missing; /* Valid, missing counts. */
134 struct moments *moments; /* Moments. */
135 double min, max; /* Maximum and mimimum values. */
136 double stats[DSC_N_STATS]; /* All the stats' values. */
139 /* A DESCRIPTIVES procedure. */
142 /* Per-variable info. */
143 struct dictionary *dict; /* Dictionary. */
144 struct dsc_var *vars; /* Variables. */
145 size_t n_vars; /* Number of variables. */
148 enum dsc_missing_type missing_type; /* Treatment of missing values. */
149 enum mv_class exclude; /* Classes of missing values to exclude. */
151 /* Accumulated results. */
152 double missing_listwise; /* Sum of weights of cases missing listwise. */
153 double valid; /* Sum of weights of valid cases. */
154 bool bad_warn; /* Warn if bad weight found. */
155 enum dsc_statistic sort_by_stat; /* Statistic to sort by; -1: name. */
156 int sort_ascending; /* !0: ascending order; 0: descending. */
157 unsigned long show_stats; /* Statistics to display. */
158 unsigned long calc_stats; /* Statistics to calculate. */
159 enum moment max_moment; /* Highest moment needed for stats. */
162 struct casewriter *z_writer; /* Mean and stddev per SPLIT FILE group. */
166 static enum dsc_statistic match_statistic (struct lexer *);
167 static void free_dsc_proc (struct dsc_proc *);
169 /* Z-score functions. */
170 static bool try_name (const struct dictionary *dict,
171 struct dsc_proc *dsc, const char *name);
172 static char *generate_z_varname (const struct dictionary *dict,
173 struct dsc_proc *dsc,
174 const char *name, int *n_zs);
175 static void dump_z_table (struct dsc_proc *);
176 static void setup_z_trns (struct dsc_proc *, struct dataset *);
178 /* Procedure execution functions. */
179 static void calc_descriptives (struct dsc_proc *, struct casereader *,
181 static void display (struct dsc_proc *dsc);
183 /* Parser and outline. */
185 /* Handles DESCRIPTIVES. */
187 cmd_descriptives (struct lexer *lexer, struct dataset *ds)
189 struct dictionary *dict = dataset_dict (ds);
190 struct dsc_proc *dsc;
191 const struct variable **vars = NULL;
193 int save_z_scores = 0;
198 struct casegrouper *grouper;
199 struct casereader *group;
201 /* Create and initialize dsc. */
202 dsc = xmalloc (sizeof *dsc);
206 dsc->missing_type = DSC_VARIABLE;
207 dsc->exclude = MV_ANY;
208 dsc->missing_listwise = 0.;
211 dsc->sort_by_stat = DSC_NONE;
212 dsc->sort_ascending = 1;
213 dsc->show_stats = dsc->calc_stats = DEFAULT_STATS;
214 dsc->z_writer = NULL;
216 /* Parse DESCRIPTIVES. */
217 while (lex_token (lexer) != T_ENDCMD)
219 if (lex_match_id (lexer, "MISSING"))
221 lex_match (lexer, T_EQUALS);
222 while (lex_token (lexer) != T_ENDCMD && lex_token (lexer) != T_SLASH)
224 if (lex_match_id (lexer, "VARIABLE"))
225 dsc->missing_type = DSC_VARIABLE;
226 else if (lex_match_id (lexer, "LISTWISE"))
227 dsc->missing_type = DSC_LISTWISE;
228 else if (lex_match_id (lexer, "INCLUDE"))
229 dsc->exclude = MV_SYSTEM;
232 lex_error (lexer, NULL);
235 lex_match (lexer, T_COMMA);
238 else if (lex_match_id (lexer, "SAVE"))
240 else if (lex_match_id (lexer, "FORMAT"))
242 lex_match (lexer, T_EQUALS);
243 while (lex_token (lexer) != T_ENDCMD && lex_token (lexer) != T_SLASH)
245 if (lex_match_id (lexer, "LABELS")
246 || lex_match_id (lexer, "NOLABELS")
247 || lex_match_id (lexer, "INDEX")
248 || lex_match_id (lexer, "NOINDEX")
249 || lex_match_id (lexer, "LINE")
250 || lex_match_id (lexer, "SERIAL"))
256 lex_error (lexer, NULL);
259 lex_match (lexer, T_COMMA);
262 else if (lex_match_id (lexer, "STATISTICS"))
264 lex_match (lexer, T_EQUALS);
266 while (lex_token (lexer) != T_ENDCMD && lex_token (lexer) != T_SLASH)
268 if (lex_match (lexer, T_ALL))
269 dsc->show_stats |= (1ul << DSC_N_STATS) - 1;
270 else if (lex_match_id (lexer, "DEFAULT"))
271 dsc->show_stats |= DEFAULT_STATS;
274 enum dsc_statistic s = match_statistic (lexer);
277 lex_error (lexer, NULL);
280 dsc->show_stats |= 1ul << s;
282 lex_match (lexer, T_COMMA);
284 if (dsc->show_stats == 0)
285 dsc->show_stats = DEFAULT_STATS;
287 else if (lex_match_id (lexer, "SORT"))
289 lex_match (lexer, T_EQUALS);
290 if (lex_match_id (lexer, "NAME"))
291 dsc->sort_by_stat = DSC_NAME;
294 dsc->sort_by_stat = match_statistic (lexer);
295 if (dsc->sort_by_stat == DSC_NONE)
296 dsc->sort_by_stat = DSC_MEAN;
298 if (lex_match (lexer, T_LPAREN))
300 if (lex_match_id (lexer, "A"))
301 dsc->sort_ascending = 1;
302 else if (lex_match_id (lexer, "D"))
303 dsc->sort_ascending = 0;
305 lex_error (lexer, NULL);
306 if (! lex_force_match (lexer, T_RPAREN))
310 else if (n_vars == 0)
312 if (lex_next_token (lexer, 1) == T_EQUALS)
314 lex_match_id (lexer, "VARIABLES");
315 lex_match (lexer, T_EQUALS);
318 while (lex_token (lexer) != T_ENDCMD && lex_token (lexer) != T_SLASH)
322 if (!parse_variables_const (lexer, dict, &vars, &n_vars,
323 PV_APPEND | PV_NO_DUPLICATE | PV_NUMERIC))
326 dsc->vars = xnrealloc ((void *)dsc->vars, n_vars, sizeof *dsc->vars);
327 for (i = dsc->n_vars; i < n_vars; i++)
329 struct dsc_var *dv = &dsc->vars[i];
334 dsc->n_vars = n_vars;
336 if (lex_match (lexer, T_LPAREN))
338 if (lex_token (lexer) != T_ID)
340 lex_error (lexer, NULL);
343 if (try_name (dict, dsc, lex_tokcstr (lexer)))
345 struct dsc_var *dsc_var = &dsc->vars[dsc->n_vars - 1];
346 dsc_var->z_name = xstrdup (lex_tokcstr (lexer));
350 msg (SE, _("Z-score variable name %s would be"
351 " a duplicate variable name."), lex_tokcstr (lexer));
353 if (!lex_force_match (lexer, T_RPAREN))
360 lex_error (lexer, NULL);
364 lex_match (lexer, T_SLASH);
368 msg (SE, _("No variables specified."));
372 /* Construct z-score varnames, show translation table. */
373 if (n_zs || save_z_scores)
375 struct caseproto *proto;
381 for (i = 0; i < dsc->n_vars; i++)
383 struct dsc_var *dsc_var = &dsc->vars[i];
384 if (dsc_var->z_name == NULL)
386 const char *name = var_get_name (dsc_var->v);
387 dsc_var->z_name = generate_z_varname (dict, dsc, name,
389 if (dsc_var->z_name == NULL)
397 /* It would be better to handle Z scores correctly (however we define
398 that) when TEMPORARY is in effect, but in the meantime this at least
399 prevents a use-after-free error. See bug #38786. */
400 if (proc_make_temporary_transformations_permanent (ds))
401 msg (SW, _("DESCRIPTIVES with Z scores ignores TEMPORARY. "
402 "Temporary transformations will be made permanent."));
404 proto = caseproto_create ();
405 for (i = 0; i < 1 + 2 * n_zs; i++)
406 proto = caseproto_add_width (proto, 0);
407 dsc->z_writer = autopaging_writer_create (proto);
408 caseproto_unref (proto);
413 /* Figure out statistics to display. */
414 if (dsc->show_stats & (1ul << DSC_SKEWNESS))
415 dsc->show_stats |= 1ul << DSC_SESKEW;
416 if (dsc->show_stats & (1ul << DSC_KURTOSIS))
417 dsc->show_stats |= 1ul << DSC_SEKURT;
419 /* Figure out which statistics to calculate. */
420 dsc->calc_stats = dsc->show_stats;
422 dsc->calc_stats |= (1ul << DSC_MEAN) | (1ul << DSC_STDDEV);
423 if (dsc->sort_by_stat >= 0)
424 dsc->calc_stats |= 1ul << dsc->sort_by_stat;
425 if (dsc->show_stats & (1ul << DSC_SESKEW))
426 dsc->calc_stats |= 1ul << DSC_SKEWNESS;
427 if (dsc->show_stats & (1ul << DSC_SEKURT))
428 dsc->calc_stats |= 1ul << DSC_KURTOSIS;
430 /* Figure out maximum moment needed and allocate moments for
432 dsc->max_moment = MOMENT_NONE;
433 for (i = 0; i < DSC_N_STATS; i++)
434 if (dsc->calc_stats & (1ul << i) && dsc_info[i].moment > dsc->max_moment)
435 dsc->max_moment = dsc_info[i].moment;
436 if (dsc->max_moment != MOMENT_NONE)
437 for (i = 0; i < dsc->n_vars; i++)
438 dsc->vars[i].moments = moments_create (dsc->max_moment);
441 grouper = casegrouper_create_splits (proc_open_filtering (ds, false), dict);
442 while (casegrouper_get_next_group (grouper, &group))
443 calc_descriptives (dsc, group, ds);
444 ok = casegrouper_destroy (grouper);
445 ok = proc_commit (ds) && ok;
449 setup_z_trns (dsc, ds);
454 return ok ? CMD_SUCCESS : CMD_CASCADING_FAILURE;
462 /* Returns the statistic named by the current token and skips past the token.
463 Returns DSC_NONE if no statistic is given (e.g., subcommand with no
464 specifiers). Emits an error if the current token ID does not name a
466 static enum dsc_statistic
467 match_statistic (struct lexer *lexer)
469 if (lex_token (lexer) == T_ID)
471 enum dsc_statistic stat;
473 for (stat = 0; stat < DSC_N_STATS; stat++)
474 if (lex_match_id (lexer, dsc_info[stat].identifier))
477 lex_error (lexer, _("expecting statistic name: reverting to default"));
486 free_dsc_proc (struct dsc_proc *dsc)
493 for (i = 0; i < dsc->n_vars; i++)
495 struct dsc_var *dsc_var = &dsc->vars[i];
496 free (dsc_var->z_name);
497 moments_destroy (dsc_var->moments);
499 casewriter_destroy (dsc->z_writer);
506 /* Returns false if NAME is a duplicate of any existing variable name or
507 of any previously-declared z-var name; otherwise returns true. */
509 try_name (const struct dictionary *dict, struct dsc_proc *dsc,
514 if (dict_lookup_var (dict, name) != NULL)
516 for (i = 0; i < dsc->n_vars; i++)
518 struct dsc_var *dsc_var = &dsc->vars[i];
519 if (dsc_var->z_name != NULL && !utf8_strcasecmp (dsc_var->z_name, name))
525 /* Generates a name for a Z-score variable based on a variable
526 named VAR_NAME, given that *Z_CNT generated variable names are
527 known to already exist. If successful, returns the new name
528 as a dynamically allocated string. On failure, returns NULL. */
530 generate_z_varname (const struct dictionary *dict, struct dsc_proc *dsc,
531 const char *var_name, int *n_zs)
533 char *z_name, *trunc_name;
535 /* Try a name based on the original variable name. */
536 z_name = xasprintf ("Z%s", var_name);
537 trunc_name = utf8_encoding_trunc (z_name, dict_get_encoding (dict),
540 if (try_name (dict, dsc, trunc_name))
544 /* Generate a synthetic name. */
552 sprintf (name, "ZSC%03d", *n_zs);
553 else if (*n_zs <= 108)
554 sprintf (name, "STDZ%02d", *n_zs - 99);
555 else if (*n_zs <= 117)
556 sprintf (name, "ZZZZ%02d", *n_zs - 108);
557 else if (*n_zs <= 126)
558 sprintf (name, "ZQZQ%02d", *n_zs - 117);
561 msg (SE, _("Ran out of generic names for Z-score variables. "
562 "There are only 126 generic names: ZSC001-ZSC0999, "
563 "STDZ01-STDZ09, ZZZZ01-ZZZZ09, ZQZQ01-ZQZQ09."));
567 if (try_name (dict, dsc, name))
568 return xstrdup (name);
573 /* Outputs a table describing the mapping between source
574 variables and Z-score variables. */
576 dump_z_table (struct dsc_proc *dsc)
578 struct pivot_table *table = pivot_table_create (
579 N_("Mapping of Variables to Z-scores"));
581 pivot_dimension_create (table, PIVOT_AXIS_COLUMN, N_("Names"),
582 N_("Source"), N_("Target"));
584 struct pivot_dimension *names = pivot_dimension_create (
585 table, PIVOT_AXIS_ROW, N_("Variables"));
586 names->hide_all_labels = true;
588 for (size_t i = 0; i < dsc->n_vars; i++)
589 if (dsc->vars[i].z_name != NULL)
591 int row = pivot_category_create_leaf (names->root,
592 pivot_value_new_number (i));
594 pivot_table_put2 (table, 0, row,
595 pivot_value_new_variable (dsc->vars[i].v));
596 pivot_table_put2 (table, 1, row,
597 pivot_value_new_user_text (dsc->vars[i].z_name, -1));
600 pivot_table_submit (table);
604 descriptives_set_all_sysmis_zscores (const struct dsc_trns *t, struct ccase *c)
606 const struct dsc_z_score *z;
608 for (z = t->z_scores; z < t->z_scores + t->n_z_scores; z++)
609 *case_num_rw (c, z->z_var) = SYSMIS;
612 /* Transformation function to calculate Z-scores. Will return SYSMIS if any of
613 the following are true: 1) mean or standard deviation is SYSMIS 2) score is
614 SYSMIS 3) score is user missing and they were not included in the original
615 analyis. 4) any of the variables in the original analysis were missing
616 (either system or user-missing values that weren't included).
618 static enum trns_result
619 descriptives_trns_proc (void *trns_, struct ccase **c,
620 casenumber case_idx UNUSED)
622 struct dsc_trns *t = trns_;
623 struct dsc_z_score *z;
624 const struct variable **vars;
626 *c = case_unshare (*c);
630 double f = case_num (*c, t->filter);
631 if (f == 0.0 || var_is_num_missing (t->filter, f))
633 descriptives_set_all_sysmis_zscores (t, *c);
634 return TRNS_CONTINUE;
640 struct ccase *z_case;
642 z_case = casereader_read (t->z_reader);
647 t->count = case_num_idx (z_case, z_idx++);
648 for (z = t->z_scores; z < t->z_scores + t->n_z_scores; z++)
650 z->mean = case_num_idx (z_case, z_idx++);
651 z->std_dev = case_num_idx (z_case, z_idx++);
659 msg (SE, _("Internal error processing Z scores. "
660 "Please report this to %s."),
664 descriptives_set_all_sysmis_zscores (t, *c);
665 return TRNS_CONTINUE;
670 if (t->missing_type == DSC_LISTWISE)
673 for (vars = t->vars; vars < t->vars + t->n_vars; vars++)
675 double score = case_num (*c, *vars);
676 if (var_is_num_missing (*vars, score) & t->exclude)
678 descriptives_set_all_sysmis_zscores (t, *c);
679 return TRNS_CONTINUE;
684 for (z = t->z_scores; z < t->z_scores + t->n_z_scores; z++)
686 double input = case_num (*c, z->src_var);
687 double *output = case_num_rw (*c, z->z_var);
689 if (z->mean == SYSMIS || z->std_dev == SYSMIS
690 || var_is_num_missing (z->src_var, input) & t->exclude)
693 *output = (input - z->mean) / z->std_dev;
695 return TRNS_CONTINUE;
698 /* Frees a descriptives_trns struct. */
700 descriptives_trns_free (void *trns_)
702 struct dsc_trns *t = trns_;
703 bool ok = t->ok && !casereader_error (t->z_reader);
706 casereader_destroy (t->z_reader);
707 assert((t->missing_type != DSC_LISTWISE) ^ (t->vars != NULL));
714 static const struct trns_class descriptives_trns_class = {
715 .name = "DESCRIPTIVES (Z scores)",
716 .execute = descriptives_trns_proc,
717 .destroy = descriptives_trns_free,
720 /* Sets up a transformation to calculate Z scores. */
722 setup_z_trns (struct dsc_proc *dsc, struct dataset *ds)
727 for (n = i = 0; i < dsc->n_vars; i++)
728 if (dsc->vars[i].z_name != NULL)
731 t = xmalloc (sizeof *t);
732 t->z_scores = xnmalloc (n, sizeof *t->z_scores);
734 t->missing_type = dsc->missing_type;
735 t->exclude = dsc->exclude;
736 if (t->missing_type == DSC_LISTWISE)
738 t->n_vars = dsc->n_vars;
739 t->vars = xnmalloc (t->n_vars, sizeof *t->vars);
740 for (i = 0; i < t->n_vars; i++)
741 t->vars[i] = dsc->vars[i].v;
748 t->filter = dict_get_filter (dataset_dict (ds));
749 t->z_reader = casewriter_make_reader (dsc->z_writer);
752 dsc->z_writer = NULL;
754 for (n = i = 0; i < dsc->n_vars; i++)
756 struct dsc_var *dv = &dsc->vars[i];
757 if (dv->z_name != NULL)
759 struct dsc_z_score *z;
760 struct variable *dst_var;
763 dst_var = dict_create_var_assert (dataset_dict (ds), dv->z_name, 0);
765 label = xasprintf (_("Z-score of %s"),var_to_string (dv->v));
766 var_set_label (dst_var, label);
769 z = &t->z_scores[n++];
775 add_transformation (ds, &descriptives_trns_class, t);
778 /* Statistical calculation. */
780 static bool listwise_missing (struct dsc_proc *dsc, const struct ccase *c);
782 /* Calculates and displays descriptive statistics for the cases
785 calc_descriptives (struct dsc_proc *dsc, struct casereader *group,
788 const struct variable *filter = dict_get_filter (dataset_dict (ds));
789 struct casereader *pass1, *pass2;
795 c = casereader_peek (group, 0);
798 casereader_destroy (group);
801 output_split_file_values (ds, c);
804 group = casereader_create_filter_weight (group, dataset_dict (ds),
808 pass2 = dsc->max_moment <= MOMENT_MEAN ? NULL : casereader_clone (pass1);
810 for (i = 0; i < dsc->n_vars; i++)
812 struct dsc_var *dv = &dsc->vars[i];
814 dv->valid = dv->missing = 0.0;
815 if (dv->moments != NULL)
816 moments_clear (dv->moments);
820 dsc->missing_listwise = 0.;
823 /* First pass to handle most of the work. */
825 for (; (c = casereader_read (pass1)) != NULL; case_unref (c))
827 double weight = dict_get_case_weight (dataset_dict (ds), c, NULL);
831 double f = case_num (c, filter);
832 if (f == 0.0 || var_is_num_missing (filter, f))
836 /* Check for missing values. */
837 if (listwise_missing (dsc, c))
839 dsc->missing_listwise += weight;
840 if (dsc->missing_type == DSC_LISTWISE)
843 dsc->valid += weight;
845 for (i = 0; i < dsc->n_vars; i++)
847 struct dsc_var *dv = &dsc->vars[i];
848 double x = case_num (c, dv->v);
850 if (var_is_num_missing (dv->v, x) & dsc->exclude)
852 dv->missing += weight;
856 if (dv->moments != NULL)
857 moments_pass_one (dv->moments, x, weight);
867 if (!casereader_destroy (pass1))
869 casereader_destroy (pass2);
873 /* Second pass for higher-order moments. */
874 if (dsc->max_moment > MOMENT_MEAN)
876 for (; (c = casereader_read (pass2)) != NULL; case_unref (c))
878 double weight = dict_get_case_weight (dataset_dict (ds), c, NULL);
882 double f = case_num (c, filter);
883 if (f == 0.0 || var_is_num_missing (filter, f))
887 /* Check for missing values. */
888 if (dsc->missing_type == DSC_LISTWISE && listwise_missing (dsc, c))
891 for (i = 0; i < dsc->n_vars; i++)
893 struct dsc_var *dv = &dsc->vars[i];
894 double x = case_num (c, dv->v);
896 if (var_is_num_missing (dv->v, x) & dsc->exclude)
899 if (dv->moments != NULL)
900 moments_pass_two (dv->moments, x, weight);
903 if (!casereader_destroy (pass2))
907 /* Calculate results. */
908 if (dsc->z_writer && count > 0)
910 c = case_create (casewriter_get_proto (dsc->z_writer));
912 *case_num_rw_idx (c, z_idx++) = count;
917 for (i = 0; i < dsc->n_vars; i++)
919 struct dsc_var *dv = &dsc->vars[i];
923 for (j = 0; j < DSC_N_STATS; j++)
924 dv->stats[j] = SYSMIS;
926 dv->valid = W = dsc->valid - dv->missing;
928 if (dv->moments != NULL)
929 moments_calculate (dv->moments, NULL,
930 &dv->stats[DSC_MEAN], &dv->stats[DSC_VARIANCE],
931 &dv->stats[DSC_SKEWNESS], &dv->stats[DSC_KURTOSIS]);
932 if (dsc->calc_stats & (1ul << DSC_SEMEAN)
933 && dv->stats[DSC_VARIANCE] != SYSMIS && W > 0.)
934 dv->stats[DSC_SEMEAN] = sqrt (dv->stats[DSC_VARIANCE]) / sqrt (W);
935 if (dsc->calc_stats & (1ul << DSC_STDDEV)
936 && dv->stats[DSC_VARIANCE] != SYSMIS)
937 dv->stats[DSC_STDDEV] = sqrt (dv->stats[DSC_VARIANCE]);
938 if (dsc->calc_stats & (1ul << DSC_SEKURT))
939 if (dv->stats[DSC_KURTOSIS] != SYSMIS)
940 dv->stats[DSC_SEKURT] = calc_sekurt (W);
941 if (dsc->calc_stats & (1ul << DSC_SESKEW)
942 && dv->stats[DSC_SKEWNESS] != SYSMIS)
943 dv->stats[DSC_SESKEW] = calc_seskew (W);
944 dv->stats[DSC_RANGE] = ((dv->min == DBL_MAX || dv->max == -DBL_MAX)
945 ? SYSMIS : dv->max - dv->min);
946 dv->stats[DSC_MIN] = dv->min == DBL_MAX ? SYSMIS : dv->min;
947 dv->stats[DSC_MAX] = dv->max == -DBL_MAX ? SYSMIS : dv->max;
948 if (dsc->calc_stats & (1ul << DSC_SUM))
949 dv->stats[DSC_SUM] = W * dv->stats[DSC_MEAN];
951 if (dv->z_name && c != NULL)
953 *case_num_rw_idx (c, z_idx++) = dv->stats[DSC_MEAN];
954 *case_num_rw_idx (c, z_idx++) = dv->stats[DSC_STDDEV];
959 casewriter_write (dsc->z_writer, c);
961 /* Output results. */
965 /* Returns true if any of the descriptives variables in DSC's
966 variable list have missing values in case C, false otherwise. */
968 listwise_missing (struct dsc_proc *dsc, const struct ccase *c)
972 for (i = 0; i < dsc->n_vars; i++)
974 struct dsc_var *dv = &dsc->vars[i];
975 double x = case_num (c, dv->v);
977 if (var_is_num_missing (dv->v, x) & dsc->exclude)
983 /* Statistical display. */
985 static algo_compare_func descriptives_compare_dsc_vars;
987 /* Displays a table of descriptive statistics for DSC. */
989 display (struct dsc_proc *dsc)
991 struct pivot_table *table = pivot_table_create (
992 N_("Descriptive Statistics"));
993 pivot_table_set_weight_var (table, dict_get_weight (dsc->dict));
995 struct pivot_dimension *statistics = pivot_dimension_create (
996 table, PIVOT_AXIS_COLUMN, N_("Statistics"));
997 pivot_category_create_leaf_rc (
998 statistics->root, pivot_value_new_text (N_("N")), PIVOT_RC_COUNT);
999 for (int i = 0; i < DSC_N_STATS; i++)
1000 if (dsc->show_stats & (1ul << i))
1001 pivot_category_create_leaf (statistics->root,
1002 pivot_value_new_text (dsc_info[i].name));
1004 if (dsc->sort_by_stat != DSC_NONE)
1005 sort (dsc->vars, dsc->n_vars, sizeof *dsc->vars,
1006 descriptives_compare_dsc_vars, dsc);
1008 struct pivot_dimension *variables = pivot_dimension_create (
1009 table, PIVOT_AXIS_ROW, N_("Variable"));
1010 for (size_t i = 0; i < dsc->n_vars; i++)
1012 const struct dsc_var *dv = &dsc->vars[i];
1014 int row = pivot_category_create_leaf (variables->root,
1015 pivot_value_new_variable (dv->v));
1018 pivot_table_put2 (table, column++, row,
1019 pivot_value_new_number (dv->valid));
1021 for (int j = 0; j < DSC_N_STATS; j++)
1022 if (dsc->show_stats & (1ul << j))
1024 union value v = { .f = dv->stats[j] };
1025 struct pivot_value *pv = (j == DSC_MIN || j == DSC_MAX
1026 ? pivot_value_new_var_value (dv->v, &v)
1027 : pivot_value_new_number (dv->stats[j]));
1028 pivot_table_put2 (table, column++, row, pv);
1032 int row = pivot_category_create_leaves (
1033 variables->root, N_("Valid N (listwise)"), N_("Missing N (listwise)"));
1034 pivot_table_put2 (table, 0, row, pivot_value_new_number (dsc->valid));
1035 pivot_table_put2 (table, 0, row + 1,
1036 pivot_value_new_number (dsc->missing_listwise));
1037 pivot_table_submit (table);
1040 /* Compares `struct dsc_var's A and B according to the ordering
1041 specified by CMD. */
1043 descriptives_compare_dsc_vars (const void *a_, const void *b_, const void *dsc_)
1045 const struct dsc_var *a = a_;
1046 const struct dsc_var *b = b_;
1047 const struct dsc_proc *dsc = dsc_;
1051 if (dsc->sort_by_stat == DSC_NAME)
1052 result = utf8_strcasecmp (var_get_name (a->v), var_get_name (b->v));
1055 double as = a->stats[dsc->sort_by_stat];
1056 double bs = b->stats[dsc->sort_by_stat];
1058 result = as < bs ? -1 : as > bs;
1061 if (!dsc->sort_ascending)