1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 1997-2000, 2009-2014 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
24 #include "data/casegrouper.h"
25 #include "data/casereader.h"
26 #include "data/casewriter.h"
27 #include "data/dataset.h"
28 #include "data/dictionary.h"
29 #include "data/transformations.h"
30 #include "data/variable.h"
31 #include "language/command.h"
32 #include "language/dictionary/split-file.h"
33 #include "language/lexer/lexer.h"
34 #include "language/lexer/variable-parser.h"
35 #include "libpspp/array.h"
36 #include "libpspp/assertion.h"
37 #include "libpspp/compiler.h"
38 #include "libpspp/i18n.h"
39 #include "libpspp/message.h"
40 #include "math/moments.h"
41 #include "output/pivot-table.h"
43 #include "gl/xalloc.h"
46 #define _(msgid) gettext (msgid)
47 #define N_(msgid) msgid
49 /* DESCRIPTIVES private data. */
53 /* Handling of missing values. */
56 DSC_VARIABLE, /* Handle missing values on a per-variable basis. */
57 DSC_LISTWISE /* Discard entire case if any variable is missing. */
60 /* Describes properties of a distribution for the purpose of
61 calculating a Z-score. */
64 const struct variable *src_var; /* Variable on which z-score is based. */
65 struct variable *z_var; /* New z-score variable. */
66 double mean; /* Distribution mean. */
67 double std_dev; /* Distribution standard deviation. */
70 /* DESCRIPTIVES transformation (for calculating Z-scores). */
73 struct dsc_z_score *z_scores; /* Array of Z-scores. */
74 int n_z_scores; /* Number of Z-scores. */
75 const struct variable **vars; /* Variables for listwise missing checks. */
76 size_t n_vars; /* Number of variables. */
77 enum dsc_missing_type missing_type; /* Treatment of missing values. */
78 enum mv_class exclude; /* Classes of missing values to exclude. */
79 const struct variable *filter; /* Dictionary FILTER BY variable. */
80 struct casereader *z_reader; /* Reader for count, mean, stddev. */
81 casenumber count; /* Number left in this SPLIT FILE group.*/
85 /* Statistics. Used as bit indexes, so must be 32 or fewer. */
88 DSC_MEAN = 0, DSC_SEMEAN, DSC_STDDEV, DSC_VARIANCE, DSC_KURTOSIS,
89 DSC_SEKURT, DSC_SKEWNESS, DSC_SESKEW, DSC_RANGE, DSC_MIN,
90 DSC_MAX, DSC_SUM, DSC_N_STATS,
92 /* Only valid as sort criteria. */
93 DSC_NAME = -2, /* Sort by name. */
94 DSC_NONE = -1 /* Unsorted. */
97 /* Describes one statistic. */
98 struct dsc_statistic_info
100 const char *identifier; /* Identifier. */
101 const char *name; /* Full name. */
102 enum moment moment; /* Highest moment needed to calculate. */
105 /* Table of statistics, indexed by DSC_*. */
106 static const struct dsc_statistic_info dsc_info[DSC_N_STATS] =
108 {"MEAN", N_("Mean"), MOMENT_MEAN},
109 {"SEMEAN", N_("S.E. Mean"), MOMENT_VARIANCE},
110 {"STDDEV", N_("Std Dev"), MOMENT_VARIANCE},
111 {"VARIANCE", N_("Variance"), MOMENT_VARIANCE},
112 {"KURTOSIS", N_("Kurtosis"), MOMENT_KURTOSIS},
113 {"SEKURTOSIS", N_("S.E. Kurt"), MOMENT_NONE},
114 {"SKEWNESS", N_("Skewness"), MOMENT_SKEWNESS},
115 {"SESKEWNESS", N_("S.E. Skew"), MOMENT_NONE},
116 {"RANGE", N_("Range"), MOMENT_NONE},
117 {"MINIMUM", N_("Minimum"), MOMENT_NONE},
118 {"MAXIMUM", N_("Maximum"), MOMENT_NONE},
119 {"SUM", N_("Sum"), MOMENT_MEAN},
122 /* Statistics calculated by default if none are explicitly
124 #define DEFAULT_STATS \
125 ((1ul << DSC_MEAN) | (1ul << DSC_STDDEV) | (1ul << DSC_MIN) \
128 /* A variable specified on DESCRIPTIVES. */
131 const struct variable *v; /* Variable to calculate on. */
132 char *z_name; /* Name for z-score variable. */
133 double valid, missing; /* Valid, missing counts. */
134 struct moments *moments; /* Moments. */
135 double min, max; /* Maximum and mimimum values. */
136 double stats[DSC_N_STATS]; /* All the stats' values. */
139 /* A DESCRIPTIVES procedure. */
142 /* Per-variable info. */
143 struct dictionary *dict; /* Dictionary. */
144 struct dsc_var *vars; /* Variables. */
145 size_t n_vars; /* Number of variables. */
148 enum dsc_missing_type missing_type; /* Treatment of missing values. */
149 enum mv_class exclude; /* Classes of missing values to exclude. */
151 /* Accumulated results. */
152 double missing_listwise; /* Sum of weights of cases missing listwise. */
153 double valid; /* Sum of weights of valid cases. */
154 bool bad_warn; /* Warn if bad weight found. */
155 enum dsc_statistic sort_by_stat; /* Statistic to sort by; -1: name. */
156 int sort_ascending; /* !0: ascending order; 0: descending. */
157 unsigned long show_stats; /* Statistics to display. */
158 unsigned long calc_stats; /* Statistics to calculate. */
159 enum moment max_moment; /* Highest moment needed for stats. */
162 struct casewriter *z_writer; /* Mean and stddev per SPLIT FILE group. */
166 static enum dsc_statistic match_statistic (struct lexer *);
167 static void free_dsc_proc (struct dsc_proc *);
169 /* Z-score functions. */
170 static bool try_name (const struct dictionary *dict,
171 struct dsc_proc *dsc, const char *name);
172 static char *generate_z_varname (const struct dictionary *dict,
173 struct dsc_proc *dsc,
174 const char *name, int *n_zs);
175 static void dump_z_table (struct dsc_proc *);
176 static void setup_z_trns (struct dsc_proc *, struct dataset *);
178 /* Procedure execution functions. */
179 static void calc_descriptives (struct dsc_proc *, struct casereader *,
181 static void display (struct dsc_proc *dsc);
183 /* Parser and outline. */
185 /* Handles DESCRIPTIVES. */
187 cmd_descriptives (struct lexer *lexer, struct dataset *ds)
189 struct dictionary *dict = dataset_dict (ds);
190 struct dsc_proc *dsc;
191 const struct variable **vars = NULL;
193 int save_z_scores = 0;
198 struct casegrouper *grouper;
199 struct casereader *group;
201 /* Create and initialize dsc. */
202 dsc = xmalloc (sizeof *dsc);
206 dsc->missing_type = DSC_VARIABLE;
207 dsc->exclude = MV_ANY;
208 dsc->missing_listwise = 0.;
211 dsc->sort_by_stat = DSC_NONE;
212 dsc->sort_ascending = 1;
213 dsc->show_stats = dsc->calc_stats = DEFAULT_STATS;
214 dsc->z_writer = NULL;
216 /* Parse DESCRIPTIVES. */
217 while (lex_token (lexer) != T_ENDCMD)
219 if (lex_match_id (lexer, "MISSING"))
221 lex_match (lexer, T_EQUALS);
222 while (lex_token (lexer) != T_ENDCMD && lex_token (lexer) != T_SLASH)
224 if (lex_match_id (lexer, "VARIABLE"))
225 dsc->missing_type = DSC_VARIABLE;
226 else if (lex_match_id (lexer, "LISTWISE"))
227 dsc->missing_type = DSC_LISTWISE;
228 else if (lex_match_id (lexer, "INCLUDE"))
229 dsc->exclude = MV_SYSTEM;
232 lex_error (lexer, NULL);
235 lex_match (lexer, T_COMMA);
238 else if (lex_match_id (lexer, "SAVE"))
240 else if (lex_match_id (lexer, "FORMAT"))
242 lex_match (lexer, T_EQUALS);
243 while (lex_token (lexer) != T_ENDCMD && lex_token (lexer) != T_SLASH)
245 if (lex_match_id (lexer, "LABELS")
246 || lex_match_id (lexer, "NOLABELS")
247 || lex_match_id (lexer, "INDEX")
248 || lex_match_id (lexer, "NOINDEX")
249 || lex_match_id (lexer, "LINE")
250 || lex_match_id (lexer, "SERIAL"))
256 lex_error (lexer, NULL);
259 lex_match (lexer, T_COMMA);
262 else if (lex_match_id (lexer, "STATISTICS"))
264 lex_match (lexer, T_EQUALS);
266 while (lex_token (lexer) != T_ENDCMD && lex_token (lexer) != T_SLASH)
268 if (lex_match (lexer, T_ALL))
269 dsc->show_stats |= (1ul << DSC_N_STATS) - 1;
270 else if (lex_match_id (lexer, "DEFAULT"))
271 dsc->show_stats |= DEFAULT_STATS;
274 enum dsc_statistic s = match_statistic (lexer);
277 lex_error (lexer, NULL);
280 dsc->show_stats |= 1ul << s;
282 lex_match (lexer, T_COMMA);
284 if (dsc->show_stats == 0)
285 dsc->show_stats = DEFAULT_STATS;
287 else if (lex_match_id (lexer, "SORT"))
289 lex_match (lexer, T_EQUALS);
290 if (lex_match_id (lexer, "NAME"))
291 dsc->sort_by_stat = DSC_NAME;
294 dsc->sort_by_stat = match_statistic (lexer);
295 if (dsc->sort_by_stat == DSC_NONE)
296 dsc->sort_by_stat = DSC_MEAN;
298 if (lex_match (lexer, T_LPAREN))
300 if (lex_match_id (lexer, "A"))
301 dsc->sort_ascending = 1;
302 else if (lex_match_id (lexer, "D"))
303 dsc->sort_ascending = 0;
305 lex_error (lexer, NULL);
306 if (! lex_force_match (lexer, T_RPAREN))
310 else if (n_vars == 0)
312 if (lex_next_token (lexer, 1) == T_EQUALS)
314 lex_match_id (lexer, "VARIABLES");
315 lex_match (lexer, T_EQUALS);
318 while (lex_token (lexer) != T_ENDCMD && lex_token (lexer) != T_SLASH)
322 if (!parse_variables_const (lexer, dict, &vars, &n_vars,
323 PV_APPEND | PV_NO_DUPLICATE | PV_NUMERIC))
326 dsc->vars = xnrealloc ((void *)dsc->vars, n_vars, sizeof *dsc->vars);
327 for (i = dsc->n_vars; i < n_vars; i++)
329 struct dsc_var *dv = &dsc->vars[i];
334 dsc->n_vars = n_vars;
336 if (lex_match (lexer, T_LPAREN))
338 if (lex_token (lexer) != T_ID)
340 lex_error (lexer, NULL);
343 if (try_name (dict, dsc, lex_tokcstr (lexer)))
345 struct dsc_var *dsc_var = &dsc->vars[dsc->n_vars - 1];
346 dsc_var->z_name = xstrdup (lex_tokcstr (lexer));
350 msg (SE, _("Z-score variable name %s would be"
351 " a duplicate variable name."), lex_tokcstr (lexer));
353 if (!lex_force_match (lexer, T_RPAREN))
360 lex_error (lexer, NULL);
364 lex_match (lexer, T_SLASH);
368 msg (SE, _("No variables specified."));
372 /* Construct z-score varnames, show translation table. */
373 if (n_zs || save_z_scores)
375 struct caseproto *proto;
381 for (i = 0; i < dsc->n_vars; i++)
383 struct dsc_var *dsc_var = &dsc->vars[i];
384 if (dsc_var->z_name == NULL)
386 const char *name = var_get_name (dsc_var->v);
387 dsc_var->z_name = generate_z_varname (dict, dsc, name,
389 if (dsc_var->z_name == NULL)
397 /* It would be better to handle Z scores correctly (however we define
398 that) when TEMPORARY is in effect, but in the meantime this at least
399 prevents a use-after-free error. See bug #38786. */
400 if (proc_make_temporary_transformations_permanent (ds))
401 msg (SW, _("DESCRIPTIVES with Z scores ignores TEMPORARY. "
402 "Temporary transformations will be made permanent."));
404 proto = caseproto_create ();
405 for (i = 0; i < 1 + 2 * n_zs; i++)
406 proto = caseproto_add_width (proto, 0);
407 dsc->z_writer = autopaging_writer_create (proto);
408 caseproto_unref (proto);
413 /* Figure out statistics to display. */
414 if (dsc->show_stats & (1ul << DSC_SKEWNESS))
415 dsc->show_stats |= 1ul << DSC_SESKEW;
416 if (dsc->show_stats & (1ul << DSC_KURTOSIS))
417 dsc->show_stats |= 1ul << DSC_SEKURT;
419 /* Figure out which statistics to calculate. */
420 dsc->calc_stats = dsc->show_stats;
422 dsc->calc_stats |= (1ul << DSC_MEAN) | (1ul << DSC_STDDEV);
423 if (dsc->sort_by_stat >= 0)
424 dsc->calc_stats |= 1ul << dsc->sort_by_stat;
425 if (dsc->show_stats & (1ul << DSC_SESKEW))
426 dsc->calc_stats |= 1ul << DSC_SKEWNESS;
427 if (dsc->show_stats & (1ul << DSC_SEKURT))
428 dsc->calc_stats |= 1ul << DSC_KURTOSIS;
430 /* Figure out maximum moment needed and allocate moments for
432 dsc->max_moment = MOMENT_NONE;
433 for (i = 0; i < DSC_N_STATS; i++)
434 if (dsc->calc_stats & (1ul << i) && dsc_info[i].moment > dsc->max_moment)
435 dsc->max_moment = dsc_info[i].moment;
436 if (dsc->max_moment != MOMENT_NONE)
437 for (i = 0; i < dsc->n_vars; i++)
438 dsc->vars[i].moments = moments_create (dsc->max_moment);
441 grouper = casegrouper_create_splits (proc_open_filtering (ds, false), dict);
442 while (casegrouper_get_next_group (grouper, &group))
443 calc_descriptives (dsc, group, ds);
444 ok = casegrouper_destroy (grouper);
445 ok = proc_commit (ds) && ok;
449 setup_z_trns (dsc, ds);
454 return ok ? CMD_SUCCESS : CMD_CASCADING_FAILURE;
462 /* Returns the statistic named by the current token and skips past the token.
463 Returns DSC_NONE if no statistic is given (e.g., subcommand with no
464 specifiers). Emits an error if the current token ID does not name a
466 static enum dsc_statistic
467 match_statistic (struct lexer *lexer)
469 if (lex_token (lexer) == T_ID)
471 enum dsc_statistic stat;
473 for (stat = 0; stat < DSC_N_STATS; stat++)
474 if (lex_match_id (lexer, dsc_info[stat].identifier))
478 lex_error (lexer, _("expecting statistic name: reverting to default"));
486 free_dsc_proc (struct dsc_proc *dsc)
493 for (i = 0; i < dsc->n_vars; i++)
495 struct dsc_var *dsc_var = &dsc->vars[i];
496 free (dsc_var->z_name);
497 moments_destroy (dsc_var->moments);
499 casewriter_destroy (dsc->z_writer);
506 /* Returns false if NAME is a duplicate of any existing variable name or
507 of any previously-declared z-var name; otherwise returns true. */
509 try_name (const struct dictionary *dict, struct dsc_proc *dsc,
514 if (dict_lookup_var (dict, name) != NULL)
516 for (i = 0; i < dsc->n_vars; i++)
518 struct dsc_var *dsc_var = &dsc->vars[i];
519 if (dsc_var->z_name != NULL && !utf8_strcasecmp (dsc_var->z_name, name))
525 /* Generates a name for a Z-score variable based on a variable
526 named VAR_NAME, given that *Z_CNT generated variable names are
527 known to already exist. If successful, returns the new name
528 as a dynamically allocated string. On failure, returns NULL. */
530 generate_z_varname (const struct dictionary *dict, struct dsc_proc *dsc,
531 const char *var_name, int *n_zs)
533 char *z_name, *trunc_name;
535 /* Try a name based on the original variable name. */
536 z_name = xasprintf ("Z%s", var_name);
537 trunc_name = utf8_encoding_trunc (z_name, dict_get_encoding (dict),
540 if (try_name (dict, dsc, trunc_name))
544 /* Generate a synthetic name. */
552 sprintf (name, "ZSC%03d", *n_zs);
553 else if (*n_zs <= 108)
554 sprintf (name, "STDZ%02d", *n_zs - 99);
555 else if (*n_zs <= 117)
556 sprintf (name, "ZZZZ%02d", *n_zs - 108);
557 else if (*n_zs <= 126)
558 sprintf (name, "ZQZQ%02d", *n_zs - 117);
561 msg (SE, _("Ran out of generic names for Z-score variables. "
562 "There are only 126 generic names: ZSC001-ZSC0999, "
563 "STDZ01-STDZ09, ZZZZ01-ZZZZ09, ZQZQ01-ZQZQ09."));
567 if (try_name (dict, dsc, name))
568 return xstrdup (name);
573 /* Outputs a table describing the mapping between source
574 variables and Z-score variables. */
576 dump_z_table (struct dsc_proc *dsc)
578 struct pivot_table *table = pivot_table_create (
579 N_("Mapping of Variables to Z-scores"));
581 pivot_dimension_create (table, PIVOT_AXIS_COLUMN, N_("Names"),
582 N_("Source"), N_("Target"));
584 struct pivot_dimension *names = pivot_dimension_create (
585 table, PIVOT_AXIS_ROW, N_("Variables"));
586 names->hide_all_labels = true;
588 for (size_t i = 0; i < dsc->n_vars; i++)
589 if (dsc->vars[i].z_name != NULL)
591 int row = pivot_category_create_leaf (names->root,
592 pivot_value_new_number (i));
594 pivot_table_put2 (table, 0, row,
595 pivot_value_new_variable (dsc->vars[i].v));
596 pivot_table_put2 (table, 1, row,
597 pivot_value_new_user_text (dsc->vars[i].z_name, -1));
600 pivot_table_submit (table);
604 descriptives_set_all_sysmis_zscores (const struct dsc_trns *t, struct ccase *c)
606 const struct dsc_z_score *z;
608 for (z = t->z_scores; z < t->z_scores + t->n_z_scores; z++)
609 *case_num_rw (c, z->z_var) = SYSMIS;
612 /* Transformation function to calculate Z-scores. Will return SYSMIS if any of
613 the following are true: 1) mean or standard deviation is SYSMIS 2) score is
614 SYSMIS 3) score is user missing and they were not included in the original
615 analyis. 4) any of the variables in the original analysis were missing
616 (either system or user-missing values that weren't included).
619 descriptives_trns_proc (void *trns_, struct ccase **c,
620 casenumber case_idx UNUSED)
622 struct dsc_trns *t = trns_;
623 struct dsc_z_score *z;
624 const struct variable **vars;
626 *c = case_unshare (*c);
630 double f = case_num (*c, t->filter);
631 if (f == 0.0 || var_is_num_missing (t->filter, f, MV_ANY))
633 descriptives_set_all_sysmis_zscores (t, *c);
634 return TRNS_CONTINUE;
640 struct ccase *z_case;
642 z_case = casereader_read (t->z_reader);
647 t->count = case_num_idx (z_case, z_idx++);
648 for (z = t->z_scores; z < t->z_scores + t->n_z_scores; z++)
650 z->mean = case_num_idx (z_case, z_idx++);
651 z->std_dev = case_num_idx (z_case, z_idx++);
659 msg (SE, _("Internal error processing Z scores. "
660 "Please report this to %s."),
664 descriptives_set_all_sysmis_zscores (t, *c);
665 return TRNS_CONTINUE;
670 if (t->missing_type == DSC_LISTWISE)
673 for (vars = t->vars; vars < t->vars + t->n_vars; vars++)
675 double score = case_num (*c, *vars);
676 if (var_is_num_missing (*vars, score, t->exclude))
678 descriptives_set_all_sysmis_zscores (t, *c);
679 return TRNS_CONTINUE;
684 for (z = t->z_scores; z < t->z_scores + t->n_z_scores; z++)
686 double input = case_num (*c, z->src_var);
687 double *output = case_num_rw (*c, z->z_var);
689 if (z->mean == SYSMIS || z->std_dev == SYSMIS
690 || var_is_num_missing (z->src_var, input, t->exclude))
693 *output = (input - z->mean) / z->std_dev;
695 return TRNS_CONTINUE;
698 /* Frees a descriptives_trns struct. */
700 descriptives_trns_free (void *trns_)
702 struct dsc_trns *t = trns_;
703 bool ok = t->ok && !casereader_error (t->z_reader);
706 casereader_destroy (t->z_reader);
707 assert((t->missing_type != DSC_LISTWISE) ^ (t->vars != NULL));
714 /* Sets up a transformation to calculate Z scores. */
716 setup_z_trns (struct dsc_proc *dsc, struct dataset *ds)
721 for (cnt = i = 0; i < dsc->n_vars; i++)
722 if (dsc->vars[i].z_name != NULL)
725 t = xmalloc (sizeof *t);
726 t->z_scores = xnmalloc (cnt, sizeof *t->z_scores);
728 t->missing_type = dsc->missing_type;
729 t->exclude = dsc->exclude;
730 if (t->missing_type == DSC_LISTWISE)
732 t->n_vars = dsc->n_vars;
733 t->vars = xnmalloc (t->n_vars, sizeof *t->vars);
734 for (i = 0; i < t->n_vars; i++)
735 t->vars[i] = dsc->vars[i].v;
742 t->filter = dict_get_filter (dataset_dict (ds));
743 t->z_reader = casewriter_make_reader (dsc->z_writer);
746 dsc->z_writer = NULL;
748 for (cnt = i = 0; i < dsc->n_vars; i++)
750 struct dsc_var *dv = &dsc->vars[i];
751 if (dv->z_name != NULL)
753 struct dsc_z_score *z;
754 struct variable *dst_var;
757 dst_var = dict_create_var_assert (dataset_dict (ds), dv->z_name, 0);
759 label = xasprintf (_("Z-score of %s"),var_to_string (dv->v));
760 var_set_label (dst_var, label);
763 z = &t->z_scores[cnt++];
769 add_transformation (ds,
770 descriptives_trns_proc, descriptives_trns_free, t);
773 /* Statistical calculation. */
775 static bool listwise_missing (struct dsc_proc *dsc, const struct ccase *c);
777 /* Calculates and displays descriptive statistics for the cases
780 calc_descriptives (struct dsc_proc *dsc, struct casereader *group,
783 const struct variable *filter = dict_get_filter (dataset_dict (ds));
784 struct casereader *pass1, *pass2;
790 c = casereader_peek (group, 0);
793 casereader_destroy (group);
796 output_split_file_values (ds, c);
799 group = casereader_create_filter_weight (group, dataset_dict (ds),
803 pass2 = dsc->max_moment <= MOMENT_MEAN ? NULL : casereader_clone (pass1);
805 for (i = 0; i < dsc->n_vars; i++)
807 struct dsc_var *dv = &dsc->vars[i];
809 dv->valid = dv->missing = 0.0;
810 if (dv->moments != NULL)
811 moments_clear (dv->moments);
815 dsc->missing_listwise = 0.;
818 /* First pass to handle most of the work. */
820 for (; (c = casereader_read (pass1)) != NULL; case_unref (c))
822 double weight = dict_get_case_weight (dataset_dict (ds), c, NULL);
826 double f = case_num (c, filter);
827 if (f == 0.0 || var_is_num_missing (filter, f, MV_ANY))
831 /* Check for missing values. */
832 if (listwise_missing (dsc, c))
834 dsc->missing_listwise += weight;
835 if (dsc->missing_type == DSC_LISTWISE)
838 dsc->valid += weight;
840 for (i = 0; i < dsc->n_vars; i++)
842 struct dsc_var *dv = &dsc->vars[i];
843 double x = case_num (c, dv->v);
845 if (var_is_num_missing (dv->v, x, dsc->exclude))
847 dv->missing += weight;
851 if (dv->moments != NULL)
852 moments_pass_one (dv->moments, x, weight);
862 if (!casereader_destroy (pass1))
864 casereader_destroy (pass2);
868 /* Second pass for higher-order moments. */
869 if (dsc->max_moment > MOMENT_MEAN)
871 for (; (c = casereader_read (pass2)) != NULL; case_unref (c))
873 double weight = dict_get_case_weight (dataset_dict (ds), c, NULL);
877 double f = case_num (c, filter);
878 if (f == 0.0 || var_is_num_missing (filter, f, MV_ANY))
882 /* Check for missing values. */
883 if (dsc->missing_type == DSC_LISTWISE && listwise_missing (dsc, c))
886 for (i = 0; i < dsc->n_vars; i++)
888 struct dsc_var *dv = &dsc->vars[i];
889 double x = case_num (c, dv->v);
891 if (var_is_num_missing (dv->v, x, dsc->exclude))
894 if (dv->moments != NULL)
895 moments_pass_two (dv->moments, x, weight);
898 if (!casereader_destroy (pass2))
902 /* Calculate results. */
903 if (dsc->z_writer && count > 0)
905 c = case_create (casewriter_get_proto (dsc->z_writer));
907 *case_num_rw_idx (c, z_idx++) = count;
912 for (i = 0; i < dsc->n_vars; i++)
914 struct dsc_var *dv = &dsc->vars[i];
918 for (j = 0; j < DSC_N_STATS; j++)
919 dv->stats[j] = SYSMIS;
921 dv->valid = W = dsc->valid - dv->missing;
923 if (dv->moments != NULL)
924 moments_calculate (dv->moments, NULL,
925 &dv->stats[DSC_MEAN], &dv->stats[DSC_VARIANCE],
926 &dv->stats[DSC_SKEWNESS], &dv->stats[DSC_KURTOSIS]);
927 if (dsc->calc_stats & (1ul << DSC_SEMEAN)
928 && dv->stats[DSC_VARIANCE] != SYSMIS && W > 0.)
929 dv->stats[DSC_SEMEAN] = sqrt (dv->stats[DSC_VARIANCE]) / sqrt (W);
930 if (dsc->calc_stats & (1ul << DSC_STDDEV)
931 && dv->stats[DSC_VARIANCE] != SYSMIS)
932 dv->stats[DSC_STDDEV] = sqrt (dv->stats[DSC_VARIANCE]);
933 if (dsc->calc_stats & (1ul << DSC_SEKURT))
934 if (dv->stats[DSC_KURTOSIS] != SYSMIS)
935 dv->stats[DSC_SEKURT] = calc_sekurt (W);
936 if (dsc->calc_stats & (1ul << DSC_SESKEW)
937 && dv->stats[DSC_SKEWNESS] != SYSMIS)
938 dv->stats[DSC_SESKEW] = calc_seskew (W);
939 dv->stats[DSC_RANGE] = ((dv->min == DBL_MAX || dv->max == -DBL_MAX)
940 ? SYSMIS : dv->max - dv->min);
941 dv->stats[DSC_MIN] = dv->min == DBL_MAX ? SYSMIS : dv->min;
942 dv->stats[DSC_MAX] = dv->max == -DBL_MAX ? SYSMIS : dv->max;
943 if (dsc->calc_stats & (1ul << DSC_SUM))
944 dv->stats[DSC_SUM] = W * dv->stats[DSC_MEAN];
946 if (dv->z_name && c != NULL)
948 *case_num_rw_idx (c, z_idx++) = dv->stats[DSC_MEAN];
949 *case_num_rw_idx (c, z_idx++) = dv->stats[DSC_STDDEV];
954 casewriter_write (dsc->z_writer, c);
956 /* Output results. */
960 /* Returns true if any of the descriptives variables in DSC's
961 variable list have missing values in case C, false otherwise. */
963 listwise_missing (struct dsc_proc *dsc, const struct ccase *c)
967 for (i = 0; i < dsc->n_vars; i++)
969 struct dsc_var *dv = &dsc->vars[i];
970 double x = case_num (c, dv->v);
972 if (var_is_num_missing (dv->v, x, dsc->exclude))
978 /* Statistical display. */
980 static algo_compare_func descriptives_compare_dsc_vars;
982 /* Displays a table of descriptive statistics for DSC. */
984 display (struct dsc_proc *dsc)
986 struct pivot_table *table = pivot_table_create (
987 N_("Descriptive Statistics"));
988 pivot_table_set_weight_var (table, dict_get_weight (dsc->dict));
990 struct pivot_dimension *statistics = pivot_dimension_create (
991 table, PIVOT_AXIS_COLUMN, N_("Statistics"));
992 pivot_category_create_leaf_rc (
993 statistics->root, pivot_value_new_text (N_("N")), PIVOT_RC_COUNT);
994 for (int i = 0; i < DSC_N_STATS; i++)
995 if (dsc->show_stats & (1ul << i))
996 pivot_category_create_leaf (statistics->root,
997 pivot_value_new_text (dsc_info[i].name));
999 if (dsc->sort_by_stat != DSC_NONE)
1000 sort (dsc->vars, dsc->n_vars, sizeof *dsc->vars,
1001 descriptives_compare_dsc_vars, dsc);
1003 struct pivot_dimension *variables = pivot_dimension_create (
1004 table, PIVOT_AXIS_ROW, N_("Variable"));
1005 for (size_t i = 0; i < dsc->n_vars; i++)
1007 const struct dsc_var *dv = &dsc->vars[i];
1009 int row = pivot_category_create_leaf (variables->root,
1010 pivot_value_new_variable (dv->v));
1013 pivot_table_put2 (table, column++, row,
1014 pivot_value_new_number (dv->valid));
1016 for (int j = 0; j < DSC_N_STATS; j++)
1017 if (dsc->show_stats & (1ul << j))
1019 union value v = { .f = dv->stats[j] };
1020 struct pivot_value *pv = (j == DSC_MIN || j == DSC_MAX
1021 ? pivot_value_new_var_value (dv->v, &v)
1022 : pivot_value_new_number (dv->stats[j]));
1023 pivot_table_put2 (table, column++, row, pv);
1027 int row = pivot_category_create_leaves (
1028 variables->root, N_("Valid N (listwise)"), N_("Missing N (listwise)"));
1029 pivot_table_put2 (table, 0, row, pivot_value_new_number (dsc->valid));
1030 pivot_table_put2 (table, 0, row + 1,
1031 pivot_value_new_number (dsc->missing_listwise));
1032 pivot_table_submit (table);
1035 /* Compares `struct dsc_var's A and B according to the ordering
1036 specified by CMD. */
1038 descriptives_compare_dsc_vars (const void *a_, const void *b_, const void *dsc_)
1040 const struct dsc_var *a = a_;
1041 const struct dsc_var *b = b_;
1042 const struct dsc_proc *dsc = dsc_;
1046 if (dsc->sort_by_stat == DSC_NAME)
1047 result = utf8_strcasecmp (var_get_name (a->v), var_get_name (b->v));
1050 double as = a->stats[dsc->sort_by_stat];
1051 double bs = b->stats[dsc->sort_by_stat];
1053 result = as < bs ? -1 : as > bs;
1056 if (!dsc->sort_ascending)