1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 1997-2000, 2009-2014 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
24 #include "data/casegrouper.h"
25 #include "data/casereader.h"
26 #include "data/casewriter.h"
27 #include "data/dataset.h"
28 #include "data/dictionary.h"
29 #include "data/subcase.h"
30 #include "data/transformations.h"
31 #include "data/variable.h"
32 #include "language/command.h"
33 #include "language/commands/split-file.h"
34 #include "language/lexer/lexer.h"
35 #include "language/lexer/variable-parser.h"
36 #include "libpspp/array.h"
37 #include "libpspp/assertion.h"
38 #include "libpspp/compiler.h"
39 #include "libpspp/i18n.h"
40 #include "libpspp/message.h"
41 #include "math/moments.h"
42 #include "output/pivot-table.h"
44 #include "gl/xalloc.h"
47 #define _(msgid) gettext (msgid)
48 #define N_(msgid) msgid
50 /* DESCRIPTIVES private data. */
52 /* Handling of missing values. */
55 DSC_VARIABLE, /* Handle missing values on a per-variable basis. */
56 DSC_LISTWISE /* Discard entire case if any variable is missing. */
59 /* Describes properties of a distribution for the purpose of
60 calculating a Z-score. */
63 const struct variable *src_var; /* Variable on which z-score is based. */
64 struct variable *z_var; /* New z-score variable. */
65 double mean; /* Distribution mean. */
66 double std_dev; /* Distribution standard deviation. */
69 /* DESCRIPTIVES transformation (for calculating Z-scores). */
72 struct dsc_z_score *z_scores; /* Array of Z-scores. */
73 size_t n_z_scores; /* Number of Z-scores. */
74 const struct variable **vars; /* Variables for listwise missing checks. */
75 size_t n_vars; /* Number of variables. */
76 enum dsc_missing_type missing_type; /* Treatment of missing values. */
77 enum mv_class exclude; /* Classes of missing values to exclude. */
78 const struct variable *filter; /* Dictionary FILTER BY variable. */
79 struct casereader *z_reader; /* Reader for count, mean, stddev. */
80 casenumber count; /* Number left in this SPLIT FILE group.*/
84 /* Statistics. Used as bit indexes, so must be 32 or fewer. */
87 DSC_MEAN = 0, DSC_SEMEAN, DSC_STDDEV, DSC_VARIANCE, DSC_KURTOSIS,
88 DSC_SEKURT, DSC_SKEWNESS, DSC_SESKEW, DSC_RANGE, DSC_MIN,
89 DSC_MAX, DSC_SUM, DSC_N_STATS,
91 /* Only valid as sort criteria. */
92 DSC_NAME = -2, /* Sort by name. */
93 DSC_NONE = -1 /* Unsorted. */
96 /* Describes one statistic. */
97 struct dsc_statistic_info
99 const char *identifier; /* Identifier. */
100 const char *name; /* Full name. */
101 enum moment moment; /* Highest moment needed to calculate. */
104 /* Table of statistics, indexed by DSC_*. */
105 static const struct dsc_statistic_info dsc_info[DSC_N_STATS] =
107 {"MEAN", N_("Mean"), MOMENT_MEAN},
108 {"SEMEAN", N_("S.E. Mean"), MOMENT_VARIANCE},
109 {"STDDEV", N_("Std Dev"), MOMENT_VARIANCE},
110 {"VARIANCE", N_("Variance"), MOMENT_VARIANCE},
111 {"KURTOSIS", N_("Kurtosis"), MOMENT_KURTOSIS},
112 {"SEKURTOSIS", N_("S.E. Kurt"), MOMENT_NONE},
113 {"SKEWNESS", N_("Skewness"), MOMENT_SKEWNESS},
114 {"SESKEWNESS", N_("S.E. Skew"), MOMENT_NONE},
115 {"RANGE", N_("Range"), MOMENT_NONE},
116 {"MINIMUM", N_("Minimum"), MOMENT_NONE},
117 {"MAXIMUM", N_("Maximum"), MOMENT_NONE},
118 {"SUM", N_("Sum"), MOMENT_MEAN},
121 /* Statistics calculated by default if none are explicitly
123 #define DEFAULT_STATS \
124 ((1UL << DSC_MEAN) | (1UL << DSC_STDDEV) | (1UL << DSC_MIN) \
127 /* A variable specified on DESCRIPTIVES. */
130 const struct variable *v; /* Variable to calculate on. */
131 char *z_name; /* Name for z-score variable. */
132 double valid, missing; /* Valid, missing counts. */
133 struct moments *moments; /* Moments. */
134 double min, max; /* Maximum and mimimum values. */
135 double stats[DSC_N_STATS]; /* All the stats' values. */
138 /* A DESCRIPTIVES procedure. */
141 /* Per-variable info. */
142 struct dictionary *dict; /* Dictionary. */
143 struct dsc_var *vars; /* Variables. */
144 size_t n_vars; /* Number of variables. */
147 enum dsc_missing_type missing_type; /* Treatment of missing values. */
148 enum mv_class exclude; /* Classes of missing values to exclude. */
150 /* Accumulated results. */
151 double missing_listwise; /* Sum of weights of cases missing listwise. */
152 double valid; /* Sum of weights of valid cases. */
153 bool bad_warn; /* Warn if bad weight found. */
154 enum dsc_statistic sort_by_stat; /* Statistic to sort by; -1: name. */
155 enum subcase_direction sort_direction;
156 unsigned long show_stats; /* Statistics to display. */
157 unsigned long calc_stats; /* Statistics to calculate. */
158 enum moment max_moment; /* Highest moment needed for stats. */
161 struct casewriter *z_writer; /* Mean and stddev per SPLIT FILE group. */
165 static enum dsc_statistic match_statistic (struct lexer *);
166 static void free_dsc_proc (struct dsc_proc *);
168 /* Z-score functions. */
169 static bool try_name (const struct dictionary *dict,
170 struct dsc_proc *dsc, const char *name);
171 static char *generate_z_varname (const struct dictionary *dict,
172 struct dsc_proc *dsc,
173 const char *name, int *n_zs);
174 static void dump_z_table (struct dsc_proc *);
175 static void setup_z_trns (struct dsc_proc *, struct dataset *);
177 /* Procedure execution functions. */
178 static void calc_descriptives (struct dsc_proc *, struct casereader *,
180 static void display (struct dsc_proc *dsc);
182 /* Parser and outline. */
184 /* Handles DESCRIPTIVES. */
186 cmd_descriptives (struct lexer *lexer, struct dataset *ds)
188 struct dictionary *dict = dataset_dict (ds);
189 const struct variable **vars = NULL;
191 bool save_z_scores = false;
194 /* Create and initialize dsc. */
195 struct dsc_proc *dsc = xmalloc (sizeof *dsc);
196 *dsc = (struct dsc_proc) {
198 .missing_type = DSC_VARIABLE,
201 .sort_by_stat = DSC_NONE,
202 .sort_direction = SC_ASCEND,
203 .show_stats = DEFAULT_STATS,
204 .calc_stats = DEFAULT_STATS,
207 /* Parse DESCRIPTIVES. */
209 while (lex_token (lexer) != T_ENDCMD)
211 if (lex_match_id (lexer, "MISSING"))
213 lex_match (lexer, T_EQUALS);
214 while (lex_token (lexer) != T_ENDCMD && lex_token (lexer) != T_SLASH)
216 if (lex_match_id (lexer, "VARIABLE"))
217 dsc->missing_type = DSC_VARIABLE;
218 else if (lex_match_id (lexer, "LISTWISE"))
219 dsc->missing_type = DSC_LISTWISE;
220 else if (lex_match_id (lexer, "INCLUDE"))
221 dsc->exclude = MV_SYSTEM;
224 lex_error_expecting (lexer, "VARIABLE", "LISTWISE",
228 lex_match (lexer, T_COMMA);
231 else if (lex_match_id (lexer, "SAVE"))
233 save_z_scores = true;
234 z_ofs = lex_ofs (lexer) - 1;
236 else if (lex_match_id (lexer, "FORMAT"))
238 lex_match (lexer, T_EQUALS);
239 while (lex_token (lexer) != T_ENDCMD && lex_token (lexer) != T_SLASH)
241 if (lex_match_id (lexer, "LABELS")
242 || lex_match_id (lexer, "NOLABELS")
243 || lex_match_id (lexer, "INDEX")
244 || lex_match_id (lexer, "NOINDEX")
245 || lex_match_id (lexer, "LINE")
246 || lex_match_id (lexer, "SERIAL"))
252 lex_error_expecting (lexer, "LABELS", "NOLABELS",
253 "INDEX", "NOINDEX", "LINE", "SERIAL");
256 lex_match (lexer, T_COMMA);
259 else if (lex_match_id (lexer, "STATISTICS"))
261 lex_match (lexer, T_EQUALS);
263 while (lex_token (lexer) != T_ENDCMD && lex_token (lexer) != T_SLASH)
265 if (lex_match (lexer, T_ALL))
266 dsc->show_stats |= (1UL << DSC_N_STATS) - 1;
267 else if (lex_match_id (lexer, "DEFAULT"))
268 dsc->show_stats |= DEFAULT_STATS;
271 enum dsc_statistic s = match_statistic (lexer);
274 dsc->show_stats |= 1UL << s;
276 lex_match (lexer, T_COMMA);
278 if (dsc->show_stats == 0)
279 dsc->show_stats = DEFAULT_STATS;
281 else if (lex_match_id (lexer, "SORT"))
283 lex_match (lexer, T_EQUALS);
284 if (lex_match_id (lexer, "NAME"))
285 dsc->sort_by_stat = DSC_NAME;
288 dsc->sort_by_stat = match_statistic (lexer);
289 if (dsc->sort_by_stat == DSC_NONE)
290 dsc->sort_by_stat = DSC_MEAN;
292 if (lex_match (lexer, T_LPAREN))
294 if (lex_match_id (lexer, "A"))
295 dsc->sort_direction = SC_ASCEND;
296 else if (lex_match_id (lexer, "D"))
297 dsc->sort_direction = SC_DESCEND;
300 lex_error_expecting (lexer, "A", "D");
303 if (!lex_force_match (lexer, T_RPAREN))
307 else if (n_vars == 0)
309 lex_match_phrase (lexer, "VARIABLES=");
310 while (lex_token (lexer) != T_ENDCMD && lex_token (lexer) != T_SLASH)
312 if (!parse_variables_const (lexer, dict, &vars, &n_vars,
313 PV_APPEND | PV_NO_DUPLICATE | PV_NUMERIC))
316 dsc->vars = xnrealloc ((void *)dsc->vars, n_vars, sizeof *dsc->vars);
317 for (size_t i = dsc->n_vars; i < n_vars; i++)
318 dsc->vars[i] = (struct dsc_var) { .v = vars[i] };
319 dsc->n_vars = n_vars;
321 if (lex_match (lexer, T_LPAREN))
323 if (!lex_force_id (lexer))
325 z_ofs = lex_ofs (lexer);
326 if (try_name (dict, dsc, lex_tokcstr (lexer)))
328 struct dsc_var *dsc_var = &dsc->vars[dsc->n_vars - 1];
329 dsc_var->z_name = xstrdup (lex_tokcstr (lexer));
333 lex_error (lexer, _("Z-score variable name %s would be "
334 "a duplicate variable name."),
335 lex_tokcstr (lexer));
337 if (!lex_force_match (lexer, T_RPAREN))
344 lex_error_expecting (lexer, "MISSING", "SAVE", "FORMAT", "STATISTICS",
345 "SORT", "VARIABLES");
349 lex_match (lexer, T_SLASH);
353 msg (SE, _("No variables specified."));
357 /* Construct z-score varnames, show translation table. */
358 if (n_zs || save_z_scores)
364 for (size_t i = 0; i < dsc->n_vars; i++)
366 struct dsc_var *dsc_var = &dsc->vars[i];
367 if (dsc_var->z_name == NULL)
369 const char *name = var_get_name (dsc_var->v);
370 dsc_var->z_name = generate_z_varname (dict, dsc, name,
372 if (dsc_var->z_name == NULL)
380 /* It would be better to handle Z scores correctly (however we define
381 that) when TEMPORARY is in effect, but in the meantime this at least
382 prevents a use-after-free error. See bug #38786. */
383 if (proc_make_temporary_transformations_permanent (ds))
384 lex_ofs_msg (lexer, SW, z_ofs, z_ofs,
385 _("DESCRIPTIVES with Z scores ignores TEMPORARY. "
386 "Temporary transformations will be made permanent."));
388 struct caseproto *proto = caseproto_create ();
389 for (size_t i = 0; i < 1 + 2 * n_zs; i++)
390 proto = caseproto_add_width (proto, 0);
391 dsc->z_writer = autopaging_writer_create (proto);
392 caseproto_unref (proto);
397 /* Figure out statistics to display. */
398 if (dsc->show_stats & (1UL << DSC_SKEWNESS))
399 dsc->show_stats |= 1UL << DSC_SESKEW;
400 if (dsc->show_stats & (1UL << DSC_KURTOSIS))
401 dsc->show_stats |= 1UL << DSC_SEKURT;
403 /* Figure out which statistics to calculate. */
404 dsc->calc_stats = dsc->show_stats;
406 dsc->calc_stats |= (1UL << DSC_MEAN) | (1UL << DSC_STDDEV);
407 if (dsc->sort_by_stat >= 0)
408 dsc->calc_stats |= 1UL << dsc->sort_by_stat;
409 if (dsc->show_stats & (1UL << DSC_SESKEW))
410 dsc->calc_stats |= 1UL << DSC_SKEWNESS;
411 if (dsc->show_stats & (1UL << DSC_SEKURT))
412 dsc->calc_stats |= 1UL << DSC_KURTOSIS;
414 /* Figure out maximum moment needed and allocate moments for
416 dsc->max_moment = MOMENT_NONE;
417 for (size_t i = 0; i < DSC_N_STATS; i++)
418 if (dsc->calc_stats & (1UL << i) && dsc_info[i].moment > dsc->max_moment)
419 dsc->max_moment = dsc_info[i].moment;
420 if (dsc->max_moment != MOMENT_NONE)
421 for (size_t i = 0; i < dsc->n_vars; i++)
422 dsc->vars[i].moments = moments_create (dsc->max_moment);
425 struct casegrouper *grouper = casegrouper_create_splits (proc_open_filtering (
427 struct casereader *group;
428 while (casegrouper_get_next_group (grouper, &group))
429 calc_descriptives (dsc, group, ds);
430 bool ok = casegrouper_destroy (grouper);
431 ok = proc_commit (ds) && ok;
435 setup_z_trns (dsc, ds);
440 return ok ? CMD_SUCCESS : CMD_CASCADING_FAILURE;
448 /* Returns the statistic named by the current token and skips past the token.
449 Returns DSC_NONE if no statistic is given (e.g., subcommand with no
450 specifiers). Emits an error if the current token ID does not name a
452 static enum dsc_statistic
453 match_statistic (struct lexer *lexer)
455 if (lex_token (lexer) == T_ID)
457 for (enum dsc_statistic stat = 0; stat < DSC_N_STATS; stat++)
458 if (lex_match_id (lexer, dsc_info[stat].identifier))
461 const char *stat_names[DSC_N_STATS];
462 for (enum dsc_statistic stat = 0; stat < DSC_N_STATS; stat++)
463 stat_names[stat] = dsc_info[stat].identifier;
464 lex_error_expecting_array (lexer, stat_names,
465 sizeof stat_names / sizeof *stat_names);
474 free_dsc_proc (struct dsc_proc *dsc)
479 for (size_t i = 0; i < dsc->n_vars; i++)
481 struct dsc_var *dsc_var = &dsc->vars[i];
482 free (dsc_var->z_name);
483 moments_destroy (dsc_var->moments);
485 casewriter_destroy (dsc->z_writer);
492 /* Returns false if NAME is a duplicate of any existing variable name or
493 of any previously-declared z-var name; otherwise returns true. */
495 try_name (const struct dictionary *dict, struct dsc_proc *dsc,
498 if (dict_lookup_var (dict, name) != NULL)
500 for (size_t i = 0; i < dsc->n_vars; i++)
502 struct dsc_var *dsc_var = &dsc->vars[i];
503 if (dsc_var->z_name != NULL && !utf8_strcasecmp (dsc_var->z_name, name))
509 /* Generates a name for a Z-score variable based on a variable
510 named VAR_NAME, given that *Z_CNT generated variable names are
511 known to already exist. If successful, returns the new name
512 as a dynamically allocated string. On failure, returns NULL. */
514 generate_z_varname (const struct dictionary *dict, struct dsc_proc *dsc,
515 const char *var_name, int *n_zs)
517 /* Try a name based on the original variable name. */
518 char *z_name = xasprintf ("Z%s", var_name);
519 char *trunc_name = utf8_encoding_trunc (z_name, dict_get_encoding (dict),
522 if (try_name (dict, dsc, trunc_name))
526 /* Generate a synthetic name. */
534 sprintf (name, "ZSC%03d", *n_zs);
535 else if (*n_zs <= 108)
536 sprintf (name, "STDZ%02d", *n_zs - 99);
537 else if (*n_zs <= 117)
538 sprintf (name, "ZZZZ%02d", *n_zs - 108);
539 else if (*n_zs <= 126)
540 sprintf (name, "ZQZQ%02d", *n_zs - 117);
543 msg (SE, _("Ran out of generic names for Z-score variables. "
544 "There are only 126 generic names: ZSC001-ZSC099, "
545 "STDZ01-STDZ09, ZZZZ01-ZZZZ09, ZQZQ01-ZQZQ09."));
549 if (try_name (dict, dsc, name))
550 return xstrdup (name);
555 /* Outputs a table describing the mapping between source
556 variables and Z-score variables. */
558 dump_z_table (struct dsc_proc *dsc)
560 struct pivot_table *table = pivot_table_create (
561 N_("Mapping of Variables to Z-scores"));
563 pivot_dimension_create (table, PIVOT_AXIS_COLUMN, N_("Names"),
564 N_("Source"), N_("Target"));
566 struct pivot_dimension *names = pivot_dimension_create (
567 table, PIVOT_AXIS_ROW, N_("Variables"));
568 names->hide_all_labels = true;
570 for (size_t i = 0; i < dsc->n_vars; i++)
571 if (dsc->vars[i].z_name != NULL)
573 int row = pivot_category_create_leaf (names->root,
574 pivot_value_new_number (i));
576 pivot_table_put2 (table, 0, row,
577 pivot_value_new_variable (dsc->vars[i].v));
578 pivot_table_put2 (table, 1, row,
579 pivot_value_new_user_text (dsc->vars[i].z_name, -1));
582 pivot_table_submit (table);
586 descriptives_set_all_sysmis_zscores (const struct dsc_trns *t, struct ccase *c)
588 for (const struct dsc_z_score *z = t->z_scores;
589 z < t->z_scores + t->n_z_scores; z++)
590 *case_num_rw (c, z->z_var) = SYSMIS;
593 /* Transformation function to calculate Z-scores. Will return SYSMIS if any of
594 the following are true: 1) mean or standard deviation is SYSMIS 2) score is
595 SYSMIS 3) score is user missing and they were not included in the original
596 analyis. 4) any of the variables in the original analysis were missing
597 (either system or user-missing values that weren't included).
599 static enum trns_result
600 descriptives_trns_proc (void *trns_, struct ccase **c,
601 casenumber case_idx UNUSED)
603 struct dsc_trns *t = trns_;
605 *c = case_unshare (*c);
609 double f = case_num (*c, t->filter);
610 if (f == 0.0 || var_is_num_missing (t->filter, f))
612 descriptives_set_all_sysmis_zscores (t, *c);
613 return TRNS_CONTINUE;
619 struct ccase *z_case = casereader_read (t->z_reader);
624 t->count = case_num_idx (z_case, z_idx++);
625 for (struct dsc_z_score *z = t->z_scores;
626 z < t->z_scores + t->n_z_scores; z++)
628 z->mean = case_num_idx (z_case, z_idx++);
629 z->std_dev = case_num_idx (z_case, z_idx++);
637 msg (SE, _("Internal error processing Z scores. "
638 "Please report this to %s."),
642 descriptives_set_all_sysmis_zscores (t, *c);
643 return TRNS_CONTINUE;
648 if (t->missing_type == DSC_LISTWISE)
650 assert (t->vars != NULL);
651 for (const struct variable **vars = t->vars; vars < t->vars + t->n_vars;
654 double score = case_num (*c, *vars);
655 if (var_is_num_missing (*vars, score) & t->exclude)
657 descriptives_set_all_sysmis_zscores (t, *c);
658 return TRNS_CONTINUE;
663 for (struct dsc_z_score *z = t->z_scores; z < t->z_scores + t->n_z_scores;
666 double input = case_num (*c, z->src_var);
667 double *output = case_num_rw (*c, z->z_var);
669 if (z->mean == SYSMIS || z->std_dev == SYSMIS
670 || var_is_num_missing (z->src_var, input) & t->exclude)
673 *output = (input - z->mean) / z->std_dev;
675 return TRNS_CONTINUE;
678 /* Frees a descriptives_trns struct. */
680 descriptives_trns_free (void *trns_)
682 struct dsc_trns *t = trns_;
683 bool ok = t->ok && !casereader_error (t->z_reader);
686 casereader_destroy (t->z_reader);
687 assert ((t->missing_type != DSC_LISTWISE) != (t->vars != NULL));
694 static const struct trns_class descriptives_trns_class = {
695 .name = "DESCRIPTIVES (Z scores)",
696 .execute = descriptives_trns_proc,
697 .destroy = descriptives_trns_free,
700 /* Sets up a transformation to calculate Z scores. */
702 setup_z_trns (struct dsc_proc *dsc, struct dataset *ds)
705 for (size_t i = 0; i < dsc->n_vars; i++)
706 if (dsc->vars[i].z_name != NULL)
709 struct dsc_trns *t = xmalloc (sizeof *t);
710 *t = (struct dsc_trns) {
711 .z_scores = xmalloc (n * sizeof *t->z_scores),
713 .missing_type = dsc->missing_type,
714 .exclude = dsc->exclude,
715 .filter = dict_get_filter (dataset_dict (ds)),
716 .z_reader = casewriter_make_reader (dsc->z_writer),
719 if (t->missing_type == DSC_LISTWISE)
721 t->n_vars = dsc->n_vars;
722 t->vars = xnmalloc (t->n_vars, sizeof *t->vars);
723 for (size_t i = 0; i < t->n_vars; i++)
724 t->vars[i] = dsc->vars[i].v;
726 dsc->z_writer = NULL;
729 for (size_t i = 0; i < dsc->n_vars; i++)
731 struct dsc_var *dv = &dsc->vars[i];
732 if (dv->z_name != NULL)
734 struct variable *dst_var = dict_create_var_assert (dataset_dict (ds),
737 char *label = xasprintf (_("Z-score of %s"), var_to_string (dv->v));
738 var_set_label (dst_var, label);
741 struct dsc_z_score *z = &t->z_scores[n++];
742 *z = (struct dsc_z_score) {
749 add_transformation (ds, &descriptives_trns_class, t);
752 /* Statistical calculation. */
754 static bool listwise_missing (struct dsc_proc *dsc, const struct ccase *c);
756 /* Calculates and displays descriptive statistics for the cases
759 calc_descriptives (struct dsc_proc *dsc, struct casereader *group,
762 output_split_file_values_peek (ds, group);
763 group = casereader_create_filter_weight (group, dataset_dict (ds),
766 struct casereader *pass1 = group;
767 struct casereader *pass2 = (dsc->max_moment <= MOMENT_MEAN ? NULL
768 : casereader_clone (pass1));
769 for (size_t i = 0; i < dsc->n_vars; i++)
771 struct dsc_var *dv = &dsc->vars[i];
773 dv->valid = dv->missing = 0.0;
774 if (dv->moments != NULL)
775 moments_clear (dv->moments);
779 dsc->missing_listwise = 0.;
782 /* First pass to handle most of the work. */
783 casenumber count = 0;
784 const struct variable *filter = dict_get_filter (dataset_dict (ds));
786 for (; (c = casereader_read (pass1)) != NULL; case_unref (c))
788 double weight = dict_get_case_weight (dataset_dict (ds), c, NULL);
792 double f = case_num (c, filter);
793 if (f == 0.0 || var_is_num_missing (filter, f))
797 /* Check for missing values. */
798 if (listwise_missing (dsc, c))
800 dsc->missing_listwise += weight;
801 if (dsc->missing_type == DSC_LISTWISE)
804 dsc->valid += weight;
806 for (size_t i = 0; i < dsc->n_vars; i++)
808 struct dsc_var *dv = &dsc->vars[i];
809 double x = case_num (c, dv->v);
811 if (var_is_num_missing (dv->v, x) & dsc->exclude)
813 dv->missing += weight;
817 if (dv->moments != NULL)
818 moments_pass_one (dv->moments, x, weight);
828 if (!casereader_destroy (pass1))
830 casereader_destroy (pass2);
834 /* Second pass for higher-order moments. */
835 if (dsc->max_moment > MOMENT_MEAN)
837 for (; (c = casereader_read (pass2)) != NULL; case_unref (c))
839 double weight = dict_get_case_weight (dataset_dict (ds), c, NULL);
843 double f = case_num (c, filter);
844 if (f == 0.0 || var_is_num_missing (filter, f))
848 /* Check for missing values. */
849 if (dsc->missing_type == DSC_LISTWISE && listwise_missing (dsc, c))
852 for (size_t i = 0; i < dsc->n_vars; i++)
854 struct dsc_var *dv = &dsc->vars[i];
855 double x = case_num (c, dv->v);
857 if (var_is_num_missing (dv->v, x) & dsc->exclude)
860 if (dv->moments != NULL)
861 moments_pass_two (dv->moments, x, weight);
864 if (!casereader_destroy (pass2))
868 /* Calculate results. */
870 if (dsc->z_writer && count > 0)
872 c = case_create (casewriter_get_proto (dsc->z_writer));
873 *case_num_rw_idx (c, z_idx++) = count;
878 for (size_t i = 0; i < dsc->n_vars; i++)
880 struct dsc_var *dv = &dsc->vars[i];
882 for (size_t j = 0; j < DSC_N_STATS; j++)
883 dv->stats[j] = SYSMIS;
885 double W = dsc->valid - dv->missing;
888 if (dv->moments != NULL)
889 moments_calculate (dv->moments, NULL,
890 &dv->stats[DSC_MEAN], &dv->stats[DSC_VARIANCE],
891 &dv->stats[DSC_SKEWNESS], &dv->stats[DSC_KURTOSIS]);
892 if (dsc->calc_stats & (1UL << DSC_SEMEAN)
893 && dv->stats[DSC_VARIANCE] != SYSMIS && W > 0.)
894 dv->stats[DSC_SEMEAN] = sqrt (dv->stats[DSC_VARIANCE]) / sqrt (W);
895 if (dsc->calc_stats & (1UL << DSC_STDDEV)
896 && dv->stats[DSC_VARIANCE] != SYSMIS)
897 dv->stats[DSC_STDDEV] = sqrt (dv->stats[DSC_VARIANCE]);
898 if (dsc->calc_stats & (1UL << DSC_SEKURT))
899 if (dv->stats[DSC_KURTOSIS] != SYSMIS)
900 dv->stats[DSC_SEKURT] = calc_sekurt (W);
901 if (dsc->calc_stats & (1UL << DSC_SESKEW)
902 && dv->stats[DSC_SKEWNESS] != SYSMIS)
903 dv->stats[DSC_SESKEW] = calc_seskew (W);
904 dv->stats[DSC_RANGE] = ((dv->min == DBL_MAX || dv->max == -DBL_MAX)
905 ? SYSMIS : dv->max - dv->min);
906 dv->stats[DSC_MIN] = dv->min == DBL_MAX ? SYSMIS : dv->min;
907 dv->stats[DSC_MAX] = dv->max == -DBL_MAX ? SYSMIS : dv->max;
908 if (dsc->calc_stats & (1UL << DSC_SUM))
909 dv->stats[DSC_SUM] = W * dv->stats[DSC_MEAN];
911 if (dv->z_name && c != NULL)
913 *case_num_rw_idx (c, z_idx++) = dv->stats[DSC_MEAN];
914 *case_num_rw_idx (c, z_idx++) = dv->stats[DSC_STDDEV];
919 casewriter_write (dsc->z_writer, c);
921 /* Output results. */
925 /* Returns true if any of the descriptives variables in DSC's
926 variable list have missing values in case C, false otherwise. */
928 listwise_missing (struct dsc_proc *dsc, const struct ccase *c)
930 for (size_t i = 0; i < dsc->n_vars; i++)
932 struct dsc_var *dv = &dsc->vars[i];
933 double x = case_num (c, dv->v);
935 if (var_is_num_missing (dv->v, x) & dsc->exclude)
941 /* Statistical display. */
943 static algo_compare_func descriptives_compare_dsc_vars;
945 /* Displays a table of descriptive statistics for DSC. */
947 display (struct dsc_proc *dsc)
949 struct pivot_table *table = pivot_table_create (
950 N_("Descriptive Statistics"));
951 pivot_table_set_weight_var (table, dict_get_weight (dsc->dict));
953 struct pivot_dimension *statistics = pivot_dimension_create (
954 table, PIVOT_AXIS_COLUMN, N_("Statistics"));
955 pivot_category_create_leaf_rc (
956 statistics->root, pivot_value_new_text (N_("N")), PIVOT_RC_COUNT);
957 for (int i = 0; i < DSC_N_STATS; i++)
958 if (dsc->show_stats & (1UL << i))
959 pivot_category_create_leaf (statistics->root,
960 pivot_value_new_text (dsc_info[i].name));
962 if (dsc->sort_by_stat != DSC_NONE)
963 sort (dsc->vars, dsc->n_vars, sizeof *dsc->vars,
964 descriptives_compare_dsc_vars, dsc);
966 struct pivot_dimension *variables = pivot_dimension_create (
967 table, PIVOT_AXIS_ROW, N_("Variable"));
968 for (size_t i = 0; i < dsc->n_vars; i++)
970 const struct dsc_var *dv = &dsc->vars[i];
972 int row = pivot_category_create_leaf (variables->root,
973 pivot_value_new_variable (dv->v));
976 pivot_table_put2 (table, column++, row,
977 pivot_value_new_number (dv->valid));
979 for (int j = 0; j < DSC_N_STATS; j++)
980 if (dsc->show_stats & (1UL << j))
982 union value v = { .f = dv->stats[j] };
983 struct pivot_value *pv = (j == DSC_MIN || j == DSC_MAX
984 ? pivot_value_new_var_value (dv->v, &v)
985 : pivot_value_new_number (dv->stats[j]));
986 pivot_table_put2 (table, column++, row, pv);
990 int row = pivot_category_create_leaves (
991 variables->root, N_("Valid N (listwise)"), N_("Missing N (listwise)"));
992 pivot_table_put2 (table, 0, row, pivot_value_new_number (dsc->valid));
993 pivot_table_put2 (table, 0, row + 1,
994 pivot_value_new_number (dsc->missing_listwise));
995 pivot_table_submit (table);
998 /* Compares `struct dsc_var's A and B according to the ordering
1001 descriptives_compare_dsc_vars (const void *a_, const void *b_, const void *dsc_)
1003 const struct dsc_var *a = a_;
1004 const struct dsc_var *b = b_;
1005 const struct dsc_proc *dsc = dsc_;
1009 if (dsc->sort_by_stat == DSC_NAME)
1010 result = utf8_strcasecmp (var_get_name (a->v), var_get_name (b->v));
1013 double as = a->stats[dsc->sort_by_stat];
1014 double bs = b->stats[dsc->sort_by_stat];
1016 result = as < bs ? -1 : as > bs;
1019 if (dsc->sort_direction == SC_DESCEND)