1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 1997-2000, 2009-2014 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
24 #include "data/casegrouper.h"
25 #include "data/casereader.h"
26 #include "data/casewriter.h"
27 #include "data/dataset.h"
28 #include "data/dictionary.h"
29 #include "data/transformations.h"
30 #include "data/variable.h"
31 #include "language/command.h"
32 #include "language/dictionary/split-file.h"
33 #include "language/lexer/lexer.h"
34 #include "language/lexer/variable-parser.h"
35 #include "libpspp/array.h"
36 #include "libpspp/assertion.h"
37 #include "libpspp/compiler.h"
38 #include "libpspp/i18n.h"
39 #include "libpspp/message.h"
40 #include "math/moments.h"
41 #include "output/pivot-table.h"
43 #include "gl/xalloc.h"
46 #define _(msgid) gettext (msgid)
47 #define N_(msgid) msgid
49 /* DESCRIPTIVES private data. */
53 /* Handling of missing values. */
56 DSC_VARIABLE, /* Handle missing values on a per-variable basis. */
57 DSC_LISTWISE /* Discard entire case if any variable is missing. */
60 /* Describes properties of a distribution for the purpose of
61 calculating a Z-score. */
64 const struct variable *src_var; /* Variable on which z-score is based. */
65 struct variable *z_var; /* New z-score variable. */
66 double mean; /* Distribution mean. */
67 double std_dev; /* Distribution standard deviation. */
70 /* DESCRIPTIVES transformation (for calculating Z-scores). */
73 struct dsc_z_score *z_scores; /* Array of Z-scores. */
74 int n_z_scores; /* Number of Z-scores. */
75 const struct variable **vars; /* Variables for listwise missing checks. */
76 size_t n_vars; /* Number of variables. */
77 enum dsc_missing_type missing_type; /* Treatment of missing values. */
78 enum mv_class exclude; /* Classes of missing values to exclude. */
79 const struct variable *filter; /* Dictionary FILTER BY variable. */
80 struct casereader *z_reader; /* Reader for count, mean, stddev. */
81 casenumber count; /* Number left in this SPLIT FILE group.*/
85 /* Statistics. Used as bit indexes, so must be 32 or fewer. */
88 DSC_MEAN = 0, DSC_SEMEAN, DSC_STDDEV, DSC_VARIANCE, DSC_KURTOSIS,
89 DSC_SEKURT, DSC_SKEWNESS, DSC_SESKEW, DSC_RANGE, DSC_MIN,
90 DSC_MAX, DSC_SUM, DSC_N_STATS,
92 /* Only valid as sort criteria. */
93 DSC_NAME = -2, /* Sort by name. */
94 DSC_NONE = -1 /* Unsorted. */
97 /* Describes one statistic. */
98 struct dsc_statistic_info
100 const char *identifier; /* Identifier. */
101 const char *name; /* Full name. */
102 enum moment moment; /* Highest moment needed to calculate. */
105 /* Table of statistics, indexed by DSC_*. */
106 static const struct dsc_statistic_info dsc_info[DSC_N_STATS] =
108 {"MEAN", N_("Mean"), MOMENT_MEAN},
109 {"SEMEAN", N_("S.E. Mean"), MOMENT_VARIANCE},
110 {"STDDEV", N_("Std Dev"), MOMENT_VARIANCE},
111 {"VARIANCE", N_("Variance"), MOMENT_VARIANCE},
112 {"KURTOSIS", N_("Kurtosis"), MOMENT_KURTOSIS},
113 {"SEKURTOSIS", N_("S.E. Kurt"), MOMENT_NONE},
114 {"SKEWNESS", N_("Skewness"), MOMENT_SKEWNESS},
115 {"SESKEWNESS", N_("S.E. Skew"), MOMENT_NONE},
116 {"RANGE", N_("Range"), MOMENT_NONE},
117 {"MINIMUM", N_("Minimum"), MOMENT_NONE},
118 {"MAXIMUM", N_("Maximum"), MOMENT_NONE},
119 {"SUM", N_("Sum"), MOMENT_MEAN},
122 /* Statistics calculated by default if none are explicitly
124 #define DEFAULT_STATS \
125 ((1ul << DSC_MEAN) | (1ul << DSC_STDDEV) | (1ul << DSC_MIN) \
128 /* A variable specified on DESCRIPTIVES. */
131 const struct variable *v; /* Variable to calculate on. */
132 char *z_name; /* Name for z-score variable, or NULL if none. */
133 double valid, missing; /* Sums of weights of valid, missing cases. */
134 struct moments *moments; /* Moments, or NULL if no moments are needed. */
135 double min, max; /* Minimum and maximum values. */
136 double stats[DSC_N_STATS]; /* All the stats' values, indexed by DSC_*. */
139 /* A DESCRIPTIVES procedure. */
142 /* Per-variable info. */
143 struct dictionary *dict; /* Dictionary. */
144 struct dsc_var *vars; /* Variables. */
145 size_t n_vars; /* Number of variables. */
148 enum dsc_missing_type missing_type; /* Treatment of missing values. */
149 enum mv_class exclude; /* Classes of missing values to exclude. */
151 /* Accumulated results. */
152 double missing_listwise; /* Sum of weights of cases missing listwise. */
153 double valid; /* Sum of weights of valid cases. */
154 bool bad_warn; /* Warn if bad weight found. */
155 enum dsc_statistic sort_by_stat; /* Statistic to sort by; DSC_NAME: by name; DSC_NONE: unsorted. */
156 int sort_ascending; /* !0: ascending order; 0: descending. */
157 unsigned long show_stats; /* Statistics to display (bit mask of 1ul << DSC_*). */
158 unsigned long calc_stats; /* Statistics to calculate (bit mask of 1ul << DSC_*). */
159 enum moment max_moment; /* Highest moment needed for stats. */
162 struct casewriter *z_writer; /* Count, mean, and stddev per SPLIT FILE group; NULL if no Z scores. */
166 static enum dsc_statistic match_statistic (struct lexer *);
167 static void free_dsc_proc (struct dsc_proc *);
169 /* Z-score functions. */
170 static bool try_name (const struct dictionary *dict,
171 struct dsc_proc *dsc, const char *name);
172 static char *generate_z_varname (const struct dictionary *dict,
173 struct dsc_proc *dsc,
174 const char *name, int *n_zs);
175 static void dump_z_table (struct dsc_proc *);
176 static void setup_z_trns (struct dsc_proc *, struct dataset *);
178 /* Procedure execution functions. */
179 static void calc_descriptives (struct dsc_proc *, struct casereader *,
181 static void display (struct dsc_proc *dsc);
183 /* Parser and outline. */
185 /* Handles DESCRIPTIVES. */
187 cmd_descriptives (struct lexer *lexer, struct dataset *ds)
189 struct dictionary *dict = dataset_dict (ds);
190 struct dsc_proc *dsc;
191 const struct variable **vars = NULL;
193 int save_z_scores = 0;
198 struct casegrouper *grouper;
199 struct casereader *group;
201 /* Create and initialize dsc. */
202 dsc = xmalloc (sizeof *dsc);
206 dsc->missing_type = DSC_VARIABLE;
207 dsc->exclude = MV_ANY;
208 dsc->missing_listwise = 0.;
211 dsc->sort_by_stat = DSC_NONE;
212 dsc->sort_ascending = 1;
213 dsc->show_stats = dsc->calc_stats = DEFAULT_STATS;
214 dsc->z_writer = NULL;
216 /* Parse DESCRIPTIVES. */
218 while (lex_token (lexer) != T_ENDCMD)
220 if (lex_match_id (lexer, "MISSING"))
222 lex_match (lexer, T_EQUALS);
223 while (lex_token (lexer) != T_ENDCMD && lex_token (lexer) != T_SLASH)
225 if (lex_match_id (lexer, "VARIABLE"))
226 dsc->missing_type = DSC_VARIABLE;
227 else if (lex_match_id (lexer, "LISTWISE"))
228 dsc->missing_type = DSC_LISTWISE;
229 else if (lex_match_id (lexer, "INCLUDE"))
230 dsc->exclude = MV_SYSTEM;
233 lex_error (lexer, NULL);
236 lex_match (lexer, T_COMMA);
239 else if (lex_match_id (lexer, "SAVE"))
242 z_ofs = lex_ofs (lexer) - 1;
244 else if (lex_match_id (lexer, "FORMAT"))
246 lex_match (lexer, T_EQUALS);
247 while (lex_token (lexer) != T_ENDCMD && lex_token (lexer) != T_SLASH)
249 if (lex_match_id (lexer, "LABELS")
250 || lex_match_id (lexer, "NOLABELS")
251 || lex_match_id (lexer, "INDEX")
252 || lex_match_id (lexer, "NOINDEX")
253 || lex_match_id (lexer, "LINE")
254 || lex_match_id (lexer, "SERIAL"))
260 lex_error (lexer, NULL);
263 lex_match (lexer, T_COMMA);
266 else if (lex_match_id (lexer, "STATISTICS"))
268 lex_match (lexer, T_EQUALS);
270 while (lex_token (lexer) != T_ENDCMD && lex_token (lexer) != T_SLASH)
272 if (lex_match (lexer, T_ALL))
273 dsc->show_stats |= (1ul << DSC_N_STATS) - 1;
274 else if (lex_match_id (lexer, "DEFAULT"))
275 dsc->show_stats |= DEFAULT_STATS;
278 enum dsc_statistic s = match_statistic (lexer);
281 lex_error (lexer, NULL);
284 dsc->show_stats |= 1ul << s;
286 lex_match (lexer, T_COMMA);
288 if (dsc->show_stats == 0)
289 dsc->show_stats = DEFAULT_STATS;
291 else if (lex_match_id (lexer, "SORT"))
293 lex_match (lexer, T_EQUALS);
294 if (lex_match_id (lexer, "NAME"))
295 dsc->sort_by_stat = DSC_NAME;
298 dsc->sort_by_stat = match_statistic (lexer);
299 if (dsc->sort_by_stat == DSC_NONE)
300 dsc->sort_by_stat = DSC_MEAN;
302 if (lex_match (lexer, T_LPAREN))
304 if (lex_match_id (lexer, "A"))
305 dsc->sort_ascending = 1;
306 else if (lex_match_id (lexer, "D"))
307 dsc->sort_ascending = 0;
309 lex_error (lexer, NULL);
310 if (! lex_force_match (lexer, T_RPAREN))
314 else if (n_vars == 0)
316 if (lex_next_token (lexer, 1) == T_EQUALS)
318 lex_match_id (lexer, "VARIABLES");
319 lex_match (lexer, T_EQUALS);
322 while (lex_token (lexer) != T_ENDCMD && lex_token (lexer) != T_SLASH)
326 if (!parse_variables_const (lexer, dict, &vars, &n_vars,
327 PV_APPEND | PV_NO_DUPLICATE | PV_NUMERIC))
330 dsc->vars = xnrealloc ((void *)dsc->vars, n_vars, sizeof *dsc->vars);
331 for (i = dsc->n_vars; i < n_vars; i++)
333 struct dsc_var *dv = &dsc->vars[i];
338 dsc->n_vars = n_vars;
340 if (lex_match (lexer, T_LPAREN))
342 if (!lex_force_id (lexer))
344 z_ofs = lex_ofs (lexer);
345 if (try_name (dict, dsc, lex_tokcstr (lexer)))
347 struct dsc_var *dsc_var = &dsc->vars[dsc->n_vars - 1];
348 dsc_var->z_name = xstrdup (lex_tokcstr (lexer));
352 lex_error (lexer, _("Z-score variable name %s would be "
353 "a duplicate variable name."),
354 lex_tokcstr (lexer));
356 if (!lex_force_match (lexer, T_RPAREN))
363 lex_error (lexer, NULL);
367 lex_match (lexer, T_SLASH);
371 msg (SE, _("No variables specified."));
375 /* Construct z-score varnames, show translation table. */
376 if (n_zs || save_z_scores)
378 struct caseproto *proto;
384 for (i = 0; i < dsc->n_vars; i++)
386 struct dsc_var *dsc_var = &dsc->vars[i];
387 if (dsc_var->z_name == NULL)
389 const char *name = var_get_name (dsc_var->v);
390 dsc_var->z_name = generate_z_varname (dict, dsc, name,
392 if (dsc_var->z_name == NULL)
400 /* It would be better to handle Z scores correctly (however we define
401 that) when TEMPORARY is in effect, but in the meantime this at least
402 prevents a use-after-free error. See bug #38786. */
403 if (proc_make_temporary_transformations_permanent (ds))
404 lex_ofs_msg (lexer, SW, z_ofs, z_ofs,
405 _("DESCRIPTIVES with Z scores ignores TEMPORARY. "
406 "Temporary transformations will be made permanent."));
408 proto = caseproto_create ();
409 for (i = 0; i < 1 + 2 * n_zs; i++)
410 proto = caseproto_add_width (proto, 0);
411 dsc->z_writer = autopaging_writer_create (proto);
412 caseproto_unref (proto);
417 /* Figure out statistics to display. */
418 if (dsc->show_stats & (1ul << DSC_SKEWNESS))
419 dsc->show_stats |= 1ul << DSC_SESKEW;
420 if (dsc->show_stats & (1ul << DSC_KURTOSIS))
421 dsc->show_stats |= 1ul << DSC_SEKURT;
423 /* Figure out which statistics to calculate. */
424 dsc->calc_stats = dsc->show_stats;
426 dsc->calc_stats |= (1ul << DSC_MEAN) | (1ul << DSC_STDDEV);
427 if (dsc->sort_by_stat >= 0)
428 dsc->calc_stats |= 1ul << dsc->sort_by_stat;
429 if (dsc->show_stats & (1ul << DSC_SESKEW))
430 dsc->calc_stats |= 1ul << DSC_SKEWNESS;
431 if (dsc->show_stats & (1ul << DSC_SEKURT))
432 dsc->calc_stats |= 1ul << DSC_KURTOSIS;
434 /* Figure out maximum moment needed and allocate moments for
436 dsc->max_moment = MOMENT_NONE;
437 for (i = 0; i < DSC_N_STATS; i++)
438 if (dsc->calc_stats & (1ul << i) && dsc_info[i].moment > dsc->max_moment)
439 dsc->max_moment = dsc_info[i].moment;
440 if (dsc->max_moment != MOMENT_NONE)
441 for (i = 0; i < dsc->n_vars; i++)
442 dsc->vars[i].moments = moments_create (dsc->max_moment);
445 grouper = casegrouper_create_splits (proc_open_filtering (ds, false), dict);
446 while (casegrouper_get_next_group (grouper, &group))
447 calc_descriptives (dsc, group, ds);
448 ok = casegrouper_destroy (grouper);
449 ok = proc_commit (ds) && ok;
453 setup_z_trns (dsc, ds);
458 return ok ? CMD_SUCCESS : CMD_CASCADING_FAILURE;
466 /* Returns the statistic named by the current token and skips past the token.
467 Returns DSC_NONE if no statistic is given (e.g., subcommand with no
468 specifiers). Emits an error if the current token ID does not name a
470 static enum dsc_statistic
471 match_statistic (struct lexer *lexer)
473 if (lex_token (lexer) == T_ID)
475 for (enum dsc_statistic stat = 0; stat < DSC_N_STATS; stat++)
476 if (lex_match_id (lexer, dsc_info[stat].identifier))
479 const char *stat_names[DSC_N_STATS];
480 for (enum dsc_statistic stat = 0; stat < DSC_N_STATS; stat++)
481 stat_names[stat] = dsc_info[stat].identifier;
482 lex_error_expecting_array (lexer, stat_names,
483 sizeof stat_names / sizeof *stat_names);
492 free_dsc_proc (struct dsc_proc *dsc)
499 for (i = 0; i < dsc->n_vars; i++)
501 struct dsc_var *dsc_var = &dsc->vars[i];
502 free (dsc_var->z_name);
503 moments_destroy (dsc_var->moments);
505 casewriter_destroy (dsc->z_writer);
512 /* Returns false if NAME is a duplicate of any existing variable name or
513 of any previously-declared z-var name; otherwise returns true. */
515 try_name (const struct dictionary *dict, struct dsc_proc *dsc,
520 if (dict_lookup_var (dict, name) != NULL)
522 for (i = 0; i < dsc->n_vars; i++)
524 struct dsc_var *dsc_var = &dsc->vars[i];
525 if (dsc_var->z_name != NULL && !utf8_strcasecmp (dsc_var->z_name, name))
531 /* Generates a name for a Z-score variable based on a variable
532 named VAR_NAME, given that *N_ZS generated variable names are
533 known to already exist. If successful, returns the new name
534 as a dynamically allocated string. On failure, returns NULL. */
536 generate_z_varname (const struct dictionary *dict, struct dsc_proc *dsc,
537 const char *var_name, int *n_zs)
539 char *z_name, *trunc_name;
541 /* Try a name based on the original variable name. */
542 z_name = xasprintf ("Z%s", var_name);
543 trunc_name = utf8_encoding_trunc (z_name, dict_get_encoding (dict),
546 if (try_name (dict, dsc, trunc_name))
550 /* Generate a synthetic name. */
558 sprintf (name, "ZSC%03d", *n_zs);
559 else if (*n_zs <= 108)
560 sprintf (name, "STDZ%02d", *n_zs - 99);
561 else if (*n_zs <= 117)
562 sprintf (name, "ZZZZ%02d", *n_zs - 108);
563 else if (*n_zs <= 126)
564 sprintf (name, "ZQZQ%02d", *n_zs - 117);
567 msg (SE, _("Ran out of generic names for Z-score variables. "
568 "There are only 126 generic names: ZSC001-ZSC099, "
569 "STDZ01-STDZ09, ZZZZ01-ZZZZ09, ZQZQ01-ZQZQ09."));
573 if (try_name (dict, dsc, name))
574 return xstrdup (name);
579 /* Outputs a table describing the mapping between source
580 variables and Z-score variables. */
582 dump_z_table (struct dsc_proc *dsc)
584 struct pivot_table *table = pivot_table_create (
585 N_("Mapping of Variables to Z-scores"));
587 pivot_dimension_create (table, PIVOT_AXIS_COLUMN, N_("Names"),
588 N_("Source"), N_("Target"));
590 struct pivot_dimension *names = pivot_dimension_create (
591 table, PIVOT_AXIS_ROW, N_("Variables"));
592 names->hide_all_labels = true;
594 for (size_t i = 0; i < dsc->n_vars; i++)
595 if (dsc->vars[i].z_name != NULL)
597 int row = pivot_category_create_leaf (names->root,
598 pivot_value_new_number (i));
600 pivot_table_put2 (table, 0, row,
601 pivot_value_new_variable (dsc->vars[i].v));
602 pivot_table_put2 (table, 1, row,
603 pivot_value_new_user_text (dsc->vars[i].z_name, -1));
606 pivot_table_submit (table);
610 descriptives_set_all_sysmis_zscores (const struct dsc_trns *t, struct ccase *c)
612 const struct dsc_z_score *z;
614 for (z = t->z_scores; z < t->z_scores + t->n_z_scores; z++)
615 *case_num_rw (c, z->z_var) = SYSMIS;
618 /* Transformation function to calculate Z-scores. Will return SYSMIS if any of
619 the following are true: 1) mean or standard deviation is SYSMIS 2) score is
620 SYSMIS 3) score is user missing and they were not included in the original
621 analysis. 4) any of the variables in the original analysis were missing
622 (either system or user-missing values that weren't included).
624 static enum trns_result
625 descriptives_trns_proc (void *trns_, struct ccase **c,
626 casenumber case_idx UNUSED)
628 struct dsc_trns *t = trns_;
629 struct dsc_z_score *z;
630 const struct variable **vars;
632 *c = case_unshare (*c);
636 double f = case_num (*c, t->filter);
637 if (f == 0.0 || var_is_num_missing (t->filter, f))
639 descriptives_set_all_sysmis_zscores (t, *c);
640 return TRNS_CONTINUE;
646 struct ccase *z_case;
648 z_case = casereader_read (t->z_reader);
653 t->count = case_num_idx (z_case, z_idx++);
654 for (z = t->z_scores; z < t->z_scores + t->n_z_scores; z++)
656 z->mean = case_num_idx (z_case, z_idx++);
657 z->std_dev = case_num_idx (z_case, z_idx++);
665 msg (SE, _("Internal error processing Z scores. "
666 "Please report this to %s."),
670 descriptives_set_all_sysmis_zscores (t, *c);
671 return TRNS_CONTINUE;
676 if (t->missing_type == DSC_LISTWISE)
679 for (vars = t->vars; vars < t->vars + t->n_vars; vars++)
681 double score = case_num (*c, *vars);
682 if (var_is_num_missing (*vars, score) & t->exclude)
684 descriptives_set_all_sysmis_zscores (t, *c);
685 return TRNS_CONTINUE;
690 for (z = t->z_scores; z < t->z_scores + t->n_z_scores; z++)
692 double input = case_num (*c, z->src_var);
693 double *output = case_num_rw (*c, z->z_var);
695 if (z->mean == SYSMIS || z->std_dev == SYSMIS
696 || var_is_num_missing (z->src_var, input) & t->exclude)
699 *output = (input - z->mean) / z->std_dev;
701 return TRNS_CONTINUE;
704 /* Frees a descriptives_trns struct. */
706 descriptives_trns_free (void *trns_)
708 struct dsc_trns *t = trns_;
709 bool ok = t->ok && !casereader_error (t->z_reader);
712 casereader_destroy (t->z_reader);
713 assert((t->missing_type != DSC_LISTWISE) ^ (t->vars != NULL));
720 static const struct trns_class descriptives_trns_class = {
721 .name = "DESCRIPTIVES (Z scores)",
722 .execute = descriptives_trns_proc,
723 .destroy = descriptives_trns_free,
726 /* Sets up a transformation to calculate Z scores. */
728 setup_z_trns (struct dsc_proc *dsc, struct dataset *ds)
733 for (n = i = 0; i < dsc->n_vars; i++)
734 if (dsc->vars[i].z_name != NULL)
737 t = xmalloc (sizeof *t);
738 t->z_scores = xnmalloc (n, sizeof *t->z_scores);
740 t->missing_type = dsc->missing_type;
741 t->exclude = dsc->exclude;
742 if (t->missing_type == DSC_LISTWISE)
744 t->n_vars = dsc->n_vars;
745 t->vars = xnmalloc (t->n_vars, sizeof *t->vars);
746 for (i = 0; i < t->n_vars; i++)
747 t->vars[i] = dsc->vars[i].v;
754 t->filter = dict_get_filter (dataset_dict (ds));
755 t->z_reader = casewriter_make_reader (dsc->z_writer);
758 dsc->z_writer = NULL;
760 for (n = i = 0; i < dsc->n_vars; i++)
762 struct dsc_var *dv = &dsc->vars[i];
763 if (dv->z_name != NULL)
765 struct dsc_z_score *z;
766 struct variable *dst_var;
769 dst_var = dict_create_var_assert (dataset_dict (ds), dv->z_name, 0);
771 label = xasprintf (_("Z-score of %s"),var_to_string (dv->v));
772 var_set_label (dst_var, label);
775 z = &t->z_scores[n++];
781 add_transformation (ds, &descriptives_trns_class, t);
784 /* Statistical calculation. */
786 static bool listwise_missing (struct dsc_proc *dsc, const struct ccase *c);
788 /* Calculates and displays descriptive statistics for the cases
791 calc_descriptives (struct dsc_proc *dsc, struct casereader *group,
794 const struct variable *filter = dict_get_filter (dataset_dict (ds));
795 struct casereader *pass1, *pass2;
801 c = casereader_peek (group, 0);
804 casereader_destroy (group);
807 output_split_file_values (ds, c);
810 group = casereader_create_filter_weight (group, dataset_dict (ds),
814 pass2 = dsc->max_moment <= MOMENT_MEAN ? NULL : casereader_clone (pass1);
816 for (i = 0; i < dsc->n_vars; i++)
818 struct dsc_var *dv = &dsc->vars[i];
820 dv->valid = dv->missing = 0.0;
821 if (dv->moments != NULL)
822 moments_clear (dv->moments);
826 dsc->missing_listwise = 0.;
829 /* First pass to handle most of the work. */
831 for (; (c = casereader_read (pass1)) != NULL; case_unref (c))
833 double weight = dict_get_case_weight (dataset_dict (ds), c, NULL);
837 double f = case_num (c, filter);
838 if (f == 0.0 || var_is_num_missing (filter, f))
842 /* Check for missing values. */
843 if (listwise_missing (dsc, c))
845 dsc->missing_listwise += weight;
846 if (dsc->missing_type == DSC_LISTWISE)
849 dsc->valid += weight;
851 for (i = 0; i < dsc->n_vars; i++)
853 struct dsc_var *dv = &dsc->vars[i];
854 double x = case_num (c, dv->v);
856 if (var_is_num_missing (dv->v, x) & dsc->exclude)
858 dv->missing += weight;
862 if (dv->moments != NULL)
863 moments_pass_one (dv->moments, x, weight);
873 if (!casereader_destroy (pass1))
875 casereader_destroy (pass2);
879 /* Second pass for higher-order moments. */
880 if (dsc->max_moment > MOMENT_MEAN)
882 for (; (c = casereader_read (pass2)) != NULL; case_unref (c))
884 double weight = dict_get_case_weight (dataset_dict (ds), c, NULL);
888 double f = case_num (c, filter);
889 if (f == 0.0 || var_is_num_missing (filter, f))
893 /* Check for missing values. */
894 if (dsc->missing_type == DSC_LISTWISE && listwise_missing (dsc, c))
897 for (i = 0; i < dsc->n_vars; i++)
899 struct dsc_var *dv = &dsc->vars[i];
900 double x = case_num (c, dv->v);
902 if (var_is_num_missing (dv->v, x) & dsc->exclude)
905 if (dv->moments != NULL)
906 moments_pass_two (dv->moments, x, weight);
909 if (!casereader_destroy (pass2))
913 /* Calculate results. */
914 if (dsc->z_writer && count > 0)
916 c = case_create (casewriter_get_proto (dsc->z_writer));
918 *case_num_rw_idx (c, z_idx++) = count;
923 for (i = 0; i < dsc->n_vars; i++)
925 struct dsc_var *dv = &dsc->vars[i];
929 for (j = 0; j < DSC_N_STATS; j++)
930 dv->stats[j] = SYSMIS;
932 dv->valid = W = dsc->valid - dv->missing;
934 if (dv->moments != NULL)
935 moments_calculate (dv->moments, NULL,
936 &dv->stats[DSC_MEAN], &dv->stats[DSC_VARIANCE],
937 &dv->stats[DSC_SKEWNESS], &dv->stats[DSC_KURTOSIS]);
938 if (dsc->calc_stats & (1ul << DSC_SEMEAN)
939 && dv->stats[DSC_VARIANCE] != SYSMIS && W > 0.)
940 dv->stats[DSC_SEMEAN] = sqrt (dv->stats[DSC_VARIANCE]) / sqrt (W);
941 if (dsc->calc_stats & (1ul << DSC_STDDEV)
942 && dv->stats[DSC_VARIANCE] != SYSMIS)
943 dv->stats[DSC_STDDEV] = sqrt (dv->stats[DSC_VARIANCE]);
944 if (dsc->calc_stats & (1ul << DSC_SEKURT))
945 if (dv->stats[DSC_KURTOSIS] != SYSMIS)
946 dv->stats[DSC_SEKURT] = calc_sekurt (W);
947 if (dsc->calc_stats & (1ul << DSC_SESKEW)
948 && dv->stats[DSC_SKEWNESS] != SYSMIS)
949 dv->stats[DSC_SESKEW] = calc_seskew (W);
950 dv->stats[DSC_RANGE] = ((dv->min == DBL_MAX || dv->max == -DBL_MAX)
951 ? SYSMIS : dv->max - dv->min);
952 dv->stats[DSC_MIN] = dv->min == DBL_MAX ? SYSMIS : dv->min;
953 dv->stats[DSC_MAX] = dv->max == -DBL_MAX ? SYSMIS : dv->max;
954 if (dsc->calc_stats & (1ul << DSC_SUM))
955 dv->stats[DSC_SUM] = W * dv->stats[DSC_MEAN];
957 if (dv->z_name && c != NULL)
959 *case_num_rw_idx (c, z_idx++) = dv->stats[DSC_MEAN];
960 *case_num_rw_idx (c, z_idx++) = dv->stats[DSC_STDDEV];
965 casewriter_write (dsc->z_writer, c);
967 /* Output results. */
971 /* Returns true if any of the descriptives variables in DSC's
972 variable list have missing values in case C, false otherwise. */
974 listwise_missing (struct dsc_proc *dsc, const struct ccase *c)
978 for (i = 0; i < dsc->n_vars; i++)
980 struct dsc_var *dv = &dsc->vars[i];
981 double x = case_num (c, dv->v);
983 if (var_is_num_missing (dv->v, x) & dsc->exclude)
989 /* Statistical display. */
991 static algo_compare_func descriptives_compare_dsc_vars;
993 /* Displays a table of descriptive statistics for DSC. */
995 display (struct dsc_proc *dsc)
997 struct pivot_table *table = pivot_table_create (
998 N_("Descriptive Statistics"));
999 pivot_table_set_weight_var (table, dict_get_weight (dsc->dict));
1001 struct pivot_dimension *statistics = pivot_dimension_create (
1002 table, PIVOT_AXIS_COLUMN, N_("Statistics"));
1003 pivot_category_create_leaf_rc (
1004 statistics->root, pivot_value_new_text (N_("N")), PIVOT_RC_COUNT);
1005 for (int i = 0; i < DSC_N_STATS; i++)
1006 if (dsc->show_stats & (1ul << i))
1007 pivot_category_create_leaf (statistics->root,
1008 pivot_value_new_text (dsc_info[i].name));
1010 if (dsc->sort_by_stat != DSC_NONE)
1011 sort (dsc->vars, dsc->n_vars, sizeof *dsc->vars,
1012 descriptives_compare_dsc_vars, dsc);
1014 struct pivot_dimension *variables = pivot_dimension_create (
1015 table, PIVOT_AXIS_ROW, N_("Variable"));
1016 for (size_t i = 0; i < dsc->n_vars; i++)
1018 const struct dsc_var *dv = &dsc->vars[i];
1020 int row = pivot_category_create_leaf (variables->root,
1021 pivot_value_new_variable (dv->v));
1024 pivot_table_put2 (table, column++, row,
1025 pivot_value_new_number (dv->valid));
1027 for (int j = 0; j < DSC_N_STATS; j++)
1028 if (dsc->show_stats & (1ul << j))
1030 union value v = { .f = dv->stats[j] };
1031 struct pivot_value *pv = (j == DSC_MIN || j == DSC_MAX
1032 ? pivot_value_new_var_value (dv->v, &v)
1033 : pivot_value_new_number (dv->stats[j]));
1034 pivot_table_put2 (table, column++, row, pv);
1038 int row = pivot_category_create_leaves (
1039 variables->root, N_("Valid N (listwise)"), N_("Missing N (listwise)"));
1040 pivot_table_put2 (table, 0, row, pivot_value_new_number (dsc->valid));
1041 pivot_table_put2 (table, 0, row + 1,
1042 pivot_value_new_number (dsc->missing_listwise));
1043 pivot_table_submit (table);
1046 /* Compares `struct dsc_var's A and B according to the ordering
1047 specified by DSC. */
1049 descriptives_compare_dsc_vars (const void *a_, const void *b_, const void *dsc_)
1051 const struct dsc_var *a = a_;
1052 const struct dsc_var *b = b_;
1053 const struct dsc_proc *dsc = dsc_;
1057 if (dsc->sort_by_stat == DSC_NAME)
1058 result = utf8_strcasecmp (var_get_name (a->v), var_get_name (b->v));
1061 double as = a->stats[dsc->sort_by_stat];
1062 double bs = b->stats[dsc->sort_by_stat];
1064 result = as < bs ? -1 : as > bs;
1067 if (!dsc->sort_ascending)