1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 1997-2000, 2009-2014 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
24 #include "data/casegrouper.h"
25 #include "data/casereader.h"
26 #include "data/casewriter.h"
27 #include "data/dataset.h"
28 #include "data/dictionary.h"
29 #include "data/transformations.h"
30 #include "data/variable.h"
31 #include "language/command.h"
32 #include "language/dictionary/split-file.h"
33 #include "language/lexer/lexer.h"
34 #include "language/lexer/variable-parser.h"
35 #include "libpspp/array.h"
36 #include "libpspp/assertion.h"
37 #include "libpspp/compiler.h"
38 #include "libpspp/i18n.h"
39 #include "libpspp/message.h"
40 #include "math/moments.h"
41 #include "output/tab.h"
43 #include "gl/xalloc.h"
46 #define _(msgid) gettext (msgid)
47 #define N_(msgid) msgid
49 /* DESCRIPTIVES private data. */
53 /* Handling of missing values. */
56 DSC_VARIABLE, /* Handle missing values on a per-variable basis. */
57 DSC_LISTWISE /* Discard entire case if any variable is missing. */
60 /* Describes properties of a distribution for the purpose of
61 calculating a Z-score. */
64 const struct variable *src_var; /* Variable on which z-score is based. */
65 struct variable *z_var; /* New z-score variable. */
66 double mean; /* Distribution mean. */
67 double std_dev; /* Distribution standard deviation. */
70 /* DESCRIPTIVES transformation (for calculating Z-scores). */
73 struct dsc_z_score *z_scores; /* Array of Z-scores. */
74 int z_score_cnt; /* Number of Z-scores. */
75 const struct variable **vars; /* Variables for listwise missing checks. */
76 size_t var_cnt; /* Number of variables. */
77 enum dsc_missing_type missing_type; /* Treatment of missing values. */
78 enum mv_class exclude; /* Classes of missing values to exclude. */
79 struct variable *filter; /* Dictionary FILTER BY variable. */
80 struct casereader *z_reader; /* Reader for count, mean, stddev. */
81 casenumber count; /* Number left in this SPLIT FILE group.*/
85 /* Statistics. Used as bit indexes, so must be 32 or fewer. */
88 DSC_MEAN = 0, DSC_SEMEAN, DSC_STDDEV, DSC_VARIANCE, DSC_KURTOSIS,
89 DSC_SEKURT, DSC_SKEWNESS, DSC_SESKEW, DSC_RANGE, DSC_MIN,
90 DSC_MAX, DSC_SUM, DSC_N_STATS,
92 /* Only valid as sort criteria. */
93 DSC_NAME = -2, /* Sort by name. */
94 DSC_NONE = -1 /* Unsorted. */
97 /* Describes one statistic. */
98 struct dsc_statistic_info
100 const char *identifier; /* Identifier. */
101 const char *name; /* Full name. */
102 enum moment moment; /* Highest moment needed to calculate. */
105 /* Table of statistics, indexed by DSC_*. */
106 static const struct dsc_statistic_info dsc_info[DSC_N_STATS] =
108 {"MEAN", N_("Mean"), MOMENT_MEAN},
109 {"SEMEAN", N_("S.E. Mean"), MOMENT_VARIANCE},
110 {"STDDEV", N_("Std Dev"), MOMENT_VARIANCE},
111 {"VARIANCE", N_("Variance"), MOMENT_VARIANCE},
112 {"KURTOSIS", N_("Kurtosis"), MOMENT_KURTOSIS},
113 {"SEKURTOSIS", N_("S.E. Kurt"), MOMENT_NONE},
114 {"SKEWNESS", N_("Skewness"), MOMENT_SKEWNESS},
115 {"SESKEWNESS", N_("S.E. Skew"), MOMENT_NONE},
116 {"RANGE", N_("Range"), MOMENT_NONE},
117 {"MINIMUM", N_("Minimum"), MOMENT_NONE},
118 {"MAXIMUM", N_("Maximum"), MOMENT_NONE},
119 {"SUM", N_("Sum"), MOMENT_MEAN},
122 /* Statistics calculated by default if none are explicitly
124 #define DEFAULT_STATS \
125 ((1ul << DSC_MEAN) | (1ul << DSC_STDDEV) | (1ul << DSC_MIN) \
128 /* A variable specified on DESCRIPTIVES. */
131 const struct variable *v; /* Variable to calculate on. */
132 char *z_name; /* Name for z-score variable. */
133 double valid, missing; /* Valid, missing counts. */
134 struct moments *moments; /* Moments. */
135 double min, max; /* Maximum and mimimum values. */
136 double stats[DSC_N_STATS]; /* All the stats' values. */
142 DSC_LINE, /* Abbreviated format. */
143 DSC_SERIAL /* Long format. */
146 /* A DESCRIPTIVES procedure. */
149 /* Per-variable info. */
150 struct dsc_var *vars; /* Variables. */
151 size_t var_cnt; /* Number of variables. */
154 enum dsc_missing_type missing_type; /* Treatment of missing values. */
155 enum mv_class exclude; /* Classes of missing values to exclude. */
156 int show_var_labels; /* Nonzero to show variable labels. */
157 int show_index; /* Nonzero to show variable index. */
158 enum dsc_format format; /* Output format. */
160 /* Accumulated results. */
161 double missing_listwise; /* Sum of weights of cases missing listwise. */
162 double valid; /* Sum of weights of valid cases. */
163 bool bad_warn; /* Warn if bad weight found. */
164 enum dsc_statistic sort_by_stat; /* Statistic to sort by; -1: name. */
165 int sort_ascending; /* !0: ascending order; 0: descending. */
166 unsigned long show_stats; /* Statistics to display. */
167 unsigned long calc_stats; /* Statistics to calculate. */
168 enum moment max_moment; /* Highest moment needed for stats. */
171 struct casewriter *z_writer; /* Mean and stddev per SPLIT FILE group. */
175 static enum dsc_statistic match_statistic (struct lexer *);
176 static void free_dsc_proc (struct dsc_proc *);
178 /* Z-score functions. */
179 static bool try_name (const struct dictionary *dict,
180 struct dsc_proc *dsc, const char *name);
181 static char *generate_z_varname (const struct dictionary *dict,
182 struct dsc_proc *dsc,
183 const char *name, int *z_cnt);
184 static void dump_z_table (struct dsc_proc *);
185 static void setup_z_trns (struct dsc_proc *, struct dataset *);
187 /* Procedure execution functions. */
188 static void calc_descriptives (struct dsc_proc *, struct casereader *,
190 static void display (struct dsc_proc *dsc);
192 /* Parser and outline. */
194 /* Handles DESCRIPTIVES. */
196 cmd_descriptives (struct lexer *lexer, struct dataset *ds)
198 struct dictionary *dict = dataset_dict (ds);
199 struct dsc_proc *dsc;
200 const struct variable **vars = NULL;
202 int save_z_scores = 0;
207 struct casegrouper *grouper;
208 struct casereader *group;
210 /* Create and initialize dsc. */
211 dsc = xmalloc (sizeof *dsc);
214 dsc->missing_type = DSC_VARIABLE;
215 dsc->exclude = MV_ANY;
216 dsc->show_var_labels = 1;
218 dsc->format = DSC_LINE;
219 dsc->missing_listwise = 0.;
222 dsc->sort_by_stat = DSC_NONE;
223 dsc->sort_ascending = 1;
224 dsc->show_stats = dsc->calc_stats = DEFAULT_STATS;
225 dsc->z_writer = NULL;
227 /* Parse DESCRIPTIVES. */
228 while (lex_token (lexer) != T_ENDCMD)
230 if (lex_match_id (lexer, "MISSING"))
232 lex_match (lexer, T_EQUALS);
233 while (lex_token (lexer) != T_ENDCMD && lex_token (lexer) != T_SLASH)
235 if (lex_match_id (lexer, "VARIABLE"))
236 dsc->missing_type = DSC_VARIABLE;
237 else if (lex_match_id (lexer, "LISTWISE"))
238 dsc->missing_type = DSC_LISTWISE;
239 else if (lex_match_id (lexer, "INCLUDE"))
240 dsc->exclude = MV_SYSTEM;
243 lex_error (lexer, NULL);
246 lex_match (lexer, T_COMMA);
249 else if (lex_match_id (lexer, "SAVE"))
251 else if (lex_match_id (lexer, "FORMAT"))
253 lex_match (lexer, T_EQUALS);
254 while (lex_token (lexer) != T_ENDCMD && lex_token (lexer) != T_SLASH)
256 if (lex_match_id (lexer, "LABELS"))
257 dsc->show_var_labels = 1;
258 else if (lex_match_id (lexer, "NOLABELS"))
259 dsc->show_var_labels = 0;
260 else if (lex_match_id (lexer, "INDEX"))
262 else if (lex_match_id (lexer, "NOINDEX"))
264 else if (lex_match_id (lexer, "LINE"))
265 dsc->format = DSC_LINE;
266 else if (lex_match_id (lexer, "SERIAL"))
267 dsc->format = DSC_SERIAL;
270 lex_error (lexer, NULL);
273 lex_match (lexer, T_COMMA);
276 else if (lex_match_id (lexer, "STATISTICS"))
278 lex_match (lexer, T_EQUALS);
280 while (lex_token (lexer) != T_ENDCMD && lex_token (lexer) != T_SLASH)
282 if (lex_match (lexer, T_ALL))
283 dsc->show_stats |= (1ul << DSC_N_STATS) - 1;
284 else if (lex_match_id (lexer, "DEFAULT"))
285 dsc->show_stats |= DEFAULT_STATS;
287 dsc->show_stats |= 1ul << (match_statistic (lexer));
288 lex_match (lexer, T_COMMA);
290 if (dsc->show_stats == 0)
291 dsc->show_stats = DEFAULT_STATS;
293 else if (lex_match_id (lexer, "SORT"))
295 lex_match (lexer, T_EQUALS);
296 if (lex_match_id (lexer, "NAME"))
297 dsc->sort_by_stat = DSC_NAME;
300 dsc->sort_by_stat = match_statistic (lexer);
301 if (dsc->sort_by_stat == DSC_NONE )
302 dsc->sort_by_stat = DSC_MEAN;
304 if (lex_match (lexer, T_LPAREN))
306 if (lex_match_id (lexer, "A"))
307 dsc->sort_ascending = 1;
308 else if (lex_match_id (lexer, "D"))
309 dsc->sort_ascending = 0;
311 lex_error (lexer, NULL);
312 lex_force_match (lexer, T_RPAREN);
315 else if (var_cnt == 0)
317 if (lex_next_token (lexer, 1) == T_EQUALS)
319 lex_match_id (lexer, "VARIABLES");
320 lex_match (lexer, T_EQUALS);
323 while (lex_token (lexer) != T_ENDCMD && lex_token (lexer) != T_SLASH)
327 if (!parse_variables_const (lexer, dict, &vars, &var_cnt,
328 PV_APPEND | PV_NO_DUPLICATE | PV_NUMERIC))
331 dsc->vars = xnrealloc ((void *)dsc->vars, var_cnt, sizeof *dsc->vars);
332 for (i = dsc->var_cnt; i < var_cnt; i++)
334 struct dsc_var *dv = &dsc->vars[i];
339 dsc->var_cnt = var_cnt;
341 if (lex_match (lexer, T_LPAREN))
343 if (lex_token (lexer) != T_ID)
345 lex_error (lexer, NULL);
348 if (try_name (dict, dsc, lex_tokcstr (lexer)))
350 struct dsc_var *dsc_var = &dsc->vars[dsc->var_cnt - 1];
351 dsc_var->z_name = xstrdup (lex_tokcstr (lexer));
355 msg (SE, _("Z-score variable name %s would be"
356 " a duplicate variable name."), lex_tokcstr (lexer));
358 if (!lex_force_match (lexer, T_RPAREN))
365 lex_error (lexer, NULL);
369 lex_match (lexer, T_SLASH);
373 msg (SE, _("No variables specified."));
377 /* Construct z-score varnames, show translation table. */
378 if (z_cnt || save_z_scores)
380 struct caseproto *proto;
386 for (i = 0; i < dsc->var_cnt; i++)
388 struct dsc_var *dsc_var = &dsc->vars[i];
389 if (dsc_var->z_name == NULL)
391 const char *name = var_get_name (dsc_var->v);
392 dsc_var->z_name = generate_z_varname (dict, dsc, name,
394 if (dsc_var->z_name == NULL)
402 /* It would be better to handle Z scores correctly (however we define
403 that) when TEMPORARY is in effect, but in the meantime this at least
404 prevents a use-after-free error. See bug #38786. */
405 if (proc_make_temporary_transformations_permanent (ds))
406 msg (SW, _("DESCRIPTIVES with Z scores ignores TEMPORARY. "
407 "Temporary transformations will be made permanent."));
409 proto = caseproto_create ();
410 for (i = 0; i < 1 + 2 * z_cnt; i++)
411 proto = caseproto_add_width (proto, 0);
412 dsc->z_writer = autopaging_writer_create (proto);
413 caseproto_unref (proto);
418 /* Figure out statistics to display. */
419 if (dsc->show_stats & (1ul << DSC_SKEWNESS))
420 dsc->show_stats |= 1ul << DSC_SESKEW;
421 if (dsc->show_stats & (1ul << DSC_KURTOSIS))
422 dsc->show_stats |= 1ul << DSC_SEKURT;
424 /* Figure out which statistics to calculate. */
425 dsc->calc_stats = dsc->show_stats;
427 dsc->calc_stats |= (1ul << DSC_MEAN) | (1ul << DSC_STDDEV);
428 if (dsc->sort_by_stat >= 0)
429 dsc->calc_stats |= 1ul << dsc->sort_by_stat;
430 if (dsc->show_stats & (1ul << DSC_SESKEW))
431 dsc->calc_stats |= 1ul << DSC_SKEWNESS;
432 if (dsc->show_stats & (1ul << DSC_SEKURT))
433 dsc->calc_stats |= 1ul << DSC_KURTOSIS;
435 /* Figure out maximum moment needed and allocate moments for
437 dsc->max_moment = MOMENT_NONE;
438 for (i = 0; i < DSC_N_STATS; i++)
439 if (dsc->calc_stats & (1ul << i) && dsc_info[i].moment > dsc->max_moment)
440 dsc->max_moment = dsc_info[i].moment;
441 if (dsc->max_moment != MOMENT_NONE)
442 for (i = 0; i < dsc->var_cnt; i++)
443 dsc->vars[i].moments = moments_create (dsc->max_moment);
446 grouper = casegrouper_create_splits (proc_open_filtering (ds, false), dict);
447 while (casegrouper_get_next_group (grouper, &group))
448 calc_descriptives (dsc, group, ds);
449 ok = casegrouper_destroy (grouper);
450 ok = proc_commit (ds) && ok;
454 setup_z_trns (dsc, ds);
459 return ok ? CMD_SUCCESS : CMD_CASCADING_FAILURE;
467 /* Returns the statistic named by the current token and skips past the token.
468 Returns DSC_NONE if no statistic is given (e.g., subcommand with no
469 specifiers). Emits an error if the current token ID does not name a
471 static enum dsc_statistic
472 match_statistic (struct lexer *lexer)
474 if (lex_token (lexer) == T_ID)
476 enum dsc_statistic stat;
478 for (stat = 0; stat < DSC_N_STATS; stat++)
479 if (lex_match_id (lexer, dsc_info[stat].identifier))
483 lex_error (lexer, _("expecting statistic name: reverting to default"));
491 free_dsc_proc (struct dsc_proc *dsc)
498 for (i = 0; i < dsc->var_cnt; i++)
500 struct dsc_var *dsc_var = &dsc->vars[i];
501 free (dsc_var->z_name);
502 moments_destroy (dsc_var->moments);
504 casewriter_destroy (dsc->z_writer);
511 /* Returns false if NAME is a duplicate of any existing variable name or
512 of any previously-declared z-var name; otherwise returns true. */
514 try_name (const struct dictionary *dict, struct dsc_proc *dsc,
519 if (dict_lookup_var (dict, name) != NULL)
521 for (i = 0; i < dsc->var_cnt; i++)
523 struct dsc_var *dsc_var = &dsc->vars[i];
524 if (dsc_var->z_name != NULL && !utf8_strcasecmp (dsc_var->z_name, name))
530 /* Generates a name for a Z-score variable based on a variable
531 named VAR_NAME, given that *Z_CNT generated variable names are
532 known to already exist. If successful, returns the new name
533 as a dynamically allocated string. On failure, returns NULL. */
535 generate_z_varname (const struct dictionary *dict, struct dsc_proc *dsc,
536 const char *var_name, int *z_cnt)
538 char *z_name, *trunc_name;
540 /* Try a name based on the original variable name. */
541 z_name = xasprintf ("Z%s", var_name);
542 trunc_name = utf8_encoding_trunc (z_name, dict_get_encoding (dict),
545 if (try_name (dict, dsc, trunc_name))
549 /* Generate a synthetic name. */
557 sprintf (name, "ZSC%03d", *z_cnt);
558 else if (*z_cnt <= 108)
559 sprintf (name, "STDZ%02d", *z_cnt - 99);
560 else if (*z_cnt <= 117)
561 sprintf (name, "ZZZZ%02d", *z_cnt - 108);
562 else if (*z_cnt <= 126)
563 sprintf (name, "ZQZQ%02d", *z_cnt - 117);
566 msg (SE, _("Ran out of generic names for Z-score variables. "
567 "There are only 126 generic names: ZSC001-ZSC0999, "
568 "STDZ01-STDZ09, ZZZZ01-ZZZZ09, ZQZQ01-ZQZQ09."));
572 if (try_name (dict, dsc, name))
573 return xstrdup (name);
578 /* Outputs a table describing the mapping between source
579 variables and Z-score variables. */
581 dump_z_table (struct dsc_proc *dsc)
589 for (i = 0; i < dsc->var_cnt; i++)
590 if (dsc->vars[i].z_name != NULL)
594 t = tab_create (2, cnt + 1);
595 tab_title (t, _("Mapping of variables to corresponding Z-scores."));
596 tab_headers (t, 0, 0, 1, 0);
597 tab_box (t, TAL_1, TAL_1, TAL_0, TAL_1, 0, 0, 1, cnt);
598 tab_hline (t, TAL_2, 0, 1, 1);
599 tab_text (t, 0, 0, TAB_CENTER | TAT_TITLE, _("Source"));
600 tab_text (t, 1, 0, TAB_CENTER | TAT_TITLE, _("Target"));
605 for (i = 0, y = 1; i < dsc->var_cnt; i++)
606 if (dsc->vars[i].z_name != NULL)
608 tab_text (t, 0, y, TAB_LEFT, var_to_string (dsc->vars[i].v));
609 tab_text (t, 1, y++, TAB_LEFT, dsc->vars[i].z_name);
617 descriptives_set_all_sysmis_zscores (const struct dsc_trns *t, struct ccase *c)
619 const struct dsc_z_score *z;
621 for (z = t->z_scores; z < t->z_scores + t->z_score_cnt; z++)
622 case_data_rw (c, z->z_var)->f = SYSMIS;
625 /* Transformation function to calculate Z-scores. Will return SYSMIS if any of
626 the following are true: 1) mean or standard deviation is SYSMIS 2) score is
627 SYSMIS 3) score is user missing and they were not included in the original
628 analyis. 4) any of the variables in the original analysis were missing
629 (either system or user-missing values that weren't included).
632 descriptives_trns_proc (void *trns_, struct ccase **c,
633 casenumber case_idx UNUSED)
635 struct dsc_trns *t = trns_;
636 struct dsc_z_score *z;
637 const struct variable **vars;
639 *c = case_unshare (*c);
643 double f = case_num (*c, t->filter);
644 if (f == 0.0 || var_is_num_missing (t->filter, f, MV_ANY))
646 descriptives_set_all_sysmis_zscores (t, *c);
647 return TRNS_CONTINUE;
653 struct ccase *z_case;
655 z_case = casereader_read (t->z_reader);
660 t->count = case_num_idx (z_case, z_idx++);
661 for (z = t->z_scores; z < t->z_scores + t->z_score_cnt; z++)
663 z->mean = case_num_idx (z_case, z_idx++);
664 z->std_dev = case_num_idx (z_case, z_idx++);
672 msg (SE, _("Internal error processing Z scores"));
675 descriptives_set_all_sysmis_zscores (t, *c);
676 return TRNS_CONTINUE;
681 if (t->missing_type == DSC_LISTWISE)
684 for (vars = t->vars; vars < t->vars + t->var_cnt; vars++)
686 double score = case_num (*c, *vars);
687 if (var_is_num_missing (*vars, score, t->exclude))
689 descriptives_set_all_sysmis_zscores (t, *c);
690 return TRNS_CONTINUE;
695 for (z = t->z_scores; z < t->z_scores + t->z_score_cnt; z++)
697 double input = case_num (*c, z->src_var);
698 double *output = &case_data_rw (*c, z->z_var)->f;
700 if (z->mean == SYSMIS || z->std_dev == SYSMIS
701 || var_is_num_missing (z->src_var, input, t->exclude))
704 *output = (input - z->mean) / z->std_dev;
706 return TRNS_CONTINUE;
709 /* Frees a descriptives_trns struct. */
711 descriptives_trns_free (void *trns_)
713 struct dsc_trns *t = trns_;
714 bool ok = t->ok && !casereader_error (t->z_reader);
717 casereader_destroy (t->z_reader);
718 assert((t->missing_type != DSC_LISTWISE) ^ (t->vars != NULL));
725 /* Sets up a transformation to calculate Z scores. */
727 setup_z_trns (struct dsc_proc *dsc, struct dataset *ds)
732 for (cnt = i = 0; i < dsc->var_cnt; i++)
733 if (dsc->vars[i].z_name != NULL)
736 t = xmalloc (sizeof *t);
737 t->z_scores = xnmalloc (cnt, sizeof *t->z_scores);
738 t->z_score_cnt = cnt;
739 t->missing_type = dsc->missing_type;
740 t->exclude = dsc->exclude;
741 if ( t->missing_type == DSC_LISTWISE )
743 t->var_cnt = dsc->var_cnt;
744 t->vars = xnmalloc (t->var_cnt, sizeof *t->vars);
745 for (i = 0; i < t->var_cnt; i++)
746 t->vars[i] = dsc->vars[i].v;
753 t->filter = dict_get_filter (dataset_dict (ds));
754 t->z_reader = casewriter_make_reader (dsc->z_writer);
757 dsc->z_writer = NULL;
759 for (cnt = i = 0; i < dsc->var_cnt; i++)
761 struct dsc_var *dv = &dsc->vars[i];
762 if (dv->z_name != NULL)
764 struct dsc_z_score *z;
765 struct variable *dst_var;
768 dst_var = dict_create_var_assert (dataset_dict (ds), dv->z_name, 0);
770 label = xasprintf (_("Z-score of %s"),var_to_string (dv->v));
771 var_set_label (dst_var, label);
774 z = &t->z_scores[cnt++];
780 add_transformation (ds,
781 descriptives_trns_proc, descriptives_trns_free, t);
784 /* Statistical calculation. */
786 static bool listwise_missing (struct dsc_proc *dsc, const struct ccase *c);
788 /* Calculates and displays descriptive statistics for the cases
791 calc_descriptives (struct dsc_proc *dsc, struct casereader *group,
794 struct variable *filter = dict_get_filter (dataset_dict (ds));
795 struct casereader *pass1, *pass2;
801 c = casereader_peek (group, 0);
804 casereader_destroy (group);
807 output_split_file_values (ds, c);
810 group = casereader_create_filter_weight (group, dataset_dict (ds),
814 pass2 = dsc->max_moment <= MOMENT_MEAN ? NULL : casereader_clone (pass1);
816 for (i = 0; i < dsc->var_cnt; i++)
818 struct dsc_var *dv = &dsc->vars[i];
820 dv->valid = dv->missing = 0.0;
821 if (dv->moments != NULL)
822 moments_clear (dv->moments);
826 dsc->missing_listwise = 0.;
829 /* First pass to handle most of the work. */
831 for (; (c = casereader_read (pass1)) != NULL; case_unref (c))
833 double weight = dict_get_case_weight (dataset_dict (ds), c, NULL);
837 double f = case_num (c, filter);
838 if (f == 0.0 || var_is_num_missing (filter, f, MV_ANY))
842 /* Check for missing values. */
843 if (listwise_missing (dsc, c))
845 dsc->missing_listwise += weight;
846 if (dsc->missing_type == DSC_LISTWISE)
849 dsc->valid += weight;
851 for (i = 0; i < dsc->var_cnt; i++)
853 struct dsc_var *dv = &dsc->vars[i];
854 double x = case_num (c, dv->v);
856 if (var_is_num_missing (dv->v, x, dsc->exclude))
858 dv->missing += weight;
862 if (dv->moments != NULL)
863 moments_pass_one (dv->moments, x, weight);
873 if (!casereader_destroy (pass1))
875 casereader_destroy (pass2);
879 /* Second pass for higher-order moments. */
880 if (dsc->max_moment > MOMENT_MEAN)
882 for (; (c = casereader_read (pass2)) != NULL; case_unref (c))
884 double weight = dict_get_case_weight (dataset_dict (ds), c, NULL);
888 double f = case_num (c, filter);
889 if (f == 0.0 || var_is_num_missing (filter, f, MV_ANY))
893 /* Check for missing values. */
894 if (dsc->missing_type == DSC_LISTWISE && listwise_missing (dsc, c))
897 for (i = 0; i < dsc->var_cnt; i++)
899 struct dsc_var *dv = &dsc->vars[i];
900 double x = case_num (c, dv->v);
902 if (var_is_num_missing (dv->v, x, dsc->exclude))
905 if (dv->moments != NULL)
906 moments_pass_two (dv->moments, x, weight);
909 if (!casereader_destroy (pass2))
913 /* Calculate results. */
914 if (dsc->z_writer && count > 0)
916 c = case_create (casewriter_get_proto (dsc->z_writer));
918 case_data_rw_idx (c, z_idx++)->f = count;
923 for (i = 0; i < dsc->var_cnt; i++)
925 struct dsc_var *dv = &dsc->vars[i];
929 for (j = 0; j < DSC_N_STATS; j++)
930 dv->stats[j] = SYSMIS;
932 dv->valid = W = dsc->valid - dv->missing;
934 if (dv->moments != NULL)
935 moments_calculate (dv->moments, NULL,
936 &dv->stats[DSC_MEAN], &dv->stats[DSC_VARIANCE],
937 &dv->stats[DSC_SKEWNESS], &dv->stats[DSC_KURTOSIS]);
938 if (dsc->calc_stats & (1ul << DSC_SEMEAN)
939 && dv->stats[DSC_VARIANCE] != SYSMIS && W > 0.)
940 dv->stats[DSC_SEMEAN] = sqrt (dv->stats[DSC_VARIANCE]) / sqrt (W);
941 if (dsc->calc_stats & (1ul << DSC_STDDEV)
942 && dv->stats[DSC_VARIANCE] != SYSMIS)
943 dv->stats[DSC_STDDEV] = sqrt (dv->stats[DSC_VARIANCE]);
944 if (dsc->calc_stats & (1ul << DSC_SEKURT))
945 if (dv->stats[DSC_KURTOSIS] != SYSMIS)
946 dv->stats[DSC_SEKURT] = calc_sekurt (W);
947 if (dsc->calc_stats & (1ul << DSC_SESKEW)
948 && dv->stats[DSC_SKEWNESS] != SYSMIS)
949 dv->stats[DSC_SESKEW] = calc_seskew (W);
950 dv->stats[DSC_RANGE] = ((dv->min == DBL_MAX || dv->max == -DBL_MAX)
951 ? SYSMIS : dv->max - dv->min);
952 dv->stats[DSC_MIN] = dv->min == DBL_MAX ? SYSMIS : dv->min;
953 dv->stats[DSC_MAX] = dv->max == -DBL_MAX ? SYSMIS : dv->max;
954 if (dsc->calc_stats & (1ul << DSC_SUM))
955 dv->stats[DSC_SUM] = W * dv->stats[DSC_MEAN];
957 if (dv->z_name && c != NULL)
959 case_data_rw_idx (c, z_idx++)->f = dv->stats[DSC_MEAN];
960 case_data_rw_idx (c, z_idx++)->f = dv->stats[DSC_STDDEV];
965 casewriter_write (dsc->z_writer, c);
967 /* Output results. */
971 /* Returns true if any of the descriptives variables in DSC's
972 variable list have missing values in case C, false otherwise. */
974 listwise_missing (struct dsc_proc *dsc, const struct ccase *c)
978 for (i = 0; i < dsc->var_cnt; i++)
980 struct dsc_var *dv = &dsc->vars[i];
981 double x = case_num (c, dv->v);
983 if (var_is_num_missing (dv->v, x, dsc->exclude))
989 /* Statistical display. */
991 static algo_compare_func descriptives_compare_dsc_vars;
993 /* Displays a table of descriptive statistics for DSC. */
995 display (struct dsc_proc *dsc)
1001 nc = 1 + (dsc->format == DSC_SERIAL ? 2 : 1);
1002 for (i = 0; i < DSC_N_STATS; i++)
1003 if (dsc->show_stats & (1ul << i))
1006 if (dsc->sort_by_stat != DSC_NONE)
1007 sort (dsc->vars, dsc->var_cnt, sizeof *dsc->vars,
1008 descriptives_compare_dsc_vars, dsc);
1010 t = tab_create (nc, dsc->var_cnt + 1);
1011 tab_headers (t, 1, 0, 1, 0);
1012 tab_box (t, TAL_1, TAL_1, -1, -1, 0, 0, nc - 1, dsc->var_cnt);
1013 tab_box (t, -1, -1, -1, TAL_1, 1, 0, nc - 1, dsc->var_cnt);
1014 tab_hline (t, TAL_2, 0, nc - 1, 1);
1015 tab_vline (t, TAL_2, 1, 0, dsc->var_cnt);
1018 tab_text (t, nc++, 0, TAB_LEFT | TAT_TITLE, _("Variable"));
1019 if (dsc->format == DSC_SERIAL)
1021 tab_text (t, nc++, 0, TAB_CENTER | TAT_TITLE, _("Valid N"));
1022 tab_text (t, nc++, 0, TAB_CENTER | TAT_TITLE, _("Missing N"));
1025 tab_text (t, nc++, 0, TAB_CENTER | TAT_TITLE, "N");
1027 for (i = 0; i < DSC_N_STATS; i++)
1028 if (dsc->show_stats & (1ul << i))
1030 const char *title = gettext (dsc_info[i].name);
1031 tab_text (t, nc++, 0, TAB_CENTER | TAT_TITLE, title);
1034 for (i = 0; i < dsc->var_cnt; i++)
1036 struct dsc_var *dv = &dsc->vars[i];
1040 tab_text (t, nc++, i + 1, TAB_LEFT, var_to_string (dv->v));
1041 tab_text_format (t, nc++, i + 1, 0, "%.*g", DBL_DIG + 1, dv->valid);
1042 if (dsc->format == DSC_SERIAL)
1043 tab_text_format (t, nc++, i + 1, 0, "%.*g", DBL_DIG + 1, dv->missing);
1045 for (j = 0; j < DSC_N_STATS; j++)
1046 if (dsc->show_stats & (1ul << j))
1047 tab_double (t, nc++, i + 1, TAB_NONE, dv->stats[j], NULL, RC_OTHER);
1050 tab_title (t, _("Valid cases = %.*g; cases with missing value(s) = %.*g."),
1051 DBL_DIG + 1, dsc->valid,
1052 DBL_DIG + 1, dsc->missing_listwise);
1057 /* Compares `struct dsc_var's A and B according to the ordering
1058 specified by CMD. */
1060 descriptives_compare_dsc_vars (const void *a_, const void *b_, const void *dsc_)
1062 const struct dsc_var *a = a_;
1063 const struct dsc_var *b = b_;
1064 const struct dsc_proc *dsc = dsc_;
1068 if (dsc->sort_by_stat == DSC_NAME)
1069 result = utf8_strcasecmp (var_get_name (a->v), var_get_name (b->v));
1072 double as = a->stats[dsc->sort_by_stat];
1073 double bs = b->stats[dsc->sort_by_stat];
1075 result = as < bs ? -1 : as > bs;
1078 if (!dsc->sort_ascending)