1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 1997-2000, 2009-2014 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
24 #include "data/casegrouper.h"
25 #include "data/casereader.h"
26 #include "data/casewriter.h"
27 #include "data/dataset.h"
28 #include "data/dictionary.h"
29 #include "data/transformations.h"
30 #include "data/variable.h"
31 #include "language/command.h"
32 #include "language/dictionary/split-file.h"
33 #include "language/lexer/lexer.h"
34 #include "language/lexer/variable-parser.h"
35 #include "libpspp/array.h"
36 #include "libpspp/assertion.h"
37 #include "libpspp/compiler.h"
38 #include "libpspp/i18n.h"
39 #include "libpspp/message.h"
40 #include "math/moments.h"
41 #include "output/tab.h"
43 #include "gl/xalloc.h"
46 #define _(msgid) gettext (msgid)
47 #define N_(msgid) msgid
49 /* DESCRIPTIVES private data. */
53 /* Handling of missing values.  Chosen by the MISSING subcommand keywords VARIABLE and LISTWISE; DSC_VARIABLE is the initial setting. */
56 DSC_VARIABLE, /* Handle missing values on a per-variable basis. */
57 DSC_LISTWISE /* Discard entire case if any variable is missing. */
60 /* Describes properties of a distribution for the purpose of
61 calculating a Z-score. */
/* The Z-score transformation standardizes a value as
   (value - mean) / std_dev; mean and std_dev are reloaded from the
   Z-score reader while the transformation runs. */
64 const struct variable *src_var; /* Variable on which z-score is based. */
65 struct variable *z_var; /* New z-score variable. */
66 double mean; /* Distribution mean. */
67 double std_dev; /* Distribution standard deviation. */
70 /* DESCRIPTIVES transformation (for calculating Z-scores). */
73 struct dsc_z_score *z_scores; /* Array of Z-scores. */
74 int z_score_cnt; /* Number of Z-scores. */
75 const struct variable **vars; /* Variables for listwise missing checks; non-null exactly when missing_type is DSC_LISTWISE (asserted when the transformation is freed). */
76 size_t var_cnt; /* Number of variables. */
77 enum dsc_missing_type missing_type; /* Treatment of missing values. */
78 enum mv_class exclude; /* Classes of missing values to exclude. */
79 struct variable *filter; /* Dictionary FILTER BY variable. */
80 struct casereader *z_reader; /* Reader for count, mean, stddev. */
81 casenumber count; /* Number left in this SPLIT FILE group.*/
85 /* Statistics. Used as bit indexes, so must be 32 or fewer. */
/* DSC_N_STATS counts the real statistics; it sizes dsc_info[] and the
   per-variable stats[] array.  The negative values are never used as bit
   indexes or array indexes by design. */
88 DSC_MEAN = 0, DSC_SEMEAN, DSC_STDDEV, DSC_VARIANCE, DSC_KURTOSIS,
89 DSC_SEKURT, DSC_SKEWNESS, DSC_SESKEW, DSC_RANGE, DSC_MIN,
90 DSC_MAX, DSC_SUM, DSC_N_STATS,
92 /* Only valid as sort criteria. */
93 DSC_NAME = -2, /* Sort by name. */
94 DSC_NONE = -1 /* Unsorted. */
97 /* Describes one statistic. */
98 struct dsc_statistic_info
100 const char *identifier; /* Identifier, as matched against the command syntax. */
101 const char *name; /* Full name; passed through gettext for column titles. */
102 enum moment moment; /* Highest moment needed to calculate. */
105 /* Table of statistics, indexed by DSC_*. */
/* Rows must stay in the order of enum dsc_statistic.  The standard-error
   rows for kurtosis and skewness carry MOMENT_NONE: cmd_descriptives adds
   DSC_KURTOSIS / DSC_SKEWNESS to the statistics to calculate whenever the
   matching standard error is shown, and those rows supply the moment. */
106 static const struct dsc_statistic_info dsc_info[DSC_N_STATS] =
108 {"MEAN", N_("Mean"), MOMENT_MEAN},
109 {"SEMEAN", N_("S.E. Mean"), MOMENT_VARIANCE},
110 {"STDDEV", N_("Std Dev"), MOMENT_VARIANCE},
111 {"VARIANCE", N_("Variance"), MOMENT_VARIANCE},
112 {"KURTOSIS", N_("Kurtosis"), MOMENT_KURTOSIS},
113 {"SEKURTOSIS", N_("S.E. Kurt"), MOMENT_NONE},
114 {"SKEWNESS", N_("Skewness"), MOMENT_SKEWNESS},
115 {"SESKEWNESS", N_("S.E. Skew"), MOMENT_NONE},
116 {"RANGE", N_("Range"), MOMENT_NONE},
117 {"MINIMUM", N_("Minimum"), MOMENT_NONE},
118 {"MAXIMUM", N_("Maximum"), MOMENT_NONE},
119 {"SUM", N_("Sum"), MOMENT_MEAN},
122 /* Statistics calculated by default if none are explicitly requested. */
/* Bit mask over enum dsc_statistic: each statistic contributes the bit
   (1ul << DSC_*).  NOTE(review): the tail of the macro body is not visible
   in this listing. */
124 #define DEFAULT_STATS \
125 ((1ul << DSC_MEAN) | (1ul << DSC_STDDEV) | (1ul << DSC_MIN) \
128 /* A variable specified on DESCRIPTIVES. */
131 const struct variable *v; /* Variable to calculate on. */
132 char *z_name; /* Name for z-score variable, or NULL if none is wanted. */
133 double valid, missing; /* Valid, missing counts (sums of case weights). */
134 struct moments *moments; /* Moments; only created when some statistic needs a moment. */
135 double min, max; /* Minimum and maximum values. */
136 double stats[DSC_N_STATS]; /* All the stats' values. */
142 DSC_LINE, /* Abbreviated format: a single N column. */
143 DSC_SERIAL /* Long format: separate Valid N and Missing N columns. */
146 /* A DESCRIPTIVES procedure. */
149 /* Per-variable info. */
150 struct dsc_var *vars; /* Variables. */
151 size_t var_cnt; /* Number of variables. */
154 enum dsc_missing_type missing_type; /* Treatment of missing values. */
155 enum mv_class exclude; /* Classes of missing values to exclude (MV_ANY initially; MISSING=INCLUDE selects MV_SYSTEM). */
156 int show_var_labels; /* Nonzero to show variable labels. */
157 int show_index; /* Nonzero to show variable index. */
158 enum dsc_format format; /* Output format. */
160 /* Accumulated results. */
161 double missing_listwise; /* Sum of weights of cases missing listwise. */
162 double valid; /* Sum of weights of valid cases. */
163 bool bad_warn; /* Warn if bad weight found. */
164 enum dsc_statistic sort_by_stat; /* Statistic to sort by, or DSC_NAME (-2) for name order, or DSC_NONE (-1) for unsorted. */
165 int sort_ascending; /* !0: ascending order; 0: descending. */
166 unsigned long show_stats; /* Statistics to display (bit mask, bit index DSC_*). */
167 unsigned long calc_stats; /* Statistics to calculate (bit mask, bit index DSC_*). */
168 enum moment max_moment; /* Highest moment needed for stats. */
171 struct casewriter *z_writer; /* Case count, then mean and stddev per Z-score variable, for each SPLIT FILE group. */
175 static enum dsc_statistic match_statistic (struct lexer *);
176 static void free_dsc_proc (struct dsc_proc *);
178 /* Z-score functions. */
179 static bool try_name (const struct dictionary *dict,
180 struct dsc_proc *dsc, const char *name);
181 static char *generate_z_varname (const struct dictionary *dict,
182 struct dsc_proc *dsc,
183 const char *name, int *z_cnt);
184 static void dump_z_table (struct dsc_proc *);
185 static void setup_z_trns (struct dsc_proc *, struct dataset *);
187 /* Procedure execution functions. */
188 static void calc_descriptives (struct dsc_proc *, struct casereader *,
190 static void display (struct dsc_proc *dsc);
192 /* Parser and outline. */
194 /* Handles DESCRIPTIVES.
   Parses the subcommands (MISSING, SAVE, FORMAT, STATISTICS, SORT and the
   variable list) from LEXER, derives which statistics must be shown and
   calculated, then runs calc_descriptives once per SPLIT FILE group of DS.
   When Z-scores were requested, a transformation producing them is set up
   afterwards.  The final return statement yields CMD_SUCCESS when OK is
   true and CMD_CASCADING_FAILURE otherwise. */
196 cmd_descriptives (struct lexer *lexer, struct dataset *ds)
198 struct dictionary *dict = dataset_dict (ds);
199 struct dsc_proc *dsc;
200 const struct variable **vars = NULL;
202 int save_z_scores = 0;
207 struct casegrouper *grouper;
208 struct casereader *group;
210 /* Create and initialize dsc. */
211 dsc = xmalloc (sizeof *dsc);
214 dsc->missing_type = DSC_VARIABLE;
215 dsc->exclude = MV_ANY;
216 dsc->show_var_labels = 1;
218 dsc->format = DSC_LINE;
219 dsc->missing_listwise = 0.;
222 dsc->sort_by_stat = DSC_NONE;
223 dsc->sort_ascending = 1;
224 dsc->show_stats = dsc->calc_stats = DEFAULT_STATS;
225 dsc->z_writer = NULL;
227 /* Parse DESCRIPTIVES. */
228 while (lex_token (lexer) != T_ENDCMD)
230 if (lex_match_id (lexer, "MISSING"))
232 lex_match (lexer, T_EQUALS);
233 while (lex_token (lexer) != T_ENDCMD && lex_token (lexer) != T_SLASH)
235 if (lex_match_id (lexer, "VARIABLE"))
236 dsc->missing_type = DSC_VARIABLE;
237 else if (lex_match_id (lexer, "LISTWISE"))
238 dsc->missing_type = DSC_LISTWISE;
239 else if (lex_match_id (lexer, "INCLUDE"))
240 dsc->exclude = MV_SYSTEM; /* INCLUDE: exclude only the MV_SYSTEM class instead of MV_ANY. */
243 lex_error (lexer, NULL);
246 lex_match (lexer, T_COMMA);
249 else if (lex_match_id (lexer, "SAVE"))
251 else if (lex_match_id (lexer, "FORMAT"))
253 lex_match (lexer, T_EQUALS);
254 while (lex_token (lexer) != T_ENDCMD && lex_token (lexer) != T_SLASH)
256 if (lex_match_id (lexer, "LABELS"))
257 dsc->show_var_labels = 1;
258 else if (lex_match_id (lexer, "NOLABELS"))
259 dsc->show_var_labels = 0;
260 else if (lex_match_id (lexer, "INDEX"))
262 else if (lex_match_id (lexer, "NOINDEX"))
264 else if (lex_match_id (lexer, "LINE"))
265 dsc->format = DSC_LINE;
266 else if (lex_match_id (lexer, "SERIAL"))
267 dsc->format = DSC_SERIAL;
270 lex_error (lexer, NULL);
273 lex_match (lexer, T_COMMA);
276 else if (lex_match_id (lexer, "STATISTICS"))
278 lex_match (lexer, T_EQUALS);
280 while (lex_token (lexer) != T_ENDCMD && lex_token (lexer) != T_SLASH)
282 if (lex_match (lexer, T_ALL))
283 dsc->show_stats |= (1ul << DSC_N_STATS) - 1;
284 else if (lex_match_id (lexer, "DEFAULT"))
285 dsc->show_stats |= DEFAULT_STATS;
/* NOTE(review): match_statistic is documented to return DSC_NONE (-1) when
   no statistic is named; shifting by a negative count is undefined
   behavior in C.  Confirm the result is checked before it is used as a
   shift count. */
287 dsc->show_stats |= 1ul << (match_statistic (lexer));
288 lex_match (lexer, T_COMMA);
290 if (dsc->show_stats == 0)
291 dsc->show_stats = DEFAULT_STATS; /* Nothing selected: fall back to the defaults. */
293 else if (lex_match_id (lexer, "SORT"))
295 lex_match (lexer, T_EQUALS);
296 if (lex_match_id (lexer, "NAME"))
297 dsc->sort_by_stat = DSC_NAME;
300 dsc->sort_by_stat = match_statistic (lexer);
301 if (dsc->sort_by_stat == DSC_NONE )
302 dsc->sort_by_stat = DSC_MEAN; /* No sort key given: sort by mean. */
304 if (lex_match (lexer, T_LPAREN))
306 if (lex_match_id (lexer, "A"))
307 dsc->sort_ascending = 1;
308 else if (lex_match_id (lexer, "D"))
309 dsc->sort_ascending = 0;
311 lex_error (lexer, NULL);
312 if (! lex_force_match (lexer, T_RPAREN))
316 else if (var_cnt == 0)
318 if (lex_next_token (lexer, 1) == T_EQUALS)
320 lex_match_id (lexer, "VARIABLES");
321 lex_match (lexer, T_EQUALS);
324 while (lex_token (lexer) != T_ENDCMD && lex_token (lexer) != T_SLASH)
328 if (!parse_variables_const (lexer, dict, &vars, &var_cnt,
329 PV_APPEND | PV_NO_DUPLICATE | PV_NUMERIC))
/* Grow dsc->vars to var_cnt entries; the loop below visits only the
   entries appended by this parse. */
332 dsc->vars = xnrealloc ((void *)dsc->vars, var_cnt, sizeof *dsc->vars);
333 for (i = dsc->var_cnt; i < var_cnt; i++)
335 struct dsc_var *dv = &dsc->vars[i];
340 dsc->var_cnt = var_cnt;
/* A parenthesized identifier names the Z-score variable for the last
   variable parsed so far (dsc->vars[dsc->var_cnt - 1]). */
342 if (lex_match (lexer, T_LPAREN))
344 if (lex_token (lexer) != T_ID)
346 lex_error (lexer, NULL);
349 if (try_name (dict, dsc, lex_tokcstr (lexer)))
351 struct dsc_var *dsc_var = &dsc->vars[dsc->var_cnt - 1];
352 dsc_var->z_name = xstrdup (lex_tokcstr (lexer));
356 msg (SE, _("Z-score variable name %s would be"
357 " a duplicate variable name."), lex_tokcstr (lexer));
359 if (!lex_force_match (lexer, T_RPAREN))
366 lex_error (lexer, NULL);
370 lex_match (lexer, T_SLASH);
374 msg (SE, _("No variables specified."));
378 /* Construct z-score varnames, show translation table. */
379 if (z_cnt || save_z_scores)
381 struct caseproto *proto;
387 for (i = 0; i < dsc->var_cnt; i++)
389 struct dsc_var *dsc_var = &dsc->vars[i];
390 if (dsc_var->z_name == NULL)
392 const char *name = var_get_name (dsc_var->v);
393 dsc_var->z_name = generate_z_varname (dict, dsc, name,
395 if (dsc_var->z_name == NULL)
403 /* It would be better to handle Z scores correctly (however we define
404 that) when TEMPORARY is in effect, but in the meantime this at least
405 prevents a use-after-free error. See bug #38786. */
406 if (proc_make_temporary_transformations_permanent (ds))
407 msg (SW, _("DESCRIPTIVES with Z scores ignores TEMPORARY. "
408 "Temporary transformations will be made permanent."));
/* The writer's cases hold 1 + 2 * z_cnt values: the group's case count
   followed by a mean and a standard deviation per Z-score variable. */
410 proto = caseproto_create ();
411 for (i = 0; i < 1 + 2 * z_cnt; i++)
412 proto = caseproto_add_width (proto, 0);
413 dsc->z_writer = autopaging_writer_create (proto);
414 caseproto_unref (proto);
419 /* Figure out statistics to display: skewness and kurtosis always bring their standard errors along. */
420 if (dsc->show_stats & (1ul << DSC_SKEWNESS))
421 dsc->show_stats |= 1ul << DSC_SESKEW;
422 if (dsc->show_stats & (1ul << DSC_KURTOSIS))
423 dsc->show_stats |= 1ul << DSC_SEKURT;
425 /* Figure out which statistics to calculate: everything shown, a non-negative sort key, and the skewness/kurtosis behind any standard error shown. */
426 dsc->calc_stats = dsc->show_stats;
428 dsc->calc_stats |= (1ul << DSC_MEAN) | (1ul << DSC_STDDEV);
429 if (dsc->sort_by_stat >= 0)
430 dsc->calc_stats |= 1ul << dsc->sort_by_stat;
431 if (dsc->show_stats & (1ul << DSC_SESKEW))
432 dsc->calc_stats |= 1ul << DSC_SKEWNESS;
433 if (dsc->show_stats & (1ul << DSC_SEKURT))
434 dsc->calc_stats |= 1ul << DSC_KURTOSIS;
436 /* Figure out maximum moment needed and allocate moments for
438 dsc->max_moment = MOMENT_NONE;
439 for (i = 0; i < DSC_N_STATS; i++)
440 if (dsc->calc_stats & (1ul << i) && dsc_info[i].moment > dsc->max_moment)
441 dsc->max_moment = dsc_info[i].moment;
442 if (dsc->max_moment != MOMENT_NONE)
443 for (i = 0; i < dsc->var_cnt; i++)
444 dsc->vars[i].moments = moments_create (dsc->max_moment);
447 grouper = casegrouper_create_splits (proc_open_filtering (ds, false), dict);
448 while (casegrouper_get_next_group (grouper, &group))
449 calc_descriptives (dsc, group, ds);
450 ok = casegrouper_destroy (grouper);
451 ok = proc_commit (ds) && ok;
455 setup_z_trns (dsc, ds);
460 return ok ? CMD_SUCCESS : CMD_CASCADING_FAILURE;
468 /* Returns the statistic named by the current token and skips past the token.
469 Returns DSC_NONE if no statistic is given (e.g., subcommand with no
470 specifiers). Emits an error if the current token ID does not name a statistic. */
/* Only identifier tokens are examined; each candidate is compared against
   the identifier column of dsc_info. */
472 static enum dsc_statistic
473 match_statistic (struct lexer *lexer)
475 if (lex_token (lexer) == T_ID)
477 enum dsc_statistic stat;
479 for (stat = 0; stat < DSC_N_STATS; stat++)
480 if (lex_match_id (lexer, dsc_info[stat].identifier))
/* Presumably reached only when no table entry matched the identifier. */
484 lex_error (lexer, _("expecting statistic name: reverting to default"));
/* Releases what DSC owns, as far as visible here: each variable's z_name
   string and moments object, and the Z-score writer. */
492 free_dsc_proc (struct dsc_proc *dsc)
499 for (i = 0; i < dsc->var_cnt; i++)
501 struct dsc_var *dsc_var = &dsc->vars[i];
502 free (dsc_var->z_name);
503 moments_destroy (dsc_var->moments);
505 casewriter_destroy (dsc->z_writer);
512 /* Returns false if NAME is a duplicate of any existing variable name or
513 of any previously-declared z-var name; otherwise returns true. */
/* Existing variables are looked up in DICT; the z-var names already
   recorded in DSC are compared case-insensitively (utf8_strcasecmp). */
515 try_name (const struct dictionary *dict, struct dsc_proc *dsc,
520 if (dict_lookup_var (dict, name) != NULL)
522 for (i = 0; i < dsc->var_cnt; i++)
524 struct dsc_var *dsc_var = &dsc->vars[i];
525 if (dsc_var->z_name != NULL && !utf8_strcasecmp (dsc_var->z_name, name))
531 /* Generates a name for a Z-score variable based on a variable
532 named VAR_NAME, given that *Z_CNT generated variable names are
533 known to already exist. If successful, returns the new name
534 as a dynamically allocated string. On failure, returns NULL. */
/* A candidate is accepted only if try_name() approves it.  The first
   candidate is "Z" followed by VAR_NAME, passed through
   utf8_encoding_trunc; after that, synthetic names are derived from the
   counter *Z_CNT. */
536 generate_z_varname (const struct dictionary *dict, struct dsc_proc *dsc,
537 const char *var_name, int *z_cnt)
539 char *z_name, *trunc_name;
541 /* Try a name based on the original variable name. */
542 z_name = xasprintf ("Z%s", var_name);
543 trunc_name = utf8_encoding_trunc (z_name, dict_get_encoding (dict),
546 if (try_name (dict, dsc, trunc_name))
550 /* Generate a synthetic name. */
/* Counter ranges visible below: up to 108 gives STDZ01-STDZ09
   (counter - 99), up to 117 gives ZZZZ01-ZZZZ09, up to 126 gives
   ZQZQ01-ZQZQ09.  The ZSCnnn form therefore covers counters up to 99,
   i.e. ZSC001-ZSC099, which is what the error message must advertise. */
558 sprintf (name, "ZSC%03d", *z_cnt);
559 else if (*z_cnt <= 108)
560 sprintf (name, "STDZ%02d", *z_cnt - 99);
561 else if (*z_cnt <= 117)
562 sprintf (name, "ZZZZ%02d", *z_cnt - 108);
563 else if (*z_cnt <= 126)
564 sprintf (name, "ZQZQ%02d", *z_cnt - 117);
567 msg (SE, _("Ran out of generic names for Z-score variables. "
568 "There are only 126 generic names: ZSC001-ZSC099, "
569 "STDZ01-STDZ09, ZZZZ01-ZZZZ09, ZQZQ01-ZQZQ09."));
573 if (try_name (dict, dsc, name))
574 return xstrdup (name);
579 /* Outputs a table describing the mapping between source
580 variables and Z-score variables. */
/* The table has two columns ("Source", "Target") and cnt + 1 rows; only
   variables with a non-null z_name are considered, both when sizing the
   table and when filling it. */
582 dump_z_table (struct dsc_proc *dsc)
590 for (i = 0; i < dsc->var_cnt; i++)
591 if (dsc->vars[i].z_name != NULL)
595 t = tab_create (2, cnt + 1);
596 tab_title (t, _("Mapping of variables to corresponding Z-scores."));
597 tab_headers (t, 0, 0, 1, 0);
598 tab_box (t, TAL_1, TAL_1, TAL_0, TAL_1, 0, 0, 1, cnt);
599 tab_hline (t, TAL_2, 0, 1, 1);
600 tab_text (t, 0, 0, TAB_CENTER | TAT_TITLE, _("Source"));
601 tab_text (t, 1, 0, TAB_CENTER | TAT_TITLE, _("Target"));
/* Row 0 is the header, so data rows start at y = 1. */
606 for (i = 0, y = 1; i < dsc->var_cnt; i++)
607 if (dsc->vars[i].z_name != NULL)
609 tab_text (t, 0, y, TAB_LEFT, var_to_string (dsc->vars[i].v));
610 tab_text (t, 1, y++, TAB_LEFT, dsc->vars[i].z_name);
/* Stores SYSMIS into every Z-score output variable of T within case C. */
618 descriptives_set_all_sysmis_zscores (const struct dsc_trns *t, struct ccase *c)
620 const struct dsc_z_score *z;
622 for (z = t->z_scores; z < t->z_scores + t->z_score_cnt; z++)
623 case_data_rw (c, z->z_var)->f = SYSMIS;
626 /* Transformation function to calculate Z-scores. Will return SYSMIS if any of
627 the following are true: 1) mean or standard deviation is SYSMIS 2) score is
628 SYSMIS 3) score is user missing and they were not included in the original
629 analysis. 4) any of the variables in the original analysis were missing
630 (either system or user-missing values that weren't included). */
/* TRNS_ is the struct dsc_trns built by setup_z_trns; *C is the case to
   fill in.  Every return visible here is TRNS_CONTINUE. */
633 descriptives_trns_proc (void *trns_, struct ccase **c,
634 casenumber case_idx UNUSED)
636 struct dsc_trns *t = trns_;
637 struct dsc_z_score *z;
638 const struct variable **vars;
640 *c = case_unshare (*c); /* The Z-score fields of *C are written through case_data_rw below. */
/* A case whose FILTER value is zero or missing gets SYSMIS for all
   Z-scores. */
644 double f = case_num (*c, t->filter);
645 if (f == 0.0 || var_is_num_missing (t->filter, f, MV_ANY))
647 descriptives_set_all_sysmis_zscores (t, *c);
648 return TRNS_CONTINUE;
/* Load the next record from z_reader: first the case count, then a
   (mean, std_dev) pair per Z-score, in z_scores order.  Presumably done
   when the current SPLIT FILE group is used up -- the guard is not visible
   in this listing. */
654 struct ccase *z_case;
656 z_case = casereader_read (t->z_reader);
661 t->count = case_num_idx (z_case, z_idx++);
662 for (z = t->z_scores; z < t->z_scores + t->z_score_cnt; z++)
664 z->mean = case_num_idx (z_case, z_idx++);
665 z->std_dev = case_num_idx (z_case, z_idx++);
673 msg (SE, _("Internal error processing Z scores"));
676 descriptives_set_all_sysmis_zscores (t, *c);
677 return TRNS_CONTINUE;
/* Listwise deletion: one missing analysis variable makes every Z-score of
   the case SYSMIS. */
682 if (t->missing_type == DSC_LISTWISE)
685 for (vars = t->vars; vars < t->vars + t->var_cnt; vars++)
687 double score = case_num (*c, *vars);
688 if (var_is_num_missing (*vars, score, t->exclude))
690 descriptives_set_all_sysmis_zscores (t, *c);
691 return TRNS_CONTINUE;
/* Standardize each source value with the current group's mean and
   standard deviation. */
696 for (z = t->z_scores; z < t->z_scores + t->z_score_cnt; z++)
698 double input = case_num (*c, z->src_var);
699 double *output = &case_data_rw (*c, z->z_var)->f;
701 if (z->mean == SYSMIS || z->std_dev == SYSMIS
702 || var_is_num_missing (z->src_var, input, t->exclude))
705 *output = (input - z->mean) / z->std_dev;
707 return TRNS_CONTINUE;
710 /* Frees a descriptives_trns struct.  OK is true only when the transformation's own ok flag is set and the Z-score reader reports no error. */
712 descriptives_trns_free (void *trns_)
714 struct dsc_trns *t = trns_;
715 bool ok = t->ok && !casereader_error (t->z_reader);
718 casereader_destroy (t->z_reader);
/* Invariant: vars is non-null exactly when listwise deletion is in effect. */
719 assert((t->missing_type != DSC_LISTWISE) ^ (t->vars != NULL));
726 /* Sets up a transformation to calculate Z scores. */
/* Builds a struct dsc_trns from DSC, creates one destination variable per
   requested Z-score in DS's dictionary, and registers
   descriptives_trns_proc / descriptives_trns_free on DS. */
728 setup_z_trns (struct dsc_proc *dsc, struct dataset *ds)
733 for (cnt = i = 0; i < dsc->var_cnt; i++)
734 if (dsc->vars[i].z_name != NULL)
737 t = xmalloc (sizeof *t);
738 t->z_scores = xnmalloc (cnt, sizeof *t->z_scores);
739 t->z_score_cnt = cnt;
740 t->missing_type = dsc->missing_type;
741 t->exclude = dsc->exclude;
/* Listwise deletion needs the full list of analysis variables, not only
   those with Z-scores. */
742 if ( t->missing_type == DSC_LISTWISE )
744 t->var_cnt = dsc->var_cnt;
745 t->vars = xnmalloc (t->var_cnt, sizeof *t->vars);
746 for (i = 0; i < t->var_cnt; i++)
747 t->vars[i] = dsc->vars[i].v;
754 t->filter = dict_get_filter (dataset_dict (ds));
755 t->z_reader = casewriter_make_reader (dsc->z_writer);
758 dsc->z_writer = NULL; /* The writer has been turned into z_reader; presumably it must not be destroyed again via DSC. */
760 for (cnt = i = 0; i < dsc->var_cnt; i++)
762 struct dsc_var *dv = &dsc->vars[i];
763 if (dv->z_name != NULL)
765 struct dsc_z_score *z;
766 struct variable *dst_var;
769 dst_var = dict_create_var_assert (dataset_dict (ds), dv->z_name, 0);
771 label = xasprintf (_("Z-score of %s"),var_to_string (dv->v));
772 var_set_label (dst_var, label);
775 z = &t->z_scores[cnt++];
781 add_transformation (ds,
782 descriptives_trns_proc, descriptives_trns_free, t);
785 /* Statistical calculation. */
787 static bool listwise_missing (struct dsc_proc *dsc, const struct ccase *c);
789 /* Calculates and displays descriptive statistics for the cases in GROUP. */
/* Processes one SPLIT FILE group.  A first pass over the cases accumulates
   weights, missing counts and first-pass moments; a second pass runs only
   when moments above the mean are needed.  The per-variable statistics are
   then derived, and, when a Z-score writer exists and the group is
   non-empty, the group's count, means and standard deviations are written
   to it. */
792 calc_descriptives (struct dsc_proc *dsc, struct casereader *group,
795 struct variable *filter = dict_get_filter (dataset_dict (ds));
796 struct casereader *pass1, *pass2;
/* Peek at the first case of the group for the split-file heading. */
802 c = casereader_peek (group, 0);
805 casereader_destroy (group);
808 output_split_file_values (ds, c);
811 group = casereader_create_filter_weight (group, dataset_dict (ds),
815 pass2 = dsc->max_moment <= MOMENT_MEAN ? NULL : casereader_clone (pass1); /* A second reader is cloned only when moments above the mean are needed. */
/* Reset the per-variable and per-group accumulators. */
817 for (i = 0; i < dsc->var_cnt; i++)
819 struct dsc_var *dv = &dsc->vars[i];
821 dv->valid = dv->missing = 0.0;
822 if (dv->moments != NULL)
823 moments_clear (dv->moments);
827 dsc->missing_listwise = 0.;
830 /* First pass to handle most of the work. */
832 for (; (c = casereader_read (pass1)) != NULL; case_unref (c))
834 double weight = dict_get_case_weight (dataset_dict (ds), c, NULL);
838 double f = case_num (c, filter);
839 if (f == 0.0 || var_is_num_missing (filter, f, MV_ANY))
843 /* Check for missing values. */
844 if (listwise_missing (dsc, c))
846 dsc->missing_listwise += weight;
847 if (dsc->missing_type == DSC_LISTWISE)
850 dsc->valid += weight;
852 for (i = 0; i < dsc->var_cnt; i++)
854 struct dsc_var *dv = &dsc->vars[i];
855 double x = case_num (c, dv->v);
857 if (var_is_num_missing (dv->v, x, dsc->exclude))
859 dv->missing += weight;
863 if (dv->moments != NULL)
864 moments_pass_one (dv->moments, x, weight);
874 if (!casereader_destroy (pass1))
876 casereader_destroy (pass2);
880 /* Second pass for higher-order moments. */
881 if (dsc->max_moment > MOMENT_MEAN)
883 for (; (c = casereader_read (pass2)) != NULL; case_unref (c))
885 double weight = dict_get_case_weight (dataset_dict (ds), c, NULL);
889 double f = case_num (c, filter);
890 if (f == 0.0 || var_is_num_missing (filter, f, MV_ANY))
894 /* Check for missing values. */
895 if (dsc->missing_type == DSC_LISTWISE && listwise_missing (dsc, c))
898 for (i = 0; i < dsc->var_cnt; i++)
900 struct dsc_var *dv = &dsc->vars[i];
901 double x = case_num (c, dv->v);
903 if (var_is_num_missing (dv->v, x, dsc->exclude))
906 if (dv->moments != NULL)
907 moments_pass_two (dv->moments, x, weight);
910 if (!casereader_destroy (pass2))
914 /* Calculate results. */
/* The Z-score record starts with the group's case count; each variable
   with a z_name appends its mean and standard deviation below. */
915 if (dsc->z_writer && count > 0)
917 c = case_create (casewriter_get_proto (dsc->z_writer));
919 case_data_rw_idx (c, z_idx++)->f = count;
924 for (i = 0; i < dsc->var_cnt; i++)
926 struct dsc_var *dv = &dsc->vars[i];
/* Every statistic starts out as SYSMIS and is overwritten only when it can
   be computed. */
930 for (j = 0; j < DSC_N_STATS; j++)
931 dv->stats[j] = SYSMIS;
933 dv->valid = W = dsc->valid - dv->missing; /* Valid weight for this variable: total valid weight minus this variable's missing weight. */
935 if (dv->moments != NULL)
936 moments_calculate (dv->moments, NULL,
937 &dv->stats[DSC_MEAN], &dv->stats[DSC_VARIANCE],
938 &dv->stats[DSC_SKEWNESS], &dv->stats[DSC_KURTOSIS]);
939 if (dsc->calc_stats & (1ul << DSC_SEMEAN)
940 && dv->stats[DSC_VARIANCE] != SYSMIS && W > 0.)
941 dv->stats[DSC_SEMEAN] = sqrt (dv->stats[DSC_VARIANCE]) / sqrt (W);
942 if (dsc->calc_stats & (1ul << DSC_STDDEV)
943 && dv->stats[DSC_VARIANCE] != SYSMIS)
944 dv->stats[DSC_STDDEV] = sqrt (dv->stats[DSC_VARIANCE]);
945 if (dsc->calc_stats & (1ul << DSC_SEKURT))
946 if (dv->stats[DSC_KURTOSIS] != SYSMIS)
947 dv->stats[DSC_SEKURT] = calc_sekurt (W);
948 if (dsc->calc_stats & (1ul << DSC_SESKEW)
949 && dv->stats[DSC_SKEWNESS] != SYSMIS)
950 dv->stats[DSC_SESKEW] = calc_seskew (W);
/* DBL_MAX / -DBL_MAX in min / max are treated as "no value seen". */
951 dv->stats[DSC_RANGE] = ((dv->min == DBL_MAX || dv->max == -DBL_MAX)
952 ? SYSMIS : dv->max - dv->min);
953 dv->stats[DSC_MIN] = dv->min == DBL_MAX ? SYSMIS : dv->min;
954 dv->stats[DSC_MAX] = dv->max == -DBL_MAX ? SYSMIS : dv->max;
955 if (dsc->calc_stats & (1ul << DSC_SUM))
956 dv->stats[DSC_SUM] = W * dv->stats[DSC_MEAN];
958 if (dv->z_name && c != NULL)
960 case_data_rw_idx (c, z_idx++)->f = dv->stats[DSC_MEAN];
961 case_data_rw_idx (c, z_idx++)->f = dv->stats[DSC_STDDEV];
966 casewriter_write (dsc->z_writer, c);
968 /* Output results. */
972 /* Returns true if any of the descriptives variables in DSC's
973 variable list have missing values in case C, false otherwise. */
/* "Missing" is judged per variable with var_is_num_missing against the
   classes in dsc->exclude. */
975 listwise_missing (struct dsc_proc *dsc, const struct ccase *c)
979 for (i = 0; i < dsc->var_cnt; i++)
981 struct dsc_var *dv = &dsc->vars[i];
982 double x = case_num (c, dv->v);
984 if (var_is_num_missing (dv->v, x, dsc->exclude))
990 /* Statistical display. */
992 static algo_compare_func descriptives_compare_dsc_vars;
994 /* Displays a table of descriptive statistics for DSC. */
/* Layout: one header row plus one row per variable.  Columns are the
   variable, one N column (or Valid N and Missing N in DSC_SERIAL format),
   then one column per statistic whose bit is set in show_stats. */
996 display (struct dsc_proc *dsc)
1000 struct tab_table *t;
1002 nc = 1 + (dsc->format == DSC_SERIAL ? 2 : 1);
1003 for (i = 0; i < DSC_N_STATS; i++)
1004 if (dsc->show_stats & (1ul << i))
/* Any sort criterion other than DSC_NONE reorders dsc->vars in place. */
1007 if (dsc->sort_by_stat != DSC_NONE)
1008 sort (dsc->vars, dsc->var_cnt, sizeof *dsc->vars,
1009 descriptives_compare_dsc_vars, dsc);
1011 t = tab_create (nc, dsc->var_cnt + 1);
1012 tab_headers (t, 1, 0, 1, 0);
1013 tab_box (t, TAL_1, TAL_1, -1, -1, 0, 0, nc - 1, dsc->var_cnt);
1014 tab_box (t, -1, -1, -1, TAL_1, 1, 0, nc - 1, dsc->var_cnt);
1015 tab_hline (t, TAL_2, 0, nc - 1, 1);
1016 tab_vline (t, TAL_2, 1, 0, dsc->var_cnt);
/* Header row; nc serves as the running column index from here on. */
1019 tab_text (t, nc++, 0, TAB_LEFT | TAT_TITLE, _("Variable"));
1020 if (dsc->format == DSC_SERIAL)
1022 tab_text (t, nc++, 0, TAB_CENTER | TAT_TITLE, _("Valid N"));
1023 tab_text (t, nc++, 0, TAB_CENTER | TAT_TITLE, _("Missing N"));
1026 tab_text (t, nc++, 0, TAB_CENTER | TAT_TITLE, "N");
1028 for (i = 0; i < DSC_N_STATS; i++)
1029 if (dsc->show_stats & (1ul << i))
1031 const char *title = gettext (dsc_info[i].name);
1032 tab_text (t, nc++, 0, TAB_CENTER | TAT_TITLE, title);
/* One data row per variable, at table row i + 1. */
1035 for (i = 0; i < dsc->var_cnt; i++)
1037 struct dsc_var *dv = &dsc->vars[i];
1041 tab_text (t, nc++, i + 1, TAB_LEFT, var_to_string (dv->v));
1042 tab_text_format (t, nc++, i + 1, 0, "%.*g", DBL_DIG + 1, dv->valid);
1043 if (dsc->format == DSC_SERIAL)
1044 tab_text_format (t, nc++, i + 1, 0, "%.*g", DBL_DIG + 1, dv->missing);
1046 for (j = 0; j < DSC_N_STATS; j++)
1047 if (dsc->show_stats & (1ul << j))
1048 tab_double (t, nc++, i + 1, TAB_NONE, dv->stats[j], NULL, RC_OTHER);
1051 tab_title (t, _("Valid cases = %.*g; cases with missing value(s) = %.*g."),
1052 DBL_DIG + 1, dsc->valid,
1053 DBL_DIG + 1, dsc->missing_listwise);
1058 /* Compares `struct dsc_var's A and B according to the ordering
1059 specified by DSC. */
1061 descriptives_compare_dsc_vars (const void *a_, const void *b_, const void *dsc_)
1063 const struct dsc_var *a = a_;
1064 const struct dsc_var *b = b_;
1065 const struct dsc_proc *dsc = dsc_;
1069 if (dsc->sort_by_stat == DSC_NAME)
1070 result = utf8_strcasecmp (var_get_name (a->v), var_get_name (b->v));
1073 double as = a->stats[dsc->sort_by_stat];
1074 double bs = b->stats[dsc->sort_by_stat];
1076 result = as < bs ? -1 : as > bs;
1079 if (!dsc->sort_ascending)