1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
3 Written by Ben Pfaff <blp@gnu.org>.
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
20 /* FIXME: Many possible optimizations. */
28 #include <data/case.h>
29 #include <data/casefile.h>
30 #include <data/dictionary.h>
31 #include <data/procedure.h>
32 #include <data/transformations.h>
33 #include <data/variable.h>
34 #include <language/command.h>
35 #include <language/dictionary/split-file.h>
36 #include <language/lexer/lexer.h>
37 #include <language/lexer/variable-parser.h>
38 #include <libpspp/alloc.h>
39 #include <libpspp/array.h>
40 #include <libpspp/compiler.h>
41 #include <libpspp/magic.h>
42 #include <libpspp/message.h>
43 #include <libpspp/assertion.h>
44 #include <math/moments.h>
45 #include <output/manager.h>
46 #include <output/table.h>
49 #define _(msgid) gettext (msgid)
50 #define N_(msgid) msgid
52 /* DESCRIPTIVES private data. */
56 /* Handling of missing values. */
59 DSC_VARIABLE, /* Handle missing values on a per-variable basis. */
60 DSC_LISTWISE /* Discard entire case if any variable is missing. */
63 /* Describes properties of a distribution for the purpose of
64 calculating a Z-score. */
67 struct variable *src_var; /* Variable on which z-score is based. */
68 struct variable *z_var; /* New z-score variable. */
69 double mean; /* Distribution mean. */
70 double std_dev; /* Distribution standard deviation. */
73 /* DESCRIPTIVES transformation (for calculating Z-scores). */
76 struct dsc_z_score *z_scores; /* Array of Z-scores. */
77 int z_score_cnt; /* Number of Z-scores. */
78 struct variable **vars; /* Variables for listwise missing checks. */
79 size_t var_cnt; /* Number of variables. */
80 enum dsc_missing_type missing_type; /* Treatment of missing values. */
81 int include_user_missing; /* Nonzero to include user-missing values. */
84 /* Statistics. Used as bit indexes, so must be 32 or fewer. */
87 DSC_MEAN = 0, DSC_SEMEAN, DSC_STDDEV, DSC_VARIANCE, DSC_KURTOSIS,
88 DSC_SEKURT, DSC_SKEWNESS, DSC_SESKEW, DSC_RANGE, DSC_MIN,
89 DSC_MAX, DSC_SUM, DSC_N_STATS,
91 /* Only valid as sort criteria. */
92 DSC_NAME = -2, /* Sort by name. */
93 DSC_NONE = -1 /* Unsorted. */
96 /* Describes one statistic. */
97 struct dsc_statistic_info
99 const char *identifier; /* Identifier. */
100 const char *name; /* Full name. */
101 enum moment moment; /* Highest moment needed to calculate. */
104 /* Table of statistics, indexed by DSC_*. */
105 static const struct dsc_statistic_info dsc_info[DSC_N_STATS] =
107 {"MEAN", N_("Mean"), MOMENT_MEAN},
108 {"SEMEAN", N_("S E Mean"), MOMENT_VARIANCE},
109 {"STDDEV", N_("Std Dev"), MOMENT_VARIANCE},
110 {"VARIANCE", N_("Variance"), MOMENT_VARIANCE},
111 {"KURTOSIS", N_("Kurtosis"), MOMENT_KURTOSIS},
112 {"SEKURTOSIS", N_("S E Kurt"), MOMENT_NONE},
113 {"SKEWNESS", N_("Skewness"), MOMENT_SKEWNESS},
114 {"SESKEWNESS", N_("S E Skew"), MOMENT_NONE},
115 {"RANGE", N_("Range"), MOMENT_NONE},
116 {"MINIMUM", N_("Minimum"), MOMENT_NONE},
117 {"MAXIMUM", N_("Maximum"), MOMENT_NONE},
118 {"SUM", N_("Sum"), MOMENT_MEAN},
121 /* Statistics calculated by default if none are explicitly
123 #define DEFAULT_STATS \
124 ((1ul << DSC_MEAN) | (1ul << DSC_STDDEV) | (1ul << DSC_MIN) \
127 /* A variable specified on DESCRIPTIVES. */
130 struct variable *v; /* Variable to calculate on. */
131 char z_name[LONG_NAME_LEN + 1]; /* Name for z-score variable. */
132 double valid, missing; /* Valid, missing counts. */
133 struct moments *moments; /* Moments. */
134 double min, max; /* Maximum and mimimum values. */
135 double stats[DSC_N_STATS]; /* All the stats' values. */
141 DSC_LINE, /* Abbreviated format. */
142 DSC_SERIAL /* Long format. */
145 /* A DESCRIPTIVES procedure. */
148 /* Per-variable info. */
149 struct dsc_var *vars; /* Variables. */
150 size_t var_cnt; /* Number of variables. */
153 enum dsc_missing_type missing_type; /* Treatment of missing values. */
154 int include_user_missing; /* Nonzero to include user-missing values. */
155 int show_var_labels; /* Nonzero to show variable labels. */
156 int show_index; /* Nonzero to show variable index. */
157 enum dsc_format format; /* Output format. */
159 /* Accumulated results. */
160 double missing_listwise; /* Sum of weights of cases missing listwise. */
161 double valid; /* Sum of weights of valid cases. */
162 bool bad_warn; /* Warn if bad weight found. */
163 enum dsc_statistic sort_by_stat; /* Statistic to sort by; -1: name. */
164 int sort_ascending; /* !0: ascending order; 0: descending. */
165 unsigned long show_stats; /* Statistics to display. */
166 unsigned long calc_stats; /* Statistics to calculate. */
167 enum moment max_moment; /* Highest moment needed for stats. */
171 static enum dsc_statistic match_statistic (struct lexer *);
172 static void free_dsc_proc (struct dsc_proc *);
174 /* Z-score functions. */
175 static bool try_name (const struct dictionary *dict,
176 struct dsc_proc *dsc, const char *name);
177 static bool generate_z_varname (const struct dictionary *dict,
178 struct dsc_proc *dsc, char *z_name,
179 const char *name, size_t *z_cnt);
180 static void dump_z_table (struct dsc_proc *);
181 static void setup_z_trns (struct dsc_proc *, struct dataset *);
183 /* Procedure execution functions. */
184 static bool calc_descriptives (const struct ccase *first,
185 const struct casefile *, void *dsc_,
186 const struct dataset *);
187 static void display (struct dsc_proc *dsc);
189 /* Parser and outline. */
191 /* Handles DESCRIPTIVES. */
193 cmd_descriptives (struct lexer *lexer, struct dataset *ds)
195 struct dictionary *dict = dataset_dict (ds);
196 struct dsc_proc *dsc;
197 struct variable **vars = NULL;
199 int save_z_scores = 0;
204 /* Create and initialize dsc. */
205 dsc = xmalloc (sizeof *dsc);
208 dsc->missing_type = DSC_VARIABLE;
209 dsc->include_user_missing = 0;
210 dsc->show_var_labels = 1;
212 dsc->format = DSC_LINE;
213 dsc->missing_listwise = 0.;
216 dsc->sort_by_stat = DSC_NONE;
217 dsc->sort_ascending = 1;
218 dsc->show_stats = dsc->calc_stats = DEFAULT_STATS;
220 /* Parse DESCRIPTIVES. */
221 while (lex_token (lexer) != '.')
223 if (lex_match_id (lexer, "MISSING"))
225 lex_match (lexer, '=');
226 while (lex_token (lexer) != '.' && lex_token (lexer) != '/')
228 if (lex_match_id (lexer, "VARIABLE"))
229 dsc->missing_type = DSC_VARIABLE;
230 else if (lex_match_id (lexer, "LISTWISE"))
231 dsc->missing_type = DSC_LISTWISE;
232 else if (lex_match_id (lexer, "INCLUDE"))
233 dsc->include_user_missing = 1;
236 lex_error (lexer, NULL);
239 lex_match (lexer, ',');
242 else if (lex_match_id (lexer, "SAVE"))
244 else if (lex_match_id (lexer, "FORMAT"))
246 lex_match (lexer, '=');
247 while (lex_token (lexer) != '.' && lex_token (lexer) != '/')
249 if (lex_match_id (lexer, "LABELS"))
250 dsc->show_var_labels = 1;
251 else if (lex_match_id (lexer, "NOLABELS"))
252 dsc->show_var_labels = 0;
253 else if (lex_match_id (lexer, "INDEX"))
255 else if (lex_match_id (lexer, "NOINDEX"))
257 else if (lex_match_id (lexer, "LINE"))
258 dsc->format = DSC_LINE;
259 else if (lex_match_id (lexer, "SERIAL"))
260 dsc->format = DSC_SERIAL;
263 lex_error (lexer, NULL);
266 lex_match (lexer, ',');
269 else if (lex_match_id (lexer, "STATISTICS"))
271 lex_match (lexer, '=');
273 while (lex_token (lexer) != '.' && lex_token (lexer) != '/')
275 if (lex_match (lexer, T_ALL))
276 dsc->show_stats |= (1ul << DSC_N_STATS) - 1;
277 else if (lex_match_id (lexer, "DEFAULT"))
278 dsc->show_stats |= DEFAULT_STATS;
280 dsc->show_stats |= 1ul << (match_statistic (lexer));
281 lex_match (lexer, ',');
283 if (dsc->show_stats == 0)
284 dsc->show_stats = DEFAULT_STATS;
286 else if (lex_match_id (lexer, "SORT"))
288 lex_match (lexer, '=');
289 if (lex_match_id (lexer, "NAME"))
290 dsc->sort_by_stat = DSC_NAME;
293 dsc->sort_by_stat = match_statistic (lexer);
294 if (dsc->sort_by_stat == DSC_NONE )
295 dsc->sort_by_stat = DSC_MEAN;
297 if (lex_match (lexer, '('))
299 if (lex_match_id (lexer, "A"))
300 dsc->sort_ascending = 1;
301 else if (lex_match_id (lexer, "D"))
302 dsc->sort_ascending = 0;
304 lex_error (lexer, NULL);
305 lex_force_match (lexer, ')');
308 else if (var_cnt == 0)
310 if (lex_look_ahead (lexer) == '=')
312 lex_match_id (lexer, "VARIABLES");
313 lex_match (lexer, '=');
316 while (lex_token (lexer) != '.' && lex_token (lexer) != '/')
320 if (!parse_variables (lexer, dataset_dict (ds), &vars, &var_cnt,
321 PV_APPEND | PV_NO_DUPLICATE | PV_NUMERIC))
324 dsc->vars = xnrealloc (dsc->vars, var_cnt, sizeof *dsc->vars);
325 for (i = dsc->var_cnt; i < var_cnt; i++)
327 struct dsc_var *dv = &dsc->vars[i];
329 dv->z_name[0] = '\0';
332 dsc->var_cnt = var_cnt;
334 if (lex_match (lexer, '('))
336 if (lex_token (lexer) != T_ID)
338 lex_error (lexer, NULL);
341 if (try_name (dict, dsc, lex_tokid (lexer)))
343 strcpy (dsc->vars[dsc->var_cnt - 1].z_name, lex_tokid (lexer));
347 msg (SE, _("Z-score variable name %s would be"
348 " a duplicate variable name."), lex_tokid (lexer));
350 if (!lex_force_match (lexer, ')'))
357 lex_error (lexer, NULL);
361 lex_match (lexer, '/');
365 msg (SE, _("No variables specified."));
369 /* Construct z-score varnames, show translation table. */
370 if (z_cnt || save_z_scores)
376 for (i = 0; i < dsc->var_cnt; i++)
377 if (dsc->vars[i].z_name[0] == 0)
379 if (!generate_z_varname (dict, dsc, dsc->vars[i].z_name,
380 var_get_name (dsc->vars[i].v),
389 /* Figure out statistics to display. */
390 if (dsc->show_stats & (1ul << DSC_SKEWNESS))
391 dsc->show_stats |= 1ul << DSC_SESKEW;
392 if (dsc->show_stats & (1ul << DSC_KURTOSIS))
393 dsc->show_stats |= 1ul << DSC_SEKURT;
395 /* Figure out which statistics to calculate. */
396 dsc->calc_stats = dsc->show_stats;
398 dsc->calc_stats |= (1ul << DSC_MEAN) | (1ul << DSC_STDDEV);
399 if (dsc->sort_by_stat >= 0)
400 dsc->calc_stats |= 1ul << dsc->sort_by_stat;
401 if (dsc->show_stats & (1ul << DSC_SESKEW))
402 dsc->calc_stats |= 1ul << DSC_SKEWNESS;
403 if (dsc->show_stats & (1ul << DSC_SEKURT))
404 dsc->calc_stats |= 1ul << DSC_KURTOSIS;
406 /* Figure out maximum moment needed and allocate moments for
408 dsc->max_moment = MOMENT_NONE;
409 for (i = 0; i < DSC_N_STATS; i++)
410 if (dsc->calc_stats & (1ul << i) && dsc_info[i].moment > dsc->max_moment)
411 dsc->max_moment = dsc_info[i].moment;
412 if (dsc->max_moment != MOMENT_NONE)
413 for (i = 0; i < dsc->var_cnt; i++)
414 dsc->vars[i].moments = moments_create (dsc->max_moment);
417 ok = multipass_procedure_with_splits (ds, calc_descriptives, dsc);
421 setup_z_trns (dsc, ds);
426 return ok ? CMD_SUCCESS : CMD_CASCADING_FAILURE;
434 /* Returns the statistic named by the current token and skips past the token.
435 Returns DSC_NONE if no statistic is given (e.g., subcommand with no
436 specifiers). Emits an error if the current token ID does not name a
438 static enum dsc_statistic
439 match_statistic (struct lexer *lexer)
441 if (lex_token (lexer) == T_ID)
443 enum dsc_statistic stat;
445 for (stat = 0; stat < DSC_N_STATS; stat++)
446 if (lex_match_id (lexer, dsc_info[stat].identifier))
450 lex_error (lexer, _("expecting statistic name: reverting to default"));
458 free_dsc_proc (struct dsc_proc *dsc)
465 for (i = 0; i < dsc->var_cnt; i++)
466 moments_destroy (dsc->vars[i].moments);
473 /* Returns false if NAME is a duplicate of any existing variable name or
474 of any previously-declared z-var name; otherwise returns true. */
476 try_name (const struct dictionary *dict, struct dsc_proc *dsc,
481 if (dict_lookup_var (dict, name) != NULL)
483 for (i = 0; i < dsc->var_cnt; i++)
484 if (!strcasecmp (dsc->vars[i].z_name, name))
489 /* Generates a name for a Z-score variable based on a variable
490 named VAR_NAME, given that *Z_CNT generated variable names are
491 known to already exist. If successful, returns true and
492 copies the new name into Z_NAME. On failure, returns false. */
494 generate_z_varname (const struct dictionary *dict, struct dsc_proc *dsc, char *z_name,
495 const char *var_name, size_t *z_cnt)
497 char name[LONG_NAME_LEN + 1];
499 /* Try a name based on the original variable name. */
501 str_copy_trunc (name + 1, sizeof name - 1, var_name);
502 if (try_name (dict, dsc, name))
504 strcpy (z_name, name);
508 /* Generate a synthetic name. */
514 sprintf (name, "ZSC%03d", *z_cnt);
515 else if (*z_cnt <= 108)
516 sprintf (name, "STDZ%02d", *z_cnt - 99);
517 else if (*z_cnt <= 117)
518 sprintf (name, "ZZZZ%02d", *z_cnt - 108);
519 else if (*z_cnt <= 126)
520 sprintf (name, "ZQZQ%02d", *z_cnt - 117);
523 msg (SE, _("Ran out of generic names for Z-score variables. "
524 "There are only 126 generic names: ZSC001-ZSC0999, "
525 "STDZ01-STDZ09, ZZZZ01-ZZZZ09, ZQZQ01-ZQZQ09."));
529 if (try_name (dict, dsc, name))
531 strcpy (z_name, name);
538 /* Outputs a table describing the mapping between source
539 variables and Z-score variables. */
541 dump_z_table (struct dsc_proc *dsc)
549 for (i = 0; i < dsc->var_cnt; i++)
550 if (dsc->vars[i].z_name[0] != '\0')
554 t = tab_create (2, cnt + 1, 0);
555 tab_title (t, _("Mapping of variables to corresponding Z-scores."));
556 tab_columns (t, SOM_COL_DOWN, 1);
557 tab_headers (t, 0, 0, 1, 0);
558 tab_box (t, TAL_1, TAL_1, TAL_0, TAL_1, 0, 0, 1, cnt);
559 tab_hline (t, TAL_2, 0, 1, 1);
560 tab_text (t, 0, 0, TAB_CENTER | TAT_TITLE, _("Source"));
561 tab_text (t, 1, 0, TAB_CENTER | TAT_TITLE, _("Target"));
562 tab_dim (t, tab_natural_dimensions);
567 for (i = 0, y = 1; i < dsc->var_cnt; i++)
568 if (dsc->vars[i].z_name[0] != '\0')
570 tab_text (t, 0, y, TAB_LEFT, var_get_name (dsc->vars[i].v));
571 tab_text (t, 1, y++, TAB_LEFT, dsc->vars[i].z_name);
578 /* Transformation function to calculate Z-scores. Will return SYSMIS if any of
579 the following are true: 1) mean or standard deviation is SYSMIS 2) score is
580 SYSMIS 3) score is user missing and they were not included in the original
581 analyis. 4) any of the variables in the original analysis were missing
582 (either system or user-missing values that weren't included).
585 descriptives_trns_proc (void *trns_, struct ccase * c,
586 casenumber case_idx UNUSED)
588 struct dsc_trns *t = trns_;
589 struct dsc_z_score *z;
590 struct variable **vars;
593 if (t->missing_type == DSC_LISTWISE)
596 for (vars = t->vars; vars < t->vars + t->var_cnt; vars++)
598 double score = case_num (c, *vars);
600 || (!t->include_user_missing
601 && var_is_num_user_missing (*vars, score)))
609 for (z = t->z_scores; z < t->z_scores + t->z_score_cnt; z++)
611 double input = case_num (c, z->src_var);
612 double *output = &case_data_rw (c, z->z_var)->f;
614 if (z->mean == SYSMIS || z->std_dev == SYSMIS
615 || all_sysmis || input == SYSMIS
616 || (!t->include_user_missing
617 && var_is_num_user_missing (z->src_var, input)))
620 *output = (input - z->mean) / z->std_dev;
622 return TRNS_CONTINUE;
625 /* Frees a descriptives_trns struct. */
627 descriptives_trns_free (void *trns_)
629 struct dsc_trns *t = trns_;
632 assert((t->missing_type != DSC_LISTWISE) ^ (t->vars != NULL));
637 /* Sets up a transformation to calculate Z scores. */
639 setup_z_trns (struct dsc_proc *dsc, struct dataset *ds)
644 for (cnt = i = 0; i < dsc->var_cnt; i++)
645 if (dsc->vars[i].z_name[0] != '\0')
648 t = xmalloc (sizeof *t);
649 t->z_scores = xnmalloc (cnt, sizeof *t->z_scores);
650 t->z_score_cnt = cnt;
651 t->missing_type = dsc->missing_type;
652 t->include_user_missing = dsc->include_user_missing;
653 if ( t->missing_type == DSC_LISTWISE )
655 t->var_cnt = dsc->var_cnt;
656 t->vars = xnmalloc (t->var_cnt, sizeof *t->vars);
657 for (i = 0; i < t->var_cnt; i++)
658 t->vars[i] = dsc->vars[i].v;
666 for (cnt = i = 0; i < dsc->var_cnt; i++)
668 struct dsc_var *dv = &dsc->vars[i];
669 if (dv->z_name[0] != '\0')
671 struct dsc_z_score *z;
672 struct variable *dst_var;
674 dst_var = dict_create_var_assert (dataset_dict (ds), dv->z_name, 0);
675 var_set_label (dst_var, xasprintf (_("Z-score of %s"),
676 var_to_string (dv->v)));
678 z = &t->z_scores[cnt++];
681 z->mean = dv->stats[DSC_MEAN];
682 z->std_dev = dv->stats[DSC_STDDEV];
686 add_transformation (ds,
687 descriptives_trns_proc, descriptives_trns_free, t);
690 /* Statistical calculation. */
692 static bool listwise_missing (struct dsc_proc *dsc, const struct ccase *c);
694 /* Calculates and displays descriptive statistics for the cases
697 calc_descriptives (const struct ccase *first,
698 const struct casefile *cf, void *dsc_,
699 const struct dataset *ds)
701 struct dsc_proc *dsc = dsc_;
702 struct casereader *reader;
706 output_split_file_values (ds, first);
708 for (i = 0; i < dsc->var_cnt; i++)
710 struct dsc_var *dv = &dsc->vars[i];
712 dv->valid = dv->missing = 0.0;
713 if (dv->moments != NULL)
714 moments_clear (dv->moments);
718 dsc->missing_listwise = 0.;
721 /* First pass to handle most of the work. */
722 for (reader = casefile_get_reader (cf, NULL);
723 casereader_read (reader, &c);
726 double weight = dict_get_case_weight (dataset_dict (ds), &c, &dsc->bad_warn);
730 /* Check for missing values. */
731 if (listwise_missing (dsc, &c))
733 dsc->missing_listwise += weight;
734 if (dsc->missing_type == DSC_LISTWISE)
737 dsc->valid += weight;
739 for (i = 0; i < dsc->var_cnt; i++)
741 struct dsc_var *dv = &dsc->vars[i];
742 double x = case_num (&c, dv->v);
744 if (dsc->missing_type != DSC_LISTWISE
746 || (!dsc->include_user_missing
747 && var_is_num_user_missing (dv->v, x))))
749 dv->missing += weight;
753 if (dv->moments != NULL)
754 moments_pass_one (dv->moments, x, weight);
762 casereader_destroy (reader);
764 /* Second pass for higher-order moments. */
765 if (dsc->max_moment > MOMENT_MEAN)
767 for (reader = casefile_get_reader (cf, NULL);
768 casereader_read (reader, &c);
771 double weight = dict_get_case_weight (dataset_dict (ds), &c,
776 /* Check for missing values. */
777 if (listwise_missing (dsc, &c)
778 && dsc->missing_type == DSC_LISTWISE)
781 for (i = 0; i < dsc->var_cnt; i++)
783 struct dsc_var *dv = &dsc->vars[i];
784 double x = case_num (&c, dv->v);
786 if (dsc->missing_type != DSC_LISTWISE
788 || (!dsc->include_user_missing
789 && var_is_num_user_missing (dv->v, x))))
792 if (dv->moments != NULL)
793 moments_pass_two (dv->moments, x, weight);
796 casereader_destroy (reader);
799 /* Calculate results. */
800 for (i = 0; i < dsc->var_cnt; i++)
802 struct dsc_var *dv = &dsc->vars[i];
806 for (j = 0; j < DSC_N_STATS; j++)
807 dv->stats[j] = SYSMIS;
809 dv->valid = W = dsc->valid - dv->missing;
811 if (dv->moments != NULL)
812 moments_calculate (dv->moments, NULL,
813 &dv->stats[DSC_MEAN], &dv->stats[DSC_VARIANCE],
814 &dv->stats[DSC_SKEWNESS], &dv->stats[DSC_KURTOSIS]);
815 if (dsc->calc_stats & (1ul << DSC_SEMEAN)
816 && dv->stats[DSC_VARIANCE] != SYSMIS && W > 0.)
817 dv->stats[DSC_SEMEAN] = sqrt (dv->stats[DSC_VARIANCE]) / sqrt (W);
818 if (dsc->calc_stats & (1ul << DSC_STDDEV)
819 && dv->stats[DSC_VARIANCE] != SYSMIS)
820 dv->stats[DSC_STDDEV] = sqrt (dv->stats[DSC_VARIANCE]);
821 if (dsc->calc_stats & (1ul << DSC_SEKURT))
822 if (dv->stats[DSC_KURTOSIS] != SYSMIS)
823 dv->stats[DSC_SEKURT] = calc_sekurt (W);
824 if (dsc->calc_stats & (1ul << DSC_SESKEW)
825 && dv->stats[DSC_SKEWNESS] != SYSMIS)
826 dv->stats[DSC_SESKEW] = calc_seskew (W);
827 dv->stats[DSC_RANGE] = ((dv->min == DBL_MAX || dv->max == -DBL_MAX)
828 ? SYSMIS : dv->max - dv->min);
829 dv->stats[DSC_MIN] = dv->min == DBL_MAX ? SYSMIS : dv->min;
830 dv->stats[DSC_MAX] = dv->max == -DBL_MAX ? SYSMIS : dv->max;
831 if (dsc->calc_stats & (1ul << DSC_SUM))
832 dv->stats[DSC_SUM] = W * dv->stats[DSC_MEAN];
835 /* Output results. */
841 /* Returns true if any of the descriptives variables in DSC's
842 variable list have missing values in case C, false otherwise. */
844 listwise_missing (struct dsc_proc *dsc, const struct ccase *c)
848 for (i = 0; i < dsc->var_cnt; i++)
850 struct dsc_var *dv = &dsc->vars[i];
851 double x = case_num (c, dv->v);
854 || (!dsc->include_user_missing
855 && var_is_num_user_missing (dv->v, x)))
861 /* Statistical display. */
863 static algo_compare_func descriptives_compare_dsc_vars;
865 /* Displays a table of descriptive statistics for DSC. */
867 display (struct dsc_proc *dsc)
873 nc = 1 + (dsc->format == DSC_SERIAL ? 2 : 1);
874 for (i = 0; i < DSC_N_STATS; i++)
875 if (dsc->show_stats & (1ul << i))
878 if (dsc->sort_by_stat != DSC_NONE)
879 sort (dsc->vars, dsc->var_cnt, sizeof *dsc->vars,
880 descriptives_compare_dsc_vars, dsc);
882 t = tab_create (nc, dsc->var_cnt + 1, 0);
883 tab_headers (t, 1, 0, 1, 0);
884 tab_box (t, TAL_1, TAL_1, -1, -1, 0, 0, nc - 1, dsc->var_cnt);
885 tab_box (t, -1, -1, -1, TAL_1, 1, 0, nc - 1, dsc->var_cnt);
886 tab_hline (t, TAL_2, 0, nc - 1, 1);
887 tab_vline (t, TAL_2, 1, 0, dsc->var_cnt);
888 tab_dim (t, tab_natural_dimensions);
891 tab_text (t, nc++, 0, TAB_LEFT | TAT_TITLE, _("Variable"));
892 if (dsc->format == DSC_SERIAL)
894 tab_text (t, nc++, 0, TAB_CENTER | TAT_TITLE, _("Valid N"));
895 tab_text (t, nc++, 0, TAB_CENTER | TAT_TITLE, _("Missing N"));
898 tab_text (t, nc++, 0, TAB_CENTER | TAT_TITLE, "N");
900 for (i = 0; i < DSC_N_STATS; i++)
901 if (dsc->show_stats & (1ul << i))
903 const char *title = gettext (dsc_info[i].name);
904 tab_text (t, nc++, 0, TAB_CENTER | TAT_TITLE, title);
907 for (i = 0; i < dsc->var_cnt; i++)
909 struct dsc_var *dv = &dsc->vars[i];
913 tab_text (t, nc++, i + 1, TAB_LEFT, var_get_name (dv->v));
914 tab_text (t, nc++, i + 1, TAT_PRINTF, "%g", dv->valid);
915 if (dsc->format == DSC_SERIAL)
916 tab_text (t, nc++, i + 1, TAT_PRINTF, "%g", dv->missing);
917 for (j = 0; j < DSC_N_STATS; j++)
918 if (dsc->show_stats & (1ul << j))
919 tab_float (t, nc++, i + 1, TAB_NONE, dv->stats[j], 10, 3);
922 tab_title (t, _("Valid cases = %g; cases with missing value(s) = %g."),
923 dsc->valid, dsc->missing_listwise);
928 /* Compares `struct dsc_var's A and B according to the ordering
931 descriptives_compare_dsc_vars (const void *a_, const void *b_, const void *dsc_)
933 const struct dsc_var *a = a_;
934 const struct dsc_var *b = b_;
935 const struct dsc_proc *dsc = dsc_;
939 if (dsc->sort_by_stat == DSC_NAME)
940 result = strcasecmp (var_get_name (a->v), var_get_name (b->v));
943 double as = a->stats[dsc->sort_by_stat];
944 double bs = b->stats[dsc->sort_by_stat];
946 result = as < bs ? -1 : as > bs;
949 if (!dsc->sort_ascending)