1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
4 This program is free software; you can redistribute it and/or
5 modify it under the terms of the GNU General Public License as
6 published by the Free Software Foundation; either version 2 of the
7 License, or (at your option) any later version.
9 This program is distributed in the hope that it will be useful, but
10 WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19 /* FIXME: Many possible optimizations. */
27 #include <data/case.h>
28 #include <data/casefile.h>
29 #include <data/dictionary.h>
30 #include <data/procedure.h>
31 #include <data/transformations.h>
32 #include <data/variable.h>
33 #include <language/command.h>
34 #include <language/dictionary/split-file.h>
35 #include <language/lexer/lexer.h>
36 #include <language/lexer/variable-parser.h>
37 #include <libpspp/alloc.h>
38 #include <libpspp/array.h>
39 #include <libpspp/compiler.h>
40 #include <libpspp/magic.h>
41 #include <libpspp/message.h>
42 #include <libpspp/assertion.h>
43 #include <math/moments.h>
44 #include <output/manager.h>
45 #include <output/table.h>
48 #define _(msgid) gettext (msgid)
49 #define N_(msgid) msgid
51 /* DESCRIPTIVES private data. */
55 /* Handling of missing values. */
58 DSC_VARIABLE, /* Handle missing values on a per-variable basis. */
59 DSC_LISTWISE /* Discard entire case if any variable is missing. */
62 /* Describes properties of a distribution for the purpose of
63 calculating a Z-score. */
66 struct variable *src_var; /* Variable on which z-score is based. */
67 struct variable *z_var; /* New z-score variable. */
68 double mean; /* Distribution mean. */
69 double std_dev; /* Distribution standard deviation. */
72 /* DESCRIPTIVES transformation (for calculating Z-scores). */
75 struct dsc_z_score *z_scores; /* Array of Z-scores. */
76 int z_score_cnt; /* Number of Z-scores. */
77 struct variable **vars; /* Variables for listwise missing checks. */
78 size_t var_cnt; /* Number of variables. */
79 enum dsc_missing_type missing_type; /* Treatment of missing values. */
80 enum mv_class exclude; /* Classes of missing values to exclude. */
83 /* Statistics. Used as bit indexes, so must be 32 or fewer. */
86 DSC_MEAN = 0, DSC_SEMEAN, DSC_STDDEV, DSC_VARIANCE, DSC_KURTOSIS,
87 DSC_SEKURT, DSC_SKEWNESS, DSC_SESKEW, DSC_RANGE, DSC_MIN,
88 DSC_MAX, DSC_SUM, DSC_N_STATS,
90 /* Only valid as sort criteria. */
91 DSC_NAME = -2, /* Sort by name. */
92 DSC_NONE = -1 /* Unsorted. */
95 /* Describes one statistic. */
96 struct dsc_statistic_info
98 const char *identifier; /* Identifier. */
99 const char *name; /* Full name. */
100 enum moment moment; /* Highest moment needed to calculate. */
103 /* Table of statistics, indexed by DSC_*. */
104 static const struct dsc_statistic_info dsc_info[DSC_N_STATS] =
106 {"MEAN", N_("Mean"), MOMENT_MEAN},
107 {"SEMEAN", N_("S E Mean"), MOMENT_VARIANCE},
108 {"STDDEV", N_("Std Dev"), MOMENT_VARIANCE},
109 {"VARIANCE", N_("Variance"), MOMENT_VARIANCE},
110 {"KURTOSIS", N_("Kurtosis"), MOMENT_KURTOSIS},
111 {"SEKURTOSIS", N_("S E Kurt"), MOMENT_NONE},
112 {"SKEWNESS", N_("Skewness"), MOMENT_SKEWNESS},
113 {"SESKEWNESS", N_("S E Skew"), MOMENT_NONE},
114 {"RANGE", N_("Range"), MOMENT_NONE},
115 {"MINIMUM", N_("Minimum"), MOMENT_NONE},
116 {"MAXIMUM", N_("Maximum"), MOMENT_NONE},
117 {"SUM", N_("Sum"), MOMENT_MEAN},
120 /* Statistics calculated by default if none are explicitly
122 #define DEFAULT_STATS \
123 ((1ul << DSC_MEAN) | (1ul << DSC_STDDEV) | (1ul << DSC_MIN) \
126 /* A variable specified on DESCRIPTIVES. */
129 struct variable *v; /* Variable to calculate on. */
130 char z_name[LONG_NAME_LEN + 1]; /* Name for z-score variable. */
131 double valid, missing; /* Valid, missing counts. */
132 struct moments *moments; /* Moments. */
133 double min, max; /* Maximum and mimimum values. */
134 double stats[DSC_N_STATS]; /* All the stats' values. */
140 DSC_LINE, /* Abbreviated format. */
141 DSC_SERIAL /* Long format. */
144 /* A DESCRIPTIVES procedure. */
147 /* Per-variable info. */
148 struct dsc_var *vars; /* Variables. */
149 size_t var_cnt; /* Number of variables. */
152 enum dsc_missing_type missing_type; /* Treatment of missing values. */
153 enum mv_class exclude; /* Classes of missing values to exclude. */
154 int show_var_labels; /* Nonzero to show variable labels. */
155 int show_index; /* Nonzero to show variable index. */
156 enum dsc_format format; /* Output format. */
158 /* Accumulated results. */
159 double missing_listwise; /* Sum of weights of cases missing listwise. */
160 double valid; /* Sum of weights of valid cases. */
161 bool bad_warn; /* Warn if bad weight found. */
162 enum dsc_statistic sort_by_stat; /* Statistic to sort by; -1: name. */
163 int sort_ascending; /* !0: ascending order; 0: descending. */
164 unsigned long show_stats; /* Statistics to display. */
165 unsigned long calc_stats; /* Statistics to calculate. */
166 enum moment max_moment; /* Highest moment needed for stats. */
170 static enum dsc_statistic match_statistic (struct lexer *);
171 static void free_dsc_proc (struct dsc_proc *);
173 /* Z-score functions. */
174 static bool try_name (const struct dictionary *dict,
175 struct dsc_proc *dsc, const char *name);
176 static bool generate_z_varname (const struct dictionary *dict,
177 struct dsc_proc *dsc, char *z_name,
178 const char *name, size_t *z_cnt);
179 static void dump_z_table (struct dsc_proc *);
180 static void setup_z_trns (struct dsc_proc *, struct dataset *);
182 /* Procedure execution functions. */
183 static bool calc_descriptives (const struct ccase *first,
184 const struct casefile *, void *dsc_,
185 const struct dataset *);
186 static void display (struct dsc_proc *dsc);
188 /* Parser and outline. */
190 /* Handles DESCRIPTIVES. */
192 cmd_descriptives (struct lexer *lexer, struct dataset *ds)
194 struct dictionary *dict = dataset_dict (ds);
195 struct dsc_proc *dsc;
196 struct variable **vars = NULL;
198 int save_z_scores = 0;
203 /* Create and initialize dsc. */
204 dsc = xmalloc (sizeof *dsc);
207 dsc->missing_type = DSC_VARIABLE;
208 dsc->exclude = MV_ANY;
209 dsc->show_var_labels = 1;
211 dsc->format = DSC_LINE;
212 dsc->missing_listwise = 0.;
215 dsc->sort_by_stat = DSC_NONE;
216 dsc->sort_ascending = 1;
217 dsc->show_stats = dsc->calc_stats = DEFAULT_STATS;
219 /* Parse DESCRIPTIVES. */
220 while (lex_token (lexer) != '.')
222 if (lex_match_id (lexer, "MISSING"))
224 lex_match (lexer, '=');
225 while (lex_token (lexer) != '.' && lex_token (lexer) != '/')
227 if (lex_match_id (lexer, "VARIABLE"))
228 dsc->missing_type = DSC_VARIABLE;
229 else if (lex_match_id (lexer, "LISTWISE"))
230 dsc->missing_type = DSC_LISTWISE;
231 else if (lex_match_id (lexer, "INCLUDE"))
232 dsc->exclude = MV_SYSTEM;
235 lex_error (lexer, NULL);
238 lex_match (lexer, ',');
241 else if (lex_match_id (lexer, "SAVE"))
243 else if (lex_match_id (lexer, "FORMAT"))
245 lex_match (lexer, '=');
246 while (lex_token (lexer) != '.' && lex_token (lexer) != '/')
248 if (lex_match_id (lexer, "LABELS"))
249 dsc->show_var_labels = 1;
250 else if (lex_match_id (lexer, "NOLABELS"))
251 dsc->show_var_labels = 0;
252 else if (lex_match_id (lexer, "INDEX"))
254 else if (lex_match_id (lexer, "NOINDEX"))
256 else if (lex_match_id (lexer, "LINE"))
257 dsc->format = DSC_LINE;
258 else if (lex_match_id (lexer, "SERIAL"))
259 dsc->format = DSC_SERIAL;
262 lex_error (lexer, NULL);
265 lex_match (lexer, ',');
268 else if (lex_match_id (lexer, "STATISTICS"))
270 lex_match (lexer, '=');
272 while (lex_token (lexer) != '.' && lex_token (lexer) != '/')
274 if (lex_match (lexer, T_ALL))
275 dsc->show_stats |= (1ul << DSC_N_STATS) - 1;
276 else if (lex_match_id (lexer, "DEFAULT"))
277 dsc->show_stats |= DEFAULT_STATS;
279 dsc->show_stats |= 1ul << (match_statistic (lexer));
280 lex_match (lexer, ',');
282 if (dsc->show_stats == 0)
283 dsc->show_stats = DEFAULT_STATS;
285 else if (lex_match_id (lexer, "SORT"))
287 lex_match (lexer, '=');
288 if (lex_match_id (lexer, "NAME"))
289 dsc->sort_by_stat = DSC_NAME;
292 dsc->sort_by_stat = match_statistic (lexer);
293 if (dsc->sort_by_stat == DSC_NONE )
294 dsc->sort_by_stat = DSC_MEAN;
296 if (lex_match (lexer, '('))
298 if (lex_match_id (lexer, "A"))
299 dsc->sort_ascending = 1;
300 else if (lex_match_id (lexer, "D"))
301 dsc->sort_ascending = 0;
303 lex_error (lexer, NULL);
304 lex_force_match (lexer, ')');
307 else if (var_cnt == 0)
309 if (lex_look_ahead (lexer) == '=')
311 lex_match_id (lexer, "VARIABLES");
312 lex_match (lexer, '=');
315 while (lex_token (lexer) != '.' && lex_token (lexer) != '/')
319 if (!parse_variables (lexer, dataset_dict (ds), &vars, &var_cnt,
320 PV_APPEND | PV_NO_DUPLICATE | PV_NUMERIC))
323 dsc->vars = xnrealloc (dsc->vars, var_cnt, sizeof *dsc->vars);
324 for (i = dsc->var_cnt; i < var_cnt; i++)
326 struct dsc_var *dv = &dsc->vars[i];
328 dv->z_name[0] = '\0';
331 dsc->var_cnt = var_cnt;
333 if (lex_match (lexer, '('))
335 if (lex_token (lexer) != T_ID)
337 lex_error (lexer, NULL);
340 if (try_name (dict, dsc, lex_tokid (lexer)))
342 strcpy (dsc->vars[dsc->var_cnt - 1].z_name, lex_tokid (lexer));
346 msg (SE, _("Z-score variable name %s would be"
347 " a duplicate variable name."), lex_tokid (lexer));
349 if (!lex_force_match (lexer, ')'))
356 lex_error (lexer, NULL);
360 lex_match (lexer, '/');
364 msg (SE, _("No variables specified."));
368 /* Construct z-score varnames, show translation table. */
369 if (z_cnt || save_z_scores)
375 for (i = 0; i < dsc->var_cnt; i++)
376 if (dsc->vars[i].z_name[0] == 0)
378 if (!generate_z_varname (dict, dsc, dsc->vars[i].z_name,
379 var_get_name (dsc->vars[i].v),
388 /* Figure out statistics to display. */
389 if (dsc->show_stats & (1ul << DSC_SKEWNESS))
390 dsc->show_stats |= 1ul << DSC_SESKEW;
391 if (dsc->show_stats & (1ul << DSC_KURTOSIS))
392 dsc->show_stats |= 1ul << DSC_SEKURT;
394 /* Figure out which statistics to calculate. */
395 dsc->calc_stats = dsc->show_stats;
397 dsc->calc_stats |= (1ul << DSC_MEAN) | (1ul << DSC_STDDEV);
398 if (dsc->sort_by_stat >= 0)
399 dsc->calc_stats |= 1ul << dsc->sort_by_stat;
400 if (dsc->show_stats & (1ul << DSC_SESKEW))
401 dsc->calc_stats |= 1ul << DSC_SKEWNESS;
402 if (dsc->show_stats & (1ul << DSC_SEKURT))
403 dsc->calc_stats |= 1ul << DSC_KURTOSIS;
405 /* Figure out maximum moment needed and allocate moments for
407 dsc->max_moment = MOMENT_NONE;
408 for (i = 0; i < DSC_N_STATS; i++)
409 if (dsc->calc_stats & (1ul << i) && dsc_info[i].moment > dsc->max_moment)
410 dsc->max_moment = dsc_info[i].moment;
411 if (dsc->max_moment != MOMENT_NONE)
412 for (i = 0; i < dsc->var_cnt; i++)
413 dsc->vars[i].moments = moments_create (dsc->max_moment);
416 ok = multipass_procedure_with_splits (ds, calc_descriptives, dsc);
420 setup_z_trns (dsc, ds);
425 return ok ? CMD_SUCCESS : CMD_CASCADING_FAILURE;
433 /* Returns the statistic named by the current token and skips past the token.
434 Returns DSC_NONE if no statistic is given (e.g., subcommand with no
435 specifiers). Emits an error if the current token ID does not name a
437 static enum dsc_statistic
438 match_statistic (struct lexer *lexer)
440 if (lex_token (lexer) == T_ID)
442 enum dsc_statistic stat;
444 for (stat = 0; stat < DSC_N_STATS; stat++)
445 if (lex_match_id (lexer, dsc_info[stat].identifier))
449 lex_error (lexer, _("expecting statistic name: reverting to default"));
457 free_dsc_proc (struct dsc_proc *dsc)
464 for (i = 0; i < dsc->var_cnt; i++)
465 moments_destroy (dsc->vars[i].moments);
472 /* Returns false if NAME is a duplicate of any existing variable name or
473 of any previously-declared z-var name; otherwise returns true. */
475 try_name (const struct dictionary *dict, struct dsc_proc *dsc,
480 if (dict_lookup_var (dict, name) != NULL)
482 for (i = 0; i < dsc->var_cnt; i++)
483 if (!strcasecmp (dsc->vars[i].z_name, name))
488 /* Generates a name for a Z-score variable based on a variable
489 named VAR_NAME, given that *Z_CNT generated variable names are
490 known to already exist. If successful, returns true and
491 copies the new name into Z_NAME. On failure, returns false. */
493 generate_z_varname (const struct dictionary *dict, struct dsc_proc *dsc, char *z_name,
494 const char *var_name, size_t *z_cnt)
496 char name[LONG_NAME_LEN + 1];
498 /* Try a name based on the original variable name. */
500 str_copy_trunc (name + 1, sizeof name - 1, var_name);
501 if (try_name (dict, dsc, name))
503 strcpy (z_name, name);
507 /* Generate a synthetic name. */
513 sprintf (name, "ZSC%03d", *z_cnt);
514 else if (*z_cnt <= 108)
515 sprintf (name, "STDZ%02d", *z_cnt - 99);
516 else if (*z_cnt <= 117)
517 sprintf (name, "ZZZZ%02d", *z_cnt - 108);
518 else if (*z_cnt <= 126)
519 sprintf (name, "ZQZQ%02d", *z_cnt - 117);
522 msg (SE, _("Ran out of generic names for Z-score variables. "
523 "There are only 126 generic names: ZSC001-ZSC0999, "
524 "STDZ01-STDZ09, ZZZZ01-ZZZZ09, ZQZQ01-ZQZQ09."));
528 if (try_name (dict, dsc, name))
530 strcpy (z_name, name);
537 /* Outputs a table describing the mapping between source
538 variables and Z-score variables. */
540 dump_z_table (struct dsc_proc *dsc)
548 for (i = 0; i < dsc->var_cnt; i++)
549 if (dsc->vars[i].z_name[0] != '\0')
553 t = tab_create (2, cnt + 1, 0);
554 tab_title (t, _("Mapping of variables to corresponding Z-scores."));
555 tab_columns (t, SOM_COL_DOWN, 1);
556 tab_headers (t, 0, 0, 1, 0);
557 tab_box (t, TAL_1, TAL_1, TAL_0, TAL_1, 0, 0, 1, cnt);
558 tab_hline (t, TAL_2, 0, 1, 1);
559 tab_text (t, 0, 0, TAB_CENTER | TAT_TITLE, _("Source"));
560 tab_text (t, 1, 0, TAB_CENTER | TAT_TITLE, _("Target"));
561 tab_dim (t, tab_natural_dimensions);
566 for (i = 0, y = 1; i < dsc->var_cnt; i++)
567 if (dsc->vars[i].z_name[0] != '\0')
569 tab_text (t, 0, y, TAB_LEFT, var_get_name (dsc->vars[i].v));
570 tab_text (t, 1, y++, TAB_LEFT, dsc->vars[i].z_name);
577 /* Transformation function to calculate Z-scores. Will return SYSMIS if any of
578 the following are true: 1) mean or standard deviation is SYSMIS 2) score is
579 SYSMIS 3) score is user missing and they were not included in the original
580 analyis. 4) any of the variables in the original analysis were missing
581 (either system or user-missing values that weren't included).
584 descriptives_trns_proc (void *trns_, struct ccase * c,
585 casenumber case_idx UNUSED)
587 struct dsc_trns *t = trns_;
588 struct dsc_z_score *z;
589 struct variable **vars;
592 if (t->missing_type == DSC_LISTWISE)
595 for (vars = t->vars; vars < t->vars + t->var_cnt; vars++)
597 double score = case_num (c, *vars);
598 if (var_is_num_missing (*vars, score, t->exclude))
606 for (z = t->z_scores; z < t->z_scores + t->z_score_cnt; z++)
608 double input = case_num (c, z->src_var);
609 double *output = &case_data_rw (c, z->z_var)->f;
611 if (z->mean == SYSMIS || z->std_dev == SYSMIS || all_sysmis
612 || var_is_num_missing (z->src_var, input, t->exclude))
615 *output = (input - z->mean) / z->std_dev;
617 return TRNS_CONTINUE;
620 /* Frees a descriptives_trns struct. */
622 descriptives_trns_free (void *trns_)
624 struct dsc_trns *t = trns_;
627 assert((t->missing_type != DSC_LISTWISE) ^ (t->vars != NULL));
632 /* Sets up a transformation to calculate Z scores. */
634 setup_z_trns (struct dsc_proc *dsc, struct dataset *ds)
639 for (cnt = i = 0; i < dsc->var_cnt; i++)
640 if (dsc->vars[i].z_name[0] != '\0')
643 t = xmalloc (sizeof *t);
644 t->z_scores = xnmalloc (cnt, sizeof *t->z_scores);
645 t->z_score_cnt = cnt;
646 t->missing_type = dsc->missing_type;
647 t->exclude = dsc->exclude;
648 if ( t->missing_type == DSC_LISTWISE )
650 t->var_cnt = dsc->var_cnt;
651 t->vars = xnmalloc (t->var_cnt, sizeof *t->vars);
652 for (i = 0; i < t->var_cnt; i++)
653 t->vars[i] = dsc->vars[i].v;
661 for (cnt = i = 0; i < dsc->var_cnt; i++)
663 struct dsc_var *dv = &dsc->vars[i];
664 if (dv->z_name[0] != '\0')
666 struct dsc_z_score *z;
667 struct variable *dst_var;
669 dst_var = dict_create_var_assert (dataset_dict (ds), dv->z_name, 0);
670 var_set_label (dst_var, xasprintf (_("Z-score of %s"),
671 var_to_string (dv->v)));
673 z = &t->z_scores[cnt++];
676 z->mean = dv->stats[DSC_MEAN];
677 z->std_dev = dv->stats[DSC_STDDEV];
681 add_transformation (ds,
682 descriptives_trns_proc, descriptives_trns_free, t);
685 /* Statistical calculation. */
687 static bool listwise_missing (struct dsc_proc *dsc, const struct ccase *c);
689 /* Calculates and displays descriptive statistics for the cases
692 calc_descriptives (const struct ccase *first,
693 const struct casefile *cf, void *dsc_,
694 const struct dataset *ds)
696 struct dsc_proc *dsc = dsc_;
697 struct casereader *reader;
701 output_split_file_values (ds, first);
703 for (i = 0; i < dsc->var_cnt; i++)
705 struct dsc_var *dv = &dsc->vars[i];
707 dv->valid = dv->missing = 0.0;
708 if (dv->moments != NULL)
709 moments_clear (dv->moments);
713 dsc->missing_listwise = 0.;
716 /* First pass to handle most of the work. */
717 for (reader = casefile_get_reader (cf, NULL);
718 casereader_read (reader, &c);
721 double weight = dict_get_case_weight (dataset_dict (ds), &c, &dsc->bad_warn);
725 /* Check for missing values. */
726 if (listwise_missing (dsc, &c))
728 dsc->missing_listwise += weight;
729 if (dsc->missing_type == DSC_LISTWISE)
732 dsc->valid += weight;
734 for (i = 0; i < dsc->var_cnt; i++)
736 struct dsc_var *dv = &dsc->vars[i];
737 double x = case_num (&c, dv->v);
739 if (dsc->missing_type != DSC_LISTWISE
740 && var_is_num_missing (dv->v, x, dsc->exclude))
742 dv->missing += weight;
746 if (dv->moments != NULL)
747 moments_pass_one (dv->moments, x, weight);
755 casereader_destroy (reader);
757 /* Second pass for higher-order moments. */
758 if (dsc->max_moment > MOMENT_MEAN)
760 for (reader = casefile_get_reader (cf, NULL);
761 casereader_read (reader, &c);
764 double weight = dict_get_case_weight (dataset_dict (ds), &c,
769 /* Check for missing values. */
770 if (dsc->missing_type == DSC_LISTWISE && listwise_missing (dsc, &c))
773 for (i = 0; i < dsc->var_cnt; i++)
775 struct dsc_var *dv = &dsc->vars[i];
776 double x = case_num (&c, dv->v);
778 if (dsc->missing_type != DSC_LISTWISE
779 && var_is_num_missing (dv->v, x, dsc->exclude))
782 if (dv->moments != NULL)
783 moments_pass_two (dv->moments, x, weight);
786 casereader_destroy (reader);
789 /* Calculate results. */
790 for (i = 0; i < dsc->var_cnt; i++)
792 struct dsc_var *dv = &dsc->vars[i];
796 for (j = 0; j < DSC_N_STATS; j++)
797 dv->stats[j] = SYSMIS;
799 dv->valid = W = dsc->valid - dv->missing;
801 if (dv->moments != NULL)
802 moments_calculate (dv->moments, NULL,
803 &dv->stats[DSC_MEAN], &dv->stats[DSC_VARIANCE],
804 &dv->stats[DSC_SKEWNESS], &dv->stats[DSC_KURTOSIS]);
805 if (dsc->calc_stats & (1ul << DSC_SEMEAN)
806 && dv->stats[DSC_VARIANCE] != SYSMIS && W > 0.)
807 dv->stats[DSC_SEMEAN] = sqrt (dv->stats[DSC_VARIANCE]) / sqrt (W);
808 if (dsc->calc_stats & (1ul << DSC_STDDEV)
809 && dv->stats[DSC_VARIANCE] != SYSMIS)
810 dv->stats[DSC_STDDEV] = sqrt (dv->stats[DSC_VARIANCE]);
811 if (dsc->calc_stats & (1ul << DSC_SEKURT))
812 if (dv->stats[DSC_KURTOSIS] != SYSMIS)
813 dv->stats[DSC_SEKURT] = calc_sekurt (W);
814 if (dsc->calc_stats & (1ul << DSC_SESKEW)
815 && dv->stats[DSC_SKEWNESS] != SYSMIS)
816 dv->stats[DSC_SESKEW] = calc_seskew (W);
817 dv->stats[DSC_RANGE] = ((dv->min == DBL_MAX || dv->max == -DBL_MAX)
818 ? SYSMIS : dv->max - dv->min);
819 dv->stats[DSC_MIN] = dv->min == DBL_MAX ? SYSMIS : dv->min;
820 dv->stats[DSC_MAX] = dv->max == -DBL_MAX ? SYSMIS : dv->max;
821 if (dsc->calc_stats & (1ul << DSC_SUM))
822 dv->stats[DSC_SUM] = W * dv->stats[DSC_MEAN];
825 /* Output results. */
831 /* Returns true if any of the descriptives variables in DSC's
832 variable list have missing values in case C, false otherwise. */
834 listwise_missing (struct dsc_proc *dsc, const struct ccase *c)
838 for (i = 0; i < dsc->var_cnt; i++)
840 struct dsc_var *dv = &dsc->vars[i];
841 double x = case_num (c, dv->v);
843 if (var_is_num_missing (dv->v, x, dsc->exclude))
849 /* Statistical display. */
851 static algo_compare_func descriptives_compare_dsc_vars;
853 /* Displays a table of descriptive statistics for DSC. */
855 display (struct dsc_proc *dsc)
861 nc = 1 + (dsc->format == DSC_SERIAL ? 2 : 1);
862 for (i = 0; i < DSC_N_STATS; i++)
863 if (dsc->show_stats & (1ul << i))
866 if (dsc->sort_by_stat != DSC_NONE)
867 sort (dsc->vars, dsc->var_cnt, sizeof *dsc->vars,
868 descriptives_compare_dsc_vars, dsc);
870 t = tab_create (nc, dsc->var_cnt + 1, 0);
871 tab_headers (t, 1, 0, 1, 0);
872 tab_box (t, TAL_1, TAL_1, -1, -1, 0, 0, nc - 1, dsc->var_cnt);
873 tab_box (t, -1, -1, -1, TAL_1, 1, 0, nc - 1, dsc->var_cnt);
874 tab_hline (t, TAL_2, 0, nc - 1, 1);
875 tab_vline (t, TAL_2, 1, 0, dsc->var_cnt);
876 tab_dim (t, tab_natural_dimensions);
879 tab_text (t, nc++, 0, TAB_LEFT | TAT_TITLE, _("Variable"));
880 if (dsc->format == DSC_SERIAL)
882 tab_text (t, nc++, 0, TAB_CENTER | TAT_TITLE, _("Valid N"));
883 tab_text (t, nc++, 0, TAB_CENTER | TAT_TITLE, _("Missing N"));
886 tab_text (t, nc++, 0, TAB_CENTER | TAT_TITLE, "N");
888 for (i = 0; i < DSC_N_STATS; i++)
889 if (dsc->show_stats & (1ul << i))
891 const char *title = gettext (dsc_info[i].name);
892 tab_text (t, nc++, 0, TAB_CENTER | TAT_TITLE, title);
895 for (i = 0; i < dsc->var_cnt; i++)
897 struct dsc_var *dv = &dsc->vars[i];
901 tab_text (t, nc++, i + 1, TAB_LEFT, var_get_name (dv->v));
902 tab_text (t, nc++, i + 1, TAT_PRINTF, "%g", dv->valid);
903 if (dsc->format == DSC_SERIAL)
904 tab_text (t, nc++, i + 1, TAT_PRINTF, "%g", dv->missing);
905 for (j = 0; j < DSC_N_STATS; j++)
906 if (dsc->show_stats & (1ul << j))
907 tab_float (t, nc++, i + 1, TAB_NONE, dv->stats[j], 10, 3);
910 tab_title (t, _("Valid cases = %g; cases with missing value(s) = %g."),
911 dsc->valid, dsc->missing_listwise);
916 /* Compares `struct dsc_var's A and B according to the ordering
919 descriptives_compare_dsc_vars (const void *a_, const void *b_, const void *dsc_)
921 const struct dsc_var *a = a_;
922 const struct dsc_var *b = b_;
923 const struct dsc_proc *dsc = dsc_;
927 if (dsc->sort_by_stat == DSC_NAME)
928 result = strcasecmp (var_get_name (a->v), var_get_name (b->v));
931 double as = a->stats[dsc->sort_by_stat];
932 double bs = b->stats[dsc->sort_by_stat];
934 result = as < bs ? -1 : as > bs;
937 if (!dsc->sort_ascending)