1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
23 #include <data/casegrouper.h>
24 #include <data/casereader.h>
25 #include <data/dictionary.h>
26 #include <data/procedure.h>
27 #include <data/transformations.h>
28 #include <data/variable.h>
29 #include <language/command.h>
30 #include <language/dictionary/split-file.h>
31 #include <language/lexer/lexer.h>
32 #include <language/lexer/variable-parser.h>
33 #include <libpspp/alloc.h>
34 #include <libpspp/array.h>
35 #include <libpspp/compiler.h>
36 #include <libpspp/magic.h>
37 #include <libpspp/message.h>
38 #include <libpspp/assertion.h>
39 #include <math/moments.h>
40 #include <output/manager.h>
41 #include <output/table.h>
/* Translation helpers. _() looks MSGID up with gettext() where it is used.
   N_() expands to MSGID unchanged; strings marked this way (the names in
   dsc_info) are passed to gettext() later, when display() builds its
   header row. */
44 #define _(msgid) gettext (msgid)
45 #define N_(msgid) msgid
47 /* DESCRIPTIVES private data. */
51 /* Handling of missing values. Stored in the `missing_type' member of
   both struct dsc_proc and struct dsc_trns; cmd_descriptives starts out
   with DSC_VARIABLE. */
54 DSC_VARIABLE, /* Handle missing values on a per-variable basis. */
55 DSC_LISTWISE /* Discard entire case if any variable is missing. */
58 /* Describes properties of a distribution for the purpose of
59 calculating a Z-score. setup_z_trns copies MEAN and STD_DEV from the
   source variable's computed statistics; descriptives_trns_proc then
   stores (value - mean) / std_dev into Z_VAR. */
62 const struct variable *src_var; /* Variable on which z-score is based. */
63 struct variable *z_var; /* New z-score variable. */
64 double mean; /* Distribution mean. */
65 double std_dev; /* Distribution standard deviation. */
68 /* DESCRIPTIVES transformation (for calculating Z-scores). Filled in by
   setup_z_trns and consumed by descriptives_trns_proc. */
71 struct dsc_z_score *z_scores; /* Array of Z-scores. */
72 int z_score_cnt; /* Number of Z-scores. */
73 const struct variable **vars; /* Variables for listwise missing checks;
                                    non-null exactly when missing_type is
                                    DSC_LISTWISE (asserted in
                                    descriptives_trns_free). */
74 size_t var_cnt; /* Number of variables. */
75 enum dsc_missing_type missing_type; /* Treatment of missing values. */
76 enum mv_class exclude; /* Classes of missing values to exclude. */
79 /* Statistics. Used as bit indexes, so must be 32 or fewer.
   Statistic S corresponds to bit (1ul << S) in the show_stats and
   calc_stats masks of struct dsc_proc, to dsc_info[S], and to element S
   of a variable's stats array. DSC_N_STATS is the number of statistics. */
82 DSC_MEAN = 0, DSC_SEMEAN, DSC_STDDEV, DSC_VARIANCE, DSC_KURTOSIS,
83 DSC_SEKURT, DSC_SKEWNESS, DSC_SESKEW, DSC_RANGE, DSC_MIN,
84 DSC_MAX, DSC_SUM, DSC_N_STATS,
86 /* Only valid as sort criteria. Both are negative, so they never index
   the tables above or name a bit in the masks. */
87 DSC_NAME = -2, /* Sort by name. */
88 DSC_NONE = -1 /* Unsorted. */
91 /* Describes one statistic (one entry of the dsc_info table). */
92 struct dsc_statistic_info
94 const char *identifier; /* Identifier: keyword that match_statistic
                             compares against the current token. */
95 const char *name; /* Full name; translated with gettext() when used as
                       a column title in display(). */
96 enum moment moment; /* Highest moment needed to calculate. */
99 /* Table of statistics, indexed by DSC_*.
   The third member is the highest moment the statistic itself requires;
   cmd_descriptives takes the maximum over all calculated statistics to
   decide what kind of moments object to create. The standard errors of
   skewness and kurtosis are listed with MOMENT_NONE: cmd_descriptives
   adds DSC_SKEWNESS / DSC_KURTOSIS to the calculated set whenever they
   are shown, and those entries carry the moment requirement. DSC_SUM
   needs only the mean because calc_descriptives derives it as W * mean. */
100 static const struct dsc_statistic_info dsc_info[DSC_N_STATS] =
102 {"MEAN", N_("Mean"), MOMENT_MEAN},
103 {"SEMEAN", N_("S E Mean"), MOMENT_VARIANCE},
104 {"STDDEV", N_("Std Dev"), MOMENT_VARIANCE},
105 {"VARIANCE", N_("Variance"), MOMENT_VARIANCE},
106 {"KURTOSIS", N_("Kurtosis"), MOMENT_KURTOSIS},
107 {"SEKURTOSIS", N_("S E Kurt"), MOMENT_NONE},
108 {"SKEWNESS", N_("Skewness"), MOMENT_SKEWNESS},
109 {"SESKEWNESS", N_("S E Skew"), MOMENT_NONE},
110 {"RANGE", N_("Range"), MOMENT_NONE},
111 {"MINIMUM", N_("Minimum"), MOMENT_NONE},
112 {"MAXIMUM", N_("Maximum"), MOMENT_NONE},
113 {"SUM", N_("Sum"), MOMENT_MEAN},
116 /* Statistics calculated by default if none are explicitly requested. */
/* Bit mask of statistics, one (1ul << DSC_*) bit each. cmd_descriptives
   uses it as the initial value of show_stats and calc_stats, for the
   DEFAULT keyword of STATISTICS, and as the fallback when the STATISTICS
   subcommand selected nothing. */
118 #define DEFAULT_STATS \
119 ((1ul << DSC_MEAN) | (1ul << DSC_STDDEV) | (1ul << DSC_MIN) \
122 /* A variable specified on DESCRIPTIVES (one element of the `vars'
   array in struct dsc_proc). */
125 const struct variable *v; /* Variable to calculate on. */
126 char z_name[LONG_NAME_LEN + 1]; /* Name for z-score variable; an empty
                                      string means no z-score variable
                                      for this variable. */
127 double valid, missing; /* Valid, missing counts (sums of case weights). */
128 struct moments *moments; /* Moments; only created when some calculated
                              statistic needs a moment, and checked
                              against NULL before use. */
129 double min, max; /* Minimum and maximum values. */
130 double stats[DSC_N_STATS]; /* All the stats' values, indexed by DSC_*;
                                calc_descriptives resets each to SYSMIS
                                before filling in the calculated ones. */
/* Output formats, stored in the `format' member of struct dsc_proc.
   display() adds a "Missing N" column only for DSC_SERIAL. */
136 DSC_LINE, /* Abbreviated format. */
137 DSC_SERIAL /* Long format. */
140 /* A DESCRIPTIVES procedure: the parsed options plus the results that
   calc_descriptives accumulates for the current group of cases. */
143 /* Per-variable info. */
144 struct dsc_var *vars; /* Variables. */
145 size_t var_cnt; /* Number of variables. */
148 enum dsc_missing_type missing_type; /* Treatment of missing values. */
149 enum mv_class exclude; /* Classes of missing values to exclude. */
150 int show_var_labels; /* Nonzero to show variable labels. */
151 int show_index; /* Nonzero to show variable index. */
152 enum dsc_format format; /* Output format. */
154 /* Accumulated results. */
155 double missing_listwise; /* Sum of weights of cases missing listwise. */
156 double valid; /* Sum of weights of valid cases. */
157 bool bad_warn; /* Warn if bad weight found. */
/* Sorting and statistic selection. */
158 enum dsc_statistic sort_by_stat; /* Statistic to sort by; DSC_NAME (-2):
                                       sort by name; DSC_NONE (-1): unsorted. */
159 int sort_ascending; /* !0: ascending order; 0: descending. */
160 unsigned long show_stats; /* Statistics to display (bit mask, one
                               1ul << DSC_* bit per statistic). */
161 unsigned long calc_stats; /* Statistics to calculate (same encoding);
                               starts from show_stats and gains the
                               statistics that others depend on. */
162 enum moment max_moment; /* Highest moment needed for stats. */
/* Forward declarations of the file-local helpers defined below. */
/* Parsing and cleanup helpers. */
166 static enum dsc_statistic match_statistic (struct lexer *);
167 static void free_dsc_proc (struct dsc_proc *);
169 /* Z-score functions. */
170 static bool try_name (const struct dictionary *dict,
171 struct dsc_proc *dsc, const char *name);
172 static bool generate_z_varname (const struct dictionary *dict,
173 struct dsc_proc *dsc, char *z_name,
174 const char *name, int *z_cnt);
175 static void dump_z_table (struct dsc_proc *);
176 static void setup_z_trns (struct dsc_proc *, struct dataset *);
178 /* Procedure execution functions. */
179 static void calc_descriptives (struct dsc_proc *, struct casereader *,
181 static void display (struct dsc_proc *dsc);
183 /* Parser and outline. */
185 /* Handles DESCRIPTIVES.
   Parses the subcommands from LEXER, works out which statistics must be
   shown and which must be calculated, runs calc_descriptives once for
   every split-file group read from DS, and sets up the Z-score
   transformation with setup_z_trns. The return statement visible here
   yields CMD_SUCCESS when OK is true and CMD_CASCADING_FAILURE otherwise. */
187 cmd_descriptives (struct lexer *lexer, struct dataset *ds)
189 struct dictionary *dict = dataset_dict (ds);
190 struct dsc_proc *dsc;
191 const struct variable **vars = NULL;
193 int save_z_scores = 0;
198 struct casegrouper *grouper;
199 struct casereader *group;
201 /* Create and initialize dsc. Defaults: per-variable missing handling,
   every class of missing value excluded, variable labels shown, LINE
   format, no sorting (ascending if sorting is requested), and the
   DEFAULT_STATS set both shown and calculated. */
202 dsc = xmalloc (sizeof *dsc);
205 dsc->missing_type = DSC_VARIABLE;
206 dsc->exclude = MV_ANY;
207 dsc->show_var_labels = 1;
209 dsc->format = DSC_LINE;
210 dsc->missing_listwise = 0.;
213 dsc->sort_by_stat = DSC_NONE;
214 dsc->sort_ascending = 1;
215 dsc->show_stats = dsc->calc_stats = DEFAULT_STATS;
217 /* Parse DESCRIPTIVES. Each iteration handles one subcommand; the loop
   ends at the command terminator. */
218 while (lex_token (lexer) != '.')
/* MISSING: how missing values are treated. */
220 if (lex_match_id (lexer, "MISSING"))
222 lex_match (lexer, '=');
223 while (lex_token (lexer) != '.' && lex_token (lexer) != '/')
225 if (lex_match_id (lexer, "VARIABLE"))
226 dsc->missing_type = DSC_VARIABLE;
227 else if (lex_match_id (lexer, "LISTWISE"))
228 dsc->missing_type = DSC_LISTWISE;
/* INCLUDE narrows the excluded class from the default MV_ANY to
   MV_SYSTEM. */
229 else if (lex_match_id (lexer, "INCLUDE"))
230 dsc->exclude = MV_SYSTEM;
233 lex_error (lexer, NULL);
236 lex_match (lexer, ',');
239 else if (lex_match_id (lexer, "SAVE"))
/* FORMAT: table layout options. */
241 else if (lex_match_id (lexer, "FORMAT"))
243 lex_match (lexer, '=');
244 while (lex_token (lexer) != '.' && lex_token (lexer) != '/')
246 if (lex_match_id (lexer, "LABELS"))
247 dsc->show_var_labels = 1;
248 else if (lex_match_id (lexer, "NOLABELS"))
249 dsc->show_var_labels = 0;
250 else if (lex_match_id (lexer, "INDEX"))
252 else if (lex_match_id (lexer, "NOINDEX"))
254 else if (lex_match_id (lexer, "LINE"))
255 dsc->format = DSC_LINE;
256 else if (lex_match_id (lexer, "SERIAL"))
257 dsc->format = DSC_SERIAL;
260 lex_error (lexer, NULL);
263 lex_match (lexer, ',');
/* STATISTICS: select the statistics to display. ALL sets every bit
   below DSC_N_STATS. */
266 else if (lex_match_id (lexer, "STATISTICS"))
268 lex_match (lexer, '=');
270 while (lex_token (lexer) != '.' && lex_token (lexer) != '/')
272 if (lex_match (lexer, T_ALL))
273 dsc->show_stats |= (1ul << DSC_N_STATS) - 1;
274 else if (lex_match_id (lexer, "DEFAULT"))
275 dsc->show_stats |= DEFAULT_STATS;
/* NOTE(review): match_statistic is documented to return DSC_NONE (-1)
   when no statistic is given; shifting by a negative count is undefined
   behavior in C. Confirm whether that value can reach this shift. */
277 dsc->show_stats |= 1ul << (match_statistic (lexer));
278 lex_match (lexer, ',');
/* Nothing selected: fall back to the default set. */
280 if (dsc->show_stats == 0)
281 dsc->show_stats = DEFAULT_STATS;
/* SORT: order of the variables in the output table, by name or by one
   statistic. */
283 else if (lex_match_id (lexer, "SORT"))
285 lex_match (lexer, '=');
286 if (lex_match_id (lexer, "NAME"))
287 dsc->sort_by_stat = DSC_NAME;
290 dsc->sort_by_stat = match_statistic (lexer);
/* No statistic named: sort by the mean. */
291 if (dsc->sort_by_stat == DSC_NONE )
292 dsc->sort_by_stat = DSC_MEAN;
/* Optional (A) or (D) selects ascending or descending order. */
294 if (lex_match (lexer, '('))
296 if (lex_match_id (lexer, "A"))
297 dsc->sort_ascending = 1;
298 else if (lex_match_id (lexer, "D"))
299 dsc->sort_ascending = 0;
301 lex_error (lexer, NULL);
302 lex_force_match (lexer, ')');
/* Variable list, optionally introduced by VARIABLES=. This branch is
   only taken while no variables have been parsed yet. */
305 else if (var_cnt == 0)
307 if (lex_look_ahead (lexer) == '=')
309 lex_match_id (lexer, "VARIABLES");
310 lex_match (lexer, '=');
313 while (lex_token (lexer) != '.' && lex_token (lexer) != '/')
/* Append numeric, non-duplicate variables to VARS. */
317 if (!parse_variables_const (lexer, dict, &vars, &var_cnt,
318 PV_APPEND | PV_NO_DUPLICATE | PV_NUMERIC))
/* Grow dsc->vars to the new count and initialize the entries that were
   just added. */
321 dsc->vars = xnrealloc ((void *)dsc->vars, var_cnt, sizeof *dsc->vars);
322 for (i = dsc->var_cnt; i < var_cnt; i++)
324 struct dsc_var *dv = &dsc->vars[i];
326 dv->z_name[0] = '\0';
329 dsc->var_cnt = var_cnt;
/* Optional `(NAME)': explicit Z-score variable name for the most
   recently listed variable, accepted only if try_name finds no clash. */
331 if (lex_match (lexer, '('))
333 if (lex_token (lexer) != T_ID)
335 lex_error (lexer, NULL);
338 if (try_name (dict, dsc, lex_tokid (lexer)))
340 strcpy (dsc->vars[dsc->var_cnt - 1].z_name, lex_tokid (lexer));
344 msg (SE, _("Z-score variable name %s would be"
345 " a duplicate variable name."), lex_tokid (lexer));
347 if (!lex_force_match (lexer, ')'))
354 lex_error (lexer, NULL);
358 lex_match (lexer, '/');
362 msg (SE, _("No variables specified."));
366 /* Construct z-score varnames, show translation table. */
367 if (z_cnt || save_z_scores)
/* Only variables that were not given an explicit Z-score name get a
   generated one. */
373 for (i = 0; i < dsc->var_cnt; i++)
374 if (dsc->vars[i].z_name[0] == 0)
376 if (!generate_z_varname (dict, dsc, dsc->vars[i].z_name,
377 var_get_name (dsc->vars[i].v),
386 /* Figure out statistics to display. Showing skewness or kurtosis also
   shows the corresponding standard error. */
387 if (dsc->show_stats & (1ul << DSC_SKEWNESS))
388 dsc->show_stats |= 1ul << DSC_SESKEW;
389 if (dsc->show_stats & (1ul << DSC_KURTOSIS))
390 dsc->show_stats |= 1ul << DSC_SEKURT;
392 /* Figure out which statistics to calculate. */
393 dsc->calc_stats = dsc->show_stats;
/* The mean and standard deviation are what setup_z_trns copies into each
   Z-score. */
395 dsc->calc_stats |= (1ul << DSC_MEAN) | (1ul << DSC_STDDEV);
/* The sort key must be calculated too; DSC_NAME and DSC_NONE are
   negative and add nothing here. */
396 if (dsc->sort_by_stat >= 0)
397 dsc->calc_stats |= 1ul << dsc->sort_by_stat;
/* calc_descriptives only fills in a standard error when the skewness or
   kurtosis itself came out non-missing, so both must be calculated. */
398 if (dsc->show_stats & (1ul << DSC_SESKEW))
399 dsc->calc_stats |= 1ul << DSC_SKEWNESS;
400 if (dsc->show_stats & (1ul << DSC_SEKURT))
401 dsc->calc_stats |= 1ul << DSC_KURTOSIS;
403 /* Figure out maximum moment needed and allocate moments for
405 dsc->max_moment = MOMENT_NONE;
406 for (i = 0; i < DSC_N_STATS; i++)
407 if (dsc->calc_stats & (1ul << i) && dsc_info[i].moment > dsc->max_moment)
408 dsc->max_moment = dsc_info[i].moment;
409 if (dsc->max_moment != MOMENT_NONE)
410 for (i = 0; i < dsc->var_cnt; i++)
411 dsc->vars[i].moments = moments_create (dsc->max_moment);
414 grouper = casegrouper_create_splits (proc_open (ds), dict);
415 while (casegrouper_get_next_group (grouper, &group))
416 calc_descriptives (dsc, group, ds);
417 ok = casegrouper_destroy (grouper);
418 ok = proc_commit (ds) && ok;
422 setup_z_trns (dsc, ds);
427 return ok ? CMD_SUCCESS : CMD_CASCADING_FAILURE;
435 /* Returns the statistic named by the current token and skips past the token.
436 Returns DSC_NONE if no statistic is given (e.g., subcommand with no
437 specifiers). Emits an error if the current token ID does not name a statistic. */
439 static enum dsc_statistic
440 match_statistic (struct lexer *lexer)
/* Only an identifier token can name a statistic. */
442 if (lex_token (lexer) == T_ID)
444 enum dsc_statistic stat;
/* Try each statistic keyword from dsc_info in table order. */
446 for (stat = 0; stat < DSC_N_STATS; stat++)
447 if (lex_match_id (lexer, dsc_info[stat].identifier))
/* Reported when the identifier is not a statistic keyword. */
451 lex_error (lexer, _("expecting statistic name: reverting to default"));
/* Cleanup for a struct dsc_proc. The part visible here destroys the
   moments object of every variable in DSC. */
459 free_dsc_proc (struct dsc_proc *dsc)
466 for (i = 0; i < dsc->var_cnt; i++)
467 moments_destroy (dsc->vars[i].moments);
474 /* Returns false if NAME is a duplicate of any existing variable name or
475 of any previously-declared z-var name; otherwise returns true.
   Existing variables are looked up in DICT. Z-var names are the z_name
   members already stored in DSC's variables; they are compared with
   strcasecmp, i.e. without regard to case. */
477 try_name (const struct dictionary *dict, struct dsc_proc *dsc,
482 if (dict_lookup_var (dict, name) != NULL)
484 for (i = 0; i < dsc->var_cnt; i++)
485 if (!strcasecmp (dsc->vars[i].z_name, name))
490 /* Generates a name for a Z-score variable based on a variable
491 named VAR_NAME, given that *Z_CNT generated variable names are
492 known to already exist. If successful, returns true and
493 copies the new name into Z_NAME. On failure, returns false.
   Every candidate is checked with try_name against DICT and against the
   Z-score names already recorded in DSC. */
495 generate_z_varname (const struct dictionary *dict, struct dsc_proc *dsc, char *z_name,
496 const char *var_name, int *z_cnt)
498 char name[LONG_NAME_LEN + 1];
500 /* Try a name based on the original variable name. */
/* VAR_NAME is copied, truncated to fit, starting at the second character
   of NAME. */
502 str_copy_trunc (name + 1, sizeof name - 1, var_name);
503 if (try_name (dict, dsc, name))
505 strcpy (z_name, name);
509 /* Generate a synthetic name. */
/* Each prefix below covers one range of *Z_CNT values; for STDZ, ZZZZ and
   ZQZQ the offset is subtracted so the printed number counts from the
   start of that prefix's range. */
515 sprintf (name, "ZSC%03d", *z_cnt);
516 else if (*z_cnt <= 108)
517 sprintf (name, "STDZ%02d", *z_cnt - 99);
518 else if (*z_cnt <= 117)
519 sprintf (name, "ZZZZ%02d", *z_cnt - 108);
520 else if (*z_cnt <= 126)
521 sprintf (name, "ZQZQ%02d", *z_cnt - 117);
/* NOTE(review): the message below reads "ZSC001-ZSC0999", but the
   "ZSC%03d" format above produces three digits; presumably "ZSC099" is
   meant. Confirm before changing the user-visible string. */
524 msg (SE, _("Ran out of generic names for Z-score variables. "
525 "There are only 126 generic names: ZSC001-ZSC0999, "
526 "STDZ01-STDZ09, ZZZZ01-ZZZZ09, ZQZQ01-ZQZQ09."));
/* Accept the synthetic name only if it does not clash either. */
530 if (try_name (dict, dsc, name))
532 strcpy (z_name, name);
539 /* Outputs a table describing the mapping between source
540 variables and Z-score variables. Only variables of DSC with a
   non-empty z_name are listed. */
542 dump_z_table (struct dsc_proc *dsc)
/* Look for the variables that have a Z-score name. */
550 for (i = 0; i < dsc->var_cnt; i++)
551 if (dsc->vars[i].z_name[0] != '\0')
/* Two columns; CNT + 1 rows, the first of which holds the titles. */
555 t = tab_create (2, cnt + 1, 0);
556 tab_title (t, _("Mapping of variables to corresponding Z-scores."));
557 tab_columns (t, SOM_COL_DOWN, 1);
558 tab_headers (t, 0, 0, 1, 0);
559 tab_box (t, TAL_1, TAL_1, TAL_0, TAL_1, 0, 0, 1, cnt);
560 tab_hline (t, TAL_2, 0, 1, 1);
561 tab_text (t, 0, 0, TAB_CENTER | TAT_TITLE, _("Source"));
562 tab_text (t, 1, 0, TAB_CENTER | TAT_TITLE, _("Target"));
563 tab_dim (t, tab_natural_dimensions);
/* Body rows: source variable name on the left, Z-score name on the
   right. Y advances only for variables that are listed. */
568 for (i = 0, y = 1; i < dsc->var_cnt; i++)
569 if (dsc->vars[i].z_name[0] != '\0')
571 tab_text (t, 0, y, TAB_LEFT, var_get_name (dsc->vars[i].v));
572 tab_text (t, 1, y++, TAB_LEFT, dsc->vars[i].z_name);
579 /* Transformation function to calculate Z-scores. The Z-score is SYSMIS if any
580 of the following is true: 1) the mean or standard deviation is SYSMIS; 2) the
581 score is SYSMIS; 3) the score is user-missing and user-missing values were not
582 included in the original analysis; 4) with listwise deletion, any of the
583 variables in the original analysis was missing (either system-missing or
   user-missing values that weren't included). */
586 descriptives_trns_proc (void *trns_, struct ccase * c,
587 casenumber case_idx UNUSED)
/* TRNS_ is the struct dsc_trns that setup_z_trns registered. */
589 struct dsc_trns *t = trns_;
590 struct dsc_z_score *z;
591 const struct variable **vars;
/* Listwise deletion: examine every analysis variable of this case for
   values that are missing under t->exclude. */
594 if (t->missing_type == DSC_LISTWISE)
597 for (vars = t->vars; vars < t->vars + t->var_cnt; vars++)
599 double score = case_num (c, *vars);
600 if (var_is_num_missing (*vars, score, t->exclude))
/* Handle each Z-score variable in turn: INPUT is the source value in C,
   OUTPUT points at the Z-score variable's numeric value in C. */
608 for (z = t->z_scores; z < t->z_scores + t->z_score_cnt; z++)
610 double input = case_num (c, z->src_var);
611 double *output = &case_data_rw (c, z->z_var)->f;
/* Missing statistics, a listwise-missing case, or a missing input. */
613 if (z->mean == SYSMIS || z->std_dev == SYSMIS || all_sysmis
614 || var_is_num_missing (z->src_var, input, t->exclude))
/* Standardize: distance from the mean in units of standard deviation. */
617 *output = (input - z->mean) / z->std_dev;
619 return TRNS_CONTINUE;
622 /* Frees a struct dsc_trns, passed as TRNS_. Registered by setup_z_trns
   as the free function of the Z-score transformation. */
624 descriptives_trns_free (void *trns_)
626 struct dsc_trns *t = trns_;
/* Consistency check: t->vars must be non-null exactly when the missing
   type is DSC_LISTWISE. */
629 assert((t->missing_type != DSC_LISTWISE) ^ (t->vars != NULL));
634 /* Sets up a transformation to calculate Z scores: builds a struct
   dsc_trns from DSC, creates the Z-score variables in DS's dictionary,
   and adds the transformation to DS. */
636 setup_z_trns (struct dsc_proc *dsc, struct dataset *ds)
/* Find the variables that have a Z-score name. */
641 for (cnt = i = 0; i < dsc->var_cnt; i++)
642 if (dsc->vars[i].z_name[0] != '\0')
/* Allocate the transformation and copy the missing-value settings. */
645 t = xmalloc (sizeof *t);
646 t->z_scores = xnmalloc (cnt, sizeof *t->z_scores);
647 t->z_score_cnt = cnt;
648 t->missing_type = dsc->missing_type;
649 t->exclude = dsc->exclude;
/* Listwise deletion needs the complete list of analysis variables when
   the transformation runs. */
650 if ( t->missing_type == DSC_LISTWISE )
652 t->var_cnt = dsc->var_cnt;
653 t->vars = xnmalloc (t->var_cnt, sizeof *t->vars);
654 for (i = 0; i < t->var_cnt; i++)
655 t->vars[i] = dsc->vars[i].v;
/* Create each Z-score variable and record the statistics it needs. */
663 for (cnt = i = 0; i < dsc->var_cnt; i++)
665 struct dsc_var *dv = &dsc->vars[i];
666 if (dv->z_name[0] != '\0')
668 struct dsc_z_score *z;
669 struct variable *dst_var;
671 dst_var = dict_create_var_assert (dataset_dict (ds), dv->z_name, 0);
/* NOTE(review): xasprintf presumably returns newly allocated memory;
   confirm whether var_set_label takes ownership of it or copies it (in
   the latter case this string is never freed). */
672 var_set_label (dst_var, xasprintf (_("Z-score of %s"),
673 var_to_string (dv->v)));
675 z = &t->z_scores[cnt++];
678 z->mean = dv->stats[DSC_MEAN];
679 z->std_dev = dv->stats[DSC_STDDEV];
/* Register the transformation together with its free function. */
683 add_transformation (ds,
684 descriptives_trns_proc, descriptives_trns_free, t);
687 /* Statistical calculation. */
/* Forward declaration: called by calc_descriptives, defined after it. */
689 static bool listwise_missing (struct dsc_proc *dsc, const struct ccase *c);
691 /* Calculates and displays descriptive statistics for the cases in GROUP. */
694 calc_descriptives (struct dsc_proc *dsc, struct casereader *group,
697 struct casereader *pass1, *pass2;
/* Use the first case of GROUP for the split-file values. */
701 if (!casereader_peek (group, 0, &c))
703 output_split_file_values (ds, &c);
/* Wrap GROUP in a weight filter built from DS's dictionary. */
706 group = casereader_create_filter_weight (group, dataset_dict (ds),
/* One reader per pass; the second pass is only needed for moments above
   the mean, so its reader is dropped otherwise. */
709 casereader_split (group, &pass1, &pass2);
710 if (dsc->max_moment <= MOMENT_MEAN)
711 casereader_destroy (pass2);
/* Reset the per-variable accumulators for this group. */
713 for (i = 0; i < dsc->var_cnt; i++)
715 struct dsc_var *dv = &dsc->vars[i];
717 dv->valid = dv->missing = 0.0;
718 if (dv->moments != NULL)
719 moments_clear (dv->moments);
723 dsc->missing_listwise = 0.;
726 /* First pass to handle most of the work. */
727 for (; casereader_read (pass1, &c); case_destroy (&c))
729 double weight = dict_get_case_weight (dataset_dict (ds), &c, NULL);
731 /* Check for missing values. */
732 if (listwise_missing (dsc, &c))
/* The listwise-missing weight is accumulated whatever the missing type. */
734 dsc->missing_listwise += weight;
735 if (dsc->missing_type == DSC_LISTWISE)
738 dsc->valid += weight;
740 for (i = 0; i < dsc->var_cnt; i++)
742 struct dsc_var *dv = &dsc->vars[i];
743 double x = case_num (&c, dv->v);
/* Missing for this variable: count its weight as missing. */
745 if (var_is_num_missing (dv->v, x, dsc->exclude))
747 dv->missing += weight;
751 if (dv->moments != NULL)
752 moments_pass_one (dv->moments, x, weight);
760 if (!casereader_destroy (pass1))
763 /* Second pass for higher-order moments. */
764 if (dsc->max_moment > MOMENT_MEAN)
766 for (; casereader_read (pass2, &c); case_destroy (&c))
768 double weight = dict_get_case_weight (dataset_dict (ds), &c, NULL);
770 /* Check for missing values. */
771 if (dsc->missing_type == DSC_LISTWISE && listwise_missing (dsc, &c))
774 for (i = 0; i < dsc->var_cnt; i++)
776 struct dsc_var *dv = &dsc->vars[i];
777 double x = case_num (&c, dv->v);
779 if (var_is_num_missing (dv->v, x, dsc->exclude))
782 if (dv->moments != NULL)
783 moments_pass_two (dv->moments, x, weight);
786 if (!casereader_destroy (pass2))
790 /* Calculate results. */
791 for (i = 0; i < dsc->var_cnt; i++)
793 struct dsc_var *dv = &dsc->vars[i];
/* Start from "everything missing" and fill in what is calculated. */
797 for (j = 0; j < DSC_N_STATS; j++)
798 dv->stats[j] = SYSMIS;
/* Valid weight for this variable: the total valid weight minus the
   weight of the cases where this variable was missing. */
800 dv->valid = W = dsc->valid - dv->missing;
802 if (dv->moments != NULL)
803 moments_calculate (dv->moments, NULL,
804 &dv->stats[DSC_MEAN], &dv->stats[DSC_VARIANCE],
805 &dv->stats[DSC_SKEWNESS], &dv->stats[DSC_KURTOSIS]);
/* Standard error of the mean: standard deviation divided by sqrt (W). */
806 if (dsc->calc_stats & (1ul << DSC_SEMEAN)
807 && dv->stats[DSC_VARIANCE] != SYSMIS && W > 0.)
808 dv->stats[DSC_SEMEAN] = sqrt (dv->stats[DSC_VARIANCE]) / sqrt (W);
809 if (dsc->calc_stats & (1ul << DSC_STDDEV)
810 && dv->stats[DSC_VARIANCE] != SYSMIS)
811 dv->stats[DSC_STDDEV] = sqrt (dv->stats[DSC_VARIANCE]);
/* The standard errors of kurtosis and skewness are only filled in when
   the statistic itself is non-missing. */
812 if (dsc->calc_stats & (1ul << DSC_SEKURT))
813 if (dv->stats[DSC_KURTOSIS] != SYSMIS)
814 dv->stats[DSC_SEKURT] = calc_sekurt (W);
815 if (dsc->calc_stats & (1ul << DSC_SESKEW)
816 && dv->stats[DSC_SKEWNESS] != SYSMIS)
817 dv->stats[DSC_SESKEW] = calc_seskew (W);
/* DBL_MAX in min and -DBL_MAX in max are treated as "no value seen" and
   reported as SYSMIS. */
818 dv->stats[DSC_RANGE] = ((dv->min == DBL_MAX || dv->max == -DBL_MAX)
819 ? SYSMIS : dv->max - dv->min);
820 dv->stats[DSC_MIN] = dv->min == DBL_MAX ? SYSMIS : dv->min;
821 dv->stats[DSC_MAX] = dv->max == -DBL_MAX ? SYSMIS : dv->max;
/* The sum is recovered as valid weight times mean. */
822 if (dsc->calc_stats & (1ul << DSC_SUM))
823 dv->stats[DSC_SUM] = W * dv->stats[DSC_MEAN];
826 /* Output results. */
830 /* Returns true if any of the descriptives variables in DSC's
831 variable list have missing values in case C, false otherwise.
   A value counts as missing according to the classes in dsc->exclude. */
833 listwise_missing (struct dsc_proc *dsc, const struct ccase *c)
837 for (i = 0; i < dsc->var_cnt; i++)
839 struct dsc_var *dv = &dsc->vars[i];
840 double x = case_num (c, dv->v);
842 if (var_is_num_missing (dv->v, x, dsc->exclude))
848 /* Statistical display. */
/* Comparison callback that display() passes to sort(), with the
   struct dsc_proc as auxiliary data. */
850 static algo_compare_func descriptives_compare_dsc_vars;
852 /* Displays a table of descriptive statistics for DSC: one row per
   variable, with a column for each statistic selected in
   dsc->show_stats. Sorts dsc->vars in place first unless sort_by_stat is
   DSC_NONE. */
854 display (struct dsc_proc *dsc)
/* Column count: the variable name, then "N" (LINE format) or "Valid N"
   and "Missing N" (SERIAL format), then the shown statistics. */
860 nc = 1 + (dsc->format == DSC_SERIAL ? 2 : 1);
861 for (i = 0; i < DSC_N_STATS; i++)
862 if (dsc->show_stats & (1ul << i))
/* Sort the variables unless no sorting was requested. */
865 if (dsc->sort_by_stat != DSC_NONE)
866 sort (dsc->vars, dsc->var_cnt, sizeof *dsc->vars,
867 descriptives_compare_dsc_vars, dsc);
/* One title row plus one row per variable. */
869 t = tab_create (nc, dsc->var_cnt + 1, 0);
870 tab_headers (t, 1, 0, 1, 0);
871 tab_box (t, TAL_1, TAL_1, -1, -1, 0, 0, nc - 1, dsc->var_cnt);
872 tab_box (t, -1, -1, -1, TAL_1, 1, 0, nc - 1, dsc->var_cnt);
873 tab_hline (t, TAL_2, 0, nc - 1, 1);
874 tab_vline (t, TAL_2, 1, 0, dsc->var_cnt);
875 tab_dim (t, tab_natural_dimensions);
/* Title row. NC serves as the running column index from here on. */
878 tab_text (t, nc++, 0, TAB_LEFT | TAT_TITLE, _("Variable"));
879 if (dsc->format == DSC_SERIAL)
881 tab_text (t, nc++, 0, TAB_CENTER | TAT_TITLE, _("Valid N"));
882 tab_text (t, nc++, 0, TAB_CENTER | TAT_TITLE, _("Missing N"));
885 tab_text (t, nc++, 0, TAB_CENTER | TAT_TITLE, "N");
/* Statistic titles are stored untranslated in dsc_info and translated
   here. */
887 for (i = 0; i < DSC_N_STATS; i++)
888 if (dsc->show_stats & (1ul << i))
890 const char *title = gettext (dsc_info[i].name);
891 tab_text (t, nc++, 0, TAB_CENTER | TAT_TITLE, title);
/* One row per variable, in the same column order as the title row. */
894 for (i = 0; i < dsc->var_cnt; i++)
896 struct dsc_var *dv = &dsc->vars[i];
900 tab_text (t, nc++, i + 1, TAB_LEFT, var_get_name (dv->v));
901 tab_text (t, nc++, i + 1, TAT_PRINTF, "%g", dv->valid);
902 if (dsc->format == DSC_SERIAL)
903 tab_text (t, nc++, i + 1, TAT_PRINTF, "%g", dv->missing);
904 for (j = 0; j < DSC_N_STATS; j++)
905 if (dsc->show_stats & (1ul << j))
906 tab_float (t, nc++, i + 1, TAB_NONE, dv->stats[j], 10, 3);
/* The title reports the procedure-wide valid and listwise-missing
   weights. */
909 tab_title (t, _("Valid cases = %g; cases with missing value(s) = %g."),
910 dsc->valid, dsc->missing_listwise);
915 /* Compares `struct dsc_var's A and B according to the ordering specified by DSC. */
918 descriptives_compare_dsc_vars (const void *a_, const void *b_, const void *dsc_)
920 const struct dsc_var *a = a_;
921 const struct dsc_var *b = b_;
922 const struct dsc_proc *dsc = dsc_;
926 if (dsc->sort_by_stat == DSC_NAME)
927 result = strcasecmp (var_get_name (a->v), var_get_name (b->v));
930 double as = a->stats[dsc->sort_by_stat];
931 double bs = b->stats[dsc->sort_by_stat];
933 result = as < bs ? -1 : as > bs;
936 if (!dsc->sort_ascending)