1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 1997-9, 2000, 2009 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
23 #include <data/casegrouper.h>
24 #include <data/casereader.h>
25 #include <data/dictionary.h>
26 #include <data/procedure.h>
27 #include <data/transformations.h>
28 #include <data/variable.h>
29 #include <language/command.h>
30 #include <language/dictionary/split-file.h>
31 #include <language/lexer/lexer.h>
32 #include <language/lexer/variable-parser.h>
33 #include <libpspp/array.h>
34 #include <libpspp/compiler.h>
35 #include <libpspp/message.h>
36 #include <libpspp/assertion.h>
37 #include <math/moments.h>
38 #include <output/manager.h>
39 #include <output/table.h>
44 #define _(msgid) gettext (msgid)
45 #define N_(msgid) msgid
47 /* DESCRIPTIVES private data. */
51 /* Handling of missing values. */
54 DSC_VARIABLE, /* Handle missing values on a per-variable basis. */
55 DSC_LISTWISE /* Discard entire case if any variable is missing. */
58 /* Describes properties of a distribution for the purpose of
59 calculating a Z-score. */
62 const struct variable *src_var; /* Variable on which z-score is based. */
63 struct variable *z_var; /* New z-score variable. */
64 double mean; /* Distribution mean. */
65 double std_dev; /* Distribution standard deviation. */
68 /* DESCRIPTIVES transformation (for calculating Z-scores). */
71 struct dsc_z_score *z_scores; /* Array of Z-scores. */
72 int z_score_cnt; /* Number of Z-scores. */
73 const struct variable **vars; /* Variables for listwise missing checks. */
74 size_t var_cnt; /* Number of variables. */
75 enum dsc_missing_type missing_type; /* Treatment of missing values. */
76 enum mv_class exclude; /* Classes of missing values to exclude. */
79 /* Statistics. Used as bit indexes, so must be 32 or fewer. */
82 DSC_MEAN = 0, DSC_SEMEAN, DSC_STDDEV, DSC_VARIANCE, DSC_KURTOSIS,
83 DSC_SEKURT, DSC_SKEWNESS, DSC_SESKEW, DSC_RANGE, DSC_MIN,
84 DSC_MAX, DSC_SUM, DSC_N_STATS,
86 /* Only valid as sort criteria. */
87 DSC_NAME = -2, /* Sort by name. */
88 DSC_NONE = -1 /* Unsorted. */
91 /* Describes one statistic. */
92 struct dsc_statistic_info
94 const char *identifier; /* Identifier. */
95 const char *name; /* Full name. */
96 enum moment moment; /* Highest moment needed to calculate. */
99 /* Table of statistics, indexed by DSC_*. */
100 static const struct dsc_statistic_info dsc_info[DSC_N_STATS] =
102 {"MEAN", N_("Mean"), MOMENT_MEAN},
103 {"SEMEAN", N_("S E Mean"), MOMENT_VARIANCE},
104 {"STDDEV", N_("Std Dev"), MOMENT_VARIANCE},
105 {"VARIANCE", N_("Variance"), MOMENT_VARIANCE},
106 {"KURTOSIS", N_("Kurtosis"), MOMENT_KURTOSIS},
107 {"SEKURTOSIS", N_("S E Kurt"), MOMENT_NONE},
108 {"SKEWNESS", N_("Skewness"), MOMENT_SKEWNESS},
109 {"SESKEWNESS", N_("S E Skew"), MOMENT_NONE},
110 {"RANGE", N_("Range"), MOMENT_NONE},
111 {"MINIMUM", N_("Minimum"), MOMENT_NONE},
112 {"MAXIMUM", N_("Maximum"), MOMENT_NONE},
113 {"SUM", N_("Sum"), MOMENT_MEAN},
116 /* Statistics calculated by default if none are explicitly
118 #define DEFAULT_STATS \
119 ((1ul << DSC_MEAN) | (1ul << DSC_STDDEV) | (1ul << DSC_MIN) \
122 /* A variable specified on DESCRIPTIVES. */
125 const struct variable *v; /* Variable to calculate on. */
126 char z_name[VAR_NAME_LEN + 1]; /* Name for z-score variable. */
127 double valid, missing; /* Valid, missing counts. */
128 struct moments *moments; /* Moments. */
129 double min, max; /* Maximum and mimimum values. */
130 double stats[DSC_N_STATS]; /* All the stats' values. */
136 DSC_LINE, /* Abbreviated format. */
137 DSC_SERIAL /* Long format. */
140 /* A DESCRIPTIVES procedure. */
143 /* Per-variable info. */
144 struct dsc_var *vars; /* Variables. */
145 size_t var_cnt; /* Number of variables. */
148 enum dsc_missing_type missing_type; /* Treatment of missing values. */
149 enum mv_class exclude; /* Classes of missing values to exclude. */
150 int show_var_labels; /* Nonzero to show variable labels. */
151 int show_index; /* Nonzero to show variable index. */
152 enum dsc_format format; /* Output format. */
154 /* Accumulated results. */
155 double missing_listwise; /* Sum of weights of cases missing listwise. */
156 double valid; /* Sum of weights of valid cases. */
157 bool bad_warn; /* Warn if bad weight found. */
158 enum dsc_statistic sort_by_stat; /* Statistic to sort by; -1: name. */
159 int sort_ascending; /* !0: ascending order; 0: descending. */
160 unsigned long show_stats; /* Statistics to display. */
161 unsigned long calc_stats; /* Statistics to calculate. */
162 enum moment max_moment; /* Highest moment needed for stats. */
166 static enum dsc_statistic match_statistic (struct lexer *);
167 static void free_dsc_proc (struct dsc_proc *);
169 /* Z-score functions. */
170 static bool try_name (const struct dictionary *dict,
171 struct dsc_proc *dsc, const char *name);
172 static bool generate_z_varname (const struct dictionary *dict,
173 struct dsc_proc *dsc, char *z_name,
174 const char *name, int *z_cnt);
175 static void dump_z_table (struct dsc_proc *);
176 static void setup_z_trns (struct dsc_proc *, struct dataset *);
178 /* Procedure execution functions. */
179 static void calc_descriptives (struct dsc_proc *, struct casereader *,
181 static void display (struct dsc_proc *dsc);
183 /* Parser and outline. */
185 /* Handles DESCRIPTIVES. */
187 cmd_descriptives (struct lexer *lexer, struct dataset *ds)
189 struct dictionary *dict = dataset_dict (ds);
190 struct dsc_proc *dsc;
191 const struct variable **vars = NULL;
193 int save_z_scores = 0;
198 struct casegrouper *grouper;
199 struct casereader *group;
201 /* Create and initialize dsc. */
202 dsc = xmalloc (sizeof *dsc);
205 dsc->missing_type = DSC_VARIABLE;
206 dsc->exclude = MV_ANY;
207 dsc->show_var_labels = 1;
209 dsc->format = DSC_LINE;
210 dsc->missing_listwise = 0.;
213 dsc->sort_by_stat = DSC_NONE;
214 dsc->sort_ascending = 1;
215 dsc->show_stats = dsc->calc_stats = DEFAULT_STATS;
217 /* Parse DESCRIPTIVES. */
218 while (lex_token (lexer) != '.')
220 if (lex_match_id (lexer, "MISSING"))
222 lex_match (lexer, '=');
223 while (lex_token (lexer) != '.' && lex_token (lexer) != '/')
225 if (lex_match_id (lexer, "VARIABLE"))
226 dsc->missing_type = DSC_VARIABLE;
227 else if (lex_match_id (lexer, "LISTWISE"))
228 dsc->missing_type = DSC_LISTWISE;
229 else if (lex_match_id (lexer, "INCLUDE"))
230 dsc->exclude = MV_SYSTEM;
233 lex_error (lexer, NULL);
236 lex_match (lexer, ',');
239 else if (lex_match_id (lexer, "SAVE"))
241 else if (lex_match_id (lexer, "FORMAT"))
243 lex_match (lexer, '=');
244 while (lex_token (lexer) != '.' && lex_token (lexer) != '/')
246 if (lex_match_id (lexer, "LABELS"))
247 dsc->show_var_labels = 1;
248 else if (lex_match_id (lexer, "NOLABELS"))
249 dsc->show_var_labels = 0;
250 else if (lex_match_id (lexer, "INDEX"))
252 else if (lex_match_id (lexer, "NOINDEX"))
254 else if (lex_match_id (lexer, "LINE"))
255 dsc->format = DSC_LINE;
256 else if (lex_match_id (lexer, "SERIAL"))
257 dsc->format = DSC_SERIAL;
260 lex_error (lexer, NULL);
263 lex_match (lexer, ',');
266 else if (lex_match_id (lexer, "STATISTICS"))
268 lex_match (lexer, '=');
270 while (lex_token (lexer) != '.' && lex_token (lexer) != '/')
272 if (lex_match (lexer, T_ALL))
273 dsc->show_stats |= (1ul << DSC_N_STATS) - 1;
274 else if (lex_match_id (lexer, "DEFAULT"))
275 dsc->show_stats |= DEFAULT_STATS;
277 dsc->show_stats |= 1ul << (match_statistic (lexer));
278 lex_match (lexer, ',');
280 if (dsc->show_stats == 0)
281 dsc->show_stats = DEFAULT_STATS;
283 else if (lex_match_id (lexer, "SORT"))
285 lex_match (lexer, '=');
286 if (lex_match_id (lexer, "NAME"))
287 dsc->sort_by_stat = DSC_NAME;
290 dsc->sort_by_stat = match_statistic (lexer);
291 if (dsc->sort_by_stat == DSC_NONE )
292 dsc->sort_by_stat = DSC_MEAN;
294 if (lex_match (lexer, '('))
296 if (lex_match_id (lexer, "A"))
297 dsc->sort_ascending = 1;
298 else if (lex_match_id (lexer, "D"))
299 dsc->sort_ascending = 0;
301 lex_error (lexer, NULL);
302 lex_force_match (lexer, ')');
305 else if (var_cnt == 0)
307 if (lex_look_ahead (lexer) == '=')
309 lex_match_id (lexer, "VARIABLES");
310 lex_match (lexer, '=');
313 while (lex_token (lexer) != '.' && lex_token (lexer) != '/')
317 if (!parse_variables_const (lexer, dict, &vars, &var_cnt,
318 PV_APPEND | PV_NO_DUPLICATE | PV_NUMERIC))
321 dsc->vars = xnrealloc ((void *)dsc->vars, var_cnt, sizeof *dsc->vars);
322 for (i = dsc->var_cnt; i < var_cnt; i++)
324 struct dsc_var *dv = &dsc->vars[i];
326 dv->z_name[0] = '\0';
329 dsc->var_cnt = var_cnt;
331 if (lex_match (lexer, '('))
333 if (lex_token (lexer) != T_ID)
335 lex_error (lexer, NULL);
338 if (try_name (dict, dsc, lex_tokid (lexer)))
340 strcpy (dsc->vars[dsc->var_cnt - 1].z_name, lex_tokid (lexer));
344 msg (SE, _("Z-score variable name %s would be"
345 " a duplicate variable name."), lex_tokid (lexer));
347 if (!lex_force_match (lexer, ')'))
354 lex_error (lexer, NULL);
358 lex_match (lexer, '/');
362 msg (SE, _("No variables specified."));
366 /* Construct z-score varnames, show translation table. */
367 if (z_cnt || save_z_scores)
373 for (i = 0; i < dsc->var_cnt; i++)
374 if (dsc->vars[i].z_name[0] == 0)
376 if (!generate_z_varname (dict, dsc, dsc->vars[i].z_name,
377 var_get_name (dsc->vars[i].v),
386 /* Figure out statistics to display. */
387 if (dsc->show_stats & (1ul << DSC_SKEWNESS))
388 dsc->show_stats |= 1ul << DSC_SESKEW;
389 if (dsc->show_stats & (1ul << DSC_KURTOSIS))
390 dsc->show_stats |= 1ul << DSC_SEKURT;
392 /* Figure out which statistics to calculate. */
393 dsc->calc_stats = dsc->show_stats;
395 dsc->calc_stats |= (1ul << DSC_MEAN) | (1ul << DSC_STDDEV);
396 if (dsc->sort_by_stat >= 0)
397 dsc->calc_stats |= 1ul << dsc->sort_by_stat;
398 if (dsc->show_stats & (1ul << DSC_SESKEW))
399 dsc->calc_stats |= 1ul << DSC_SKEWNESS;
400 if (dsc->show_stats & (1ul << DSC_SEKURT))
401 dsc->calc_stats |= 1ul << DSC_KURTOSIS;
403 /* Figure out maximum moment needed and allocate moments for
405 dsc->max_moment = MOMENT_NONE;
406 for (i = 0; i < DSC_N_STATS; i++)
407 if (dsc->calc_stats & (1ul << i) && dsc_info[i].moment > dsc->max_moment)
408 dsc->max_moment = dsc_info[i].moment;
409 if (dsc->max_moment != MOMENT_NONE)
410 for (i = 0; i < dsc->var_cnt; i++)
411 dsc->vars[i].moments = moments_create (dsc->max_moment);
414 grouper = casegrouper_create_splits (proc_open (ds), dict);
415 while (casegrouper_get_next_group (grouper, &group))
416 calc_descriptives (dsc, group, ds);
417 ok = casegrouper_destroy (grouper);
418 ok = proc_commit (ds) && ok;
422 setup_z_trns (dsc, ds);
427 return ok ? CMD_SUCCESS : CMD_CASCADING_FAILURE;
435 /* Returns the statistic named by the current token and skips past the token.
436 Returns DSC_NONE if no statistic is given (e.g., subcommand with no
437 specifiers). Emits an error if the current token ID does not name a
439 static enum dsc_statistic
440 match_statistic (struct lexer *lexer)
442 if (lex_token (lexer) == T_ID)
444 enum dsc_statistic stat;
446 for (stat = 0; stat < DSC_N_STATS; stat++)
447 if (lex_match_id (lexer, dsc_info[stat].identifier))
451 lex_error (lexer, _("expecting statistic name: reverting to default"));
459 free_dsc_proc (struct dsc_proc *dsc)
466 for (i = 0; i < dsc->var_cnt; i++)
467 moments_destroy (dsc->vars[i].moments);
474 /* Returns false if NAME is a duplicate of any existing variable name or
475 of any previously-declared z-var name; otherwise returns true. */
477 try_name (const struct dictionary *dict, struct dsc_proc *dsc,
482 if (dict_lookup_var (dict, name) != NULL)
484 for (i = 0; i < dsc->var_cnt; i++)
485 if (!strcasecmp (dsc->vars[i].z_name, name))
490 /* Generates a name for a Z-score variable based on a variable
491 named VAR_NAME, given that *Z_CNT generated variable names are
492 known to already exist. If successful, returns true and
493 copies the new name into Z_NAME. On failure, returns false. */
495 generate_z_varname (const struct dictionary *dict, struct dsc_proc *dsc, char *z_name,
496 const char *var_name, int *z_cnt)
498 char name[VAR_NAME_LEN + 1];
500 /* Try a name based on the original variable name. */
502 str_copy_trunc (name + 1, sizeof name - 1, var_name);
503 if (try_name (dict, dsc, name))
505 strcpy (z_name, name);
509 /* Generate a synthetic name. */
515 sprintf (name, "ZSC%03d", *z_cnt);
516 else if (*z_cnt <= 108)
517 sprintf (name, "STDZ%02d", *z_cnt - 99);
518 else if (*z_cnt <= 117)
519 sprintf (name, "ZZZZ%02d", *z_cnt - 108);
520 else if (*z_cnt <= 126)
521 sprintf (name, "ZQZQ%02d", *z_cnt - 117);
524 msg (SE, _("Ran out of generic names for Z-score variables. "
525 "There are only 126 generic names: ZSC001-ZSC0999, "
526 "STDZ01-STDZ09, ZZZZ01-ZZZZ09, ZQZQ01-ZQZQ09."));
530 if (try_name (dict, dsc, name))
532 strcpy (z_name, name);
539 /* Outputs a table describing the mapping between source
540 variables and Z-score variables. */
542 dump_z_table (struct dsc_proc *dsc)
550 for (i = 0; i < dsc->var_cnt; i++)
551 if (dsc->vars[i].z_name[0] != '\0')
555 t = tab_create (2, cnt + 1, 0);
556 tab_title (t, _("Mapping of variables to corresponding Z-scores."));
557 tab_columns (t, SOM_COL_DOWN, 1);
558 tab_headers (t, 0, 0, 1, 0);
559 tab_box (t, TAL_1, TAL_1, TAL_0, TAL_1, 0, 0, 1, cnt);
560 tab_hline (t, TAL_2, 0, 1, 1);
561 tab_text (t, 0, 0, TAB_CENTER | TAT_TITLE, _("Source"));
562 tab_text (t, 1, 0, TAB_CENTER | TAT_TITLE, _("Target"));
563 tab_dim (t, tab_natural_dimensions, NULL);
568 for (i = 0, y = 1; i < dsc->var_cnt; i++)
569 if (dsc->vars[i].z_name[0] != '\0')
571 tab_text (t, 0, y, TAB_LEFT, var_get_name (dsc->vars[i].v));
572 tab_text (t, 1, y++, TAB_LEFT, dsc->vars[i].z_name);
579 /* Transformation function to calculate Z-scores. Will return SYSMIS if any of
580 the following are true: 1) mean or standard deviation is SYSMIS 2) score is
581 SYSMIS 3) score is user missing and they were not included in the original
582 analyis. 4) any of the variables in the original analysis were missing
583 (either system or user-missing values that weren't included).
586 descriptives_trns_proc (void *trns_, struct ccase **c,
587 casenumber case_idx UNUSED)
589 struct dsc_trns *t = trns_;
590 struct dsc_z_score *z;
591 const struct variable **vars;
594 if (t->missing_type == DSC_LISTWISE)
597 for (vars = t->vars; vars < t->vars + t->var_cnt; vars++)
599 double score = case_num (*c, *vars);
600 if (var_is_num_missing (*vars, score, t->exclude))
608 *c = case_unshare (*c);
609 for (z = t->z_scores; z < t->z_scores + t->z_score_cnt; z++)
611 double input = case_num (*c, z->src_var);
612 double *output = &case_data_rw (*c, z->z_var)->f;
614 if (z->mean == SYSMIS || z->std_dev == SYSMIS || all_sysmis
615 || var_is_num_missing (z->src_var, input, t->exclude))
618 *output = (input - z->mean) / z->std_dev;
620 return TRNS_CONTINUE;
623 /* Frees a descriptives_trns struct. */
625 descriptives_trns_free (void *trns_)
627 struct dsc_trns *t = trns_;
630 assert((t->missing_type != DSC_LISTWISE) ^ (t->vars != NULL));
635 /* Sets up a transformation to calculate Z scores. */
637 setup_z_trns (struct dsc_proc *dsc, struct dataset *ds)
642 for (cnt = i = 0; i < dsc->var_cnt; i++)
643 if (dsc->vars[i].z_name[0] != '\0')
646 t = xmalloc (sizeof *t);
647 t->z_scores = xnmalloc (cnt, sizeof *t->z_scores);
648 t->z_score_cnt = cnt;
649 t->missing_type = dsc->missing_type;
650 t->exclude = dsc->exclude;
651 if ( t->missing_type == DSC_LISTWISE )
653 t->var_cnt = dsc->var_cnt;
654 t->vars = xnmalloc (t->var_cnt, sizeof *t->vars);
655 for (i = 0; i < t->var_cnt; i++)
656 t->vars[i] = dsc->vars[i].v;
664 for (cnt = i = 0; i < dsc->var_cnt; i++)
666 struct dsc_var *dv = &dsc->vars[i];
667 if (dv->z_name[0] != '\0')
669 struct dsc_z_score *z;
670 struct variable *dst_var;
672 dst_var = dict_create_var_assert (dataset_dict (ds), dv->z_name, 0);
673 var_set_label (dst_var, xasprintf (_("Z-score of %s"),
674 var_to_string (dv->v)));
676 z = &t->z_scores[cnt++];
679 z->mean = dv->stats[DSC_MEAN];
680 z->std_dev = dv->stats[DSC_STDDEV];
684 add_transformation (ds,
685 descriptives_trns_proc, descriptives_trns_free, t);
688 /* Statistical calculation. */
690 static bool listwise_missing (struct dsc_proc *dsc, const struct ccase *c);
692 /* Calculates and displays descriptive statistics for the cases
695 calc_descriptives (struct dsc_proc *dsc, struct casereader *group,
698 struct casereader *pass1, *pass2;
702 c = casereader_peek (group, 0);
705 casereader_destroy (group);
708 output_split_file_values (ds, c);
711 group = casereader_create_filter_weight (group, dataset_dict (ds),
715 pass2 = dsc->max_moment <= MOMENT_MEAN ? NULL : casereader_clone (pass1);
717 for (i = 0; i < dsc->var_cnt; i++)
719 struct dsc_var *dv = &dsc->vars[i];
721 dv->valid = dv->missing = 0.0;
722 if (dv->moments != NULL)
723 moments_clear (dv->moments);
727 dsc->missing_listwise = 0.;
730 /* First pass to handle most of the work. */
731 for (; (c = casereader_read (pass1)) != NULL; case_unref (c))
733 double weight = dict_get_case_weight (dataset_dict (ds), c, NULL);
735 /* Check for missing values. */
736 if (listwise_missing (dsc, c))
738 dsc->missing_listwise += weight;
739 if (dsc->missing_type == DSC_LISTWISE)
742 dsc->valid += weight;
744 for (i = 0; i < dsc->var_cnt; i++)
746 struct dsc_var *dv = &dsc->vars[i];
747 double x = case_num (c, dv->v);
749 if (var_is_num_missing (dv->v, x, dsc->exclude))
751 dv->missing += weight;
755 if (dv->moments != NULL)
756 moments_pass_one (dv->moments, x, weight);
764 if (!casereader_destroy (pass1))
766 casereader_destroy (pass2);
770 /* Second pass for higher-order moments. */
771 if (dsc->max_moment > MOMENT_MEAN)
773 for (; (c = casereader_read (pass2)) != NULL; case_unref (c))
775 double weight = dict_get_case_weight (dataset_dict (ds), c, NULL);
777 /* Check for missing values. */
778 if (dsc->missing_type == DSC_LISTWISE && listwise_missing (dsc, c))
781 for (i = 0; i < dsc->var_cnt; i++)
783 struct dsc_var *dv = &dsc->vars[i];
784 double x = case_num (c, dv->v);
786 if (var_is_num_missing (dv->v, x, dsc->exclude))
789 if (dv->moments != NULL)
790 moments_pass_two (dv->moments, x, weight);
793 if (!casereader_destroy (pass2))
797 /* Calculate results. */
798 for (i = 0; i < dsc->var_cnt; i++)
800 struct dsc_var *dv = &dsc->vars[i];
804 for (j = 0; j < DSC_N_STATS; j++)
805 dv->stats[j] = SYSMIS;
807 dv->valid = W = dsc->valid - dv->missing;
809 if (dv->moments != NULL)
810 moments_calculate (dv->moments, NULL,
811 &dv->stats[DSC_MEAN], &dv->stats[DSC_VARIANCE],
812 &dv->stats[DSC_SKEWNESS], &dv->stats[DSC_KURTOSIS]);
813 if (dsc->calc_stats & (1ul << DSC_SEMEAN)
814 && dv->stats[DSC_VARIANCE] != SYSMIS && W > 0.)
815 dv->stats[DSC_SEMEAN] = sqrt (dv->stats[DSC_VARIANCE]) / sqrt (W);
816 if (dsc->calc_stats & (1ul << DSC_STDDEV)
817 && dv->stats[DSC_VARIANCE] != SYSMIS)
818 dv->stats[DSC_STDDEV] = sqrt (dv->stats[DSC_VARIANCE]);
819 if (dsc->calc_stats & (1ul << DSC_SEKURT))
820 if (dv->stats[DSC_KURTOSIS] != SYSMIS)
821 dv->stats[DSC_SEKURT] = calc_sekurt (W);
822 if (dsc->calc_stats & (1ul << DSC_SESKEW)
823 && dv->stats[DSC_SKEWNESS] != SYSMIS)
824 dv->stats[DSC_SESKEW] = calc_seskew (W);
825 dv->stats[DSC_RANGE] = ((dv->min == DBL_MAX || dv->max == -DBL_MAX)
826 ? SYSMIS : dv->max - dv->min);
827 dv->stats[DSC_MIN] = dv->min == DBL_MAX ? SYSMIS : dv->min;
828 dv->stats[DSC_MAX] = dv->max == -DBL_MAX ? SYSMIS : dv->max;
829 if (dsc->calc_stats & (1ul << DSC_SUM))
830 dv->stats[DSC_SUM] = W * dv->stats[DSC_MEAN];
833 /* Output results. */
837 /* Returns true if any of the descriptives variables in DSC's
838 variable list have missing values in case C, false otherwise. */
840 listwise_missing (struct dsc_proc *dsc, const struct ccase *c)
844 for (i = 0; i < dsc->var_cnt; i++)
846 struct dsc_var *dv = &dsc->vars[i];
847 double x = case_num (c, dv->v);
849 if (var_is_num_missing (dv->v, x, dsc->exclude))
855 /* Statistical display. */
857 static algo_compare_func descriptives_compare_dsc_vars;
859 /* Displays a table of descriptive statistics for DSC. */
861 display (struct dsc_proc *dsc)
867 nc = 1 + (dsc->format == DSC_SERIAL ? 2 : 1);
868 for (i = 0; i < DSC_N_STATS; i++)
869 if (dsc->show_stats & (1ul << i))
872 if (dsc->sort_by_stat != DSC_NONE)
873 sort (dsc->vars, dsc->var_cnt, sizeof *dsc->vars,
874 descriptives_compare_dsc_vars, dsc);
876 t = tab_create (nc, dsc->var_cnt + 1, 0);
877 tab_headers (t, 1, 0, 1, 0);
878 tab_box (t, TAL_1, TAL_1, -1, -1, 0, 0, nc - 1, dsc->var_cnt);
879 tab_box (t, -1, -1, -1, TAL_1, 1, 0, nc - 1, dsc->var_cnt);
880 tab_hline (t, TAL_2, 0, nc - 1, 1);
881 tab_vline (t, TAL_2, 1, 0, dsc->var_cnt);
882 tab_dim (t, tab_natural_dimensions, NULL);
885 tab_text (t, nc++, 0, TAB_LEFT | TAT_TITLE, _("Variable"));
886 if (dsc->format == DSC_SERIAL)
888 tab_text (t, nc++, 0, TAB_CENTER | TAT_TITLE, _("Valid N"));
889 tab_text (t, nc++, 0, TAB_CENTER | TAT_TITLE, _("Missing N"));
892 tab_text (t, nc++, 0, TAB_CENTER | TAT_TITLE, "N");
894 for (i = 0; i < DSC_N_STATS; i++)
895 if (dsc->show_stats & (1ul << i))
897 const char *title = gettext (dsc_info[i].name);
898 tab_text (t, nc++, 0, TAB_CENTER | TAT_TITLE, title);
901 for (i = 0; i < dsc->var_cnt; i++)
903 struct dsc_var *dv = &dsc->vars[i];
907 tab_text (t, nc++, i + 1, TAB_LEFT, var_get_name (dv->v));
908 tab_text (t, nc++, i + 1, TAT_PRINTF, "%g", dv->valid);
909 if (dsc->format == DSC_SERIAL)
910 tab_text (t, nc++, i + 1, TAT_PRINTF, "%g", dv->missing);
912 for (j = 0; j < DSC_N_STATS; j++)
913 if (dsc->show_stats & (1ul << j))
914 tab_double (t, nc++, i + 1, TAB_NONE, dv->stats[j], NULL);
917 tab_title (t, _("Valid cases = %g; cases with missing value(s) = %g."),
918 dsc->valid, dsc->missing_listwise);
923 /* Compares `struct dsc_var's A and B according to the ordering
926 descriptives_compare_dsc_vars (const void *a_, const void *b_, const void *dsc_)
928 const struct dsc_var *a = a_;
929 const struct dsc_var *b = b_;
930 const struct dsc_proc *dsc = dsc_;
934 if (dsc->sort_by_stat == DSC_NAME)
935 result = strcasecmp (var_get_name (a->v), var_get_name (b->v));
938 double as = a->stats[dsc->sort_by_stat];
939 double bs = b->stats[dsc->sort_by_stat];
941 result = as < bs ? -1 : as > bs;
944 if (!dsc->sort_ascending)