1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
4 This program is free software; you can redistribute it and/or
5 modify it under the terms of the GNU General Public License as
6 published by the Free Software Foundation; either version 2 of the
7 License, or (at your option) any later version.
9 This program is distributed in the hope that it will be useful, but
10 WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
25 #include <data/casegrouper.h>
26 #include <data/casereader.h>
27 #include <data/dictionary.h>
28 #include <data/procedure.h>
29 #include <data/transformations.h>
30 #include <data/variable.h>
31 #include <language/command.h>
32 #include <language/dictionary/split-file.h>
33 #include <language/lexer/lexer.h>
34 #include <language/lexer/variable-parser.h>
35 #include <libpspp/alloc.h>
36 #include <libpspp/array.h>
37 #include <libpspp/compiler.h>
38 #include <libpspp/magic.h>
39 #include <libpspp/message.h>
40 #include <libpspp/assertion.h>
41 #include <math/moments.h>
42 #include <output/manager.h>
43 #include <output/table.h>
46 #define _(msgid) gettext (msgid)
47 #define N_(msgid) msgid
49 /* DESCRIPTIVES private data. */
53 /* Handling of missing values. */
56 DSC_VARIABLE, /* Handle missing values on a per-variable basis. */
57 DSC_LISTWISE /* Discard entire case if any variable is missing. */
60 /* Describes properties of a distribution for the purpose of
61 calculating a Z-score. */
64 const struct variable *src_var; /* Variable on which z-score is based. */
65 struct variable *z_var; /* New z-score variable. */
66 double mean; /* Distribution mean. */
67 double std_dev; /* Distribution standard deviation. */
70 /* DESCRIPTIVES transformation (for calculating Z-scores). */
73 struct dsc_z_score *z_scores; /* Array of Z-scores. */
74 int z_score_cnt; /* Number of Z-scores. */
75 const struct variable **vars; /* Variables for listwise missing checks. */
76 size_t var_cnt; /* Number of variables. */
77 enum dsc_missing_type missing_type; /* Treatment of missing values. */
78 enum mv_class exclude; /* Classes of missing values to exclude. */
81 /* Statistics. Used as bit indexes, so must be 32 or fewer. */
84 DSC_MEAN = 0, DSC_SEMEAN, DSC_STDDEV, DSC_VARIANCE, DSC_KURTOSIS,
85 DSC_SEKURT, DSC_SKEWNESS, DSC_SESKEW, DSC_RANGE, DSC_MIN,
86 DSC_MAX, DSC_SUM, DSC_N_STATS,
88 /* Only valid as sort criteria. */
89 DSC_NAME = -2, /* Sort by name. */
90 DSC_NONE = -1 /* Unsorted. */
93 /* Describes one statistic. */
94 struct dsc_statistic_info
96 const char *identifier; /* Identifier. */
97 const char *name; /* Full name. */
98 enum moment moment; /* Highest moment needed to calculate. */
101 /* Table of statistics, indexed by DSC_*. */
102 static const struct dsc_statistic_info dsc_info[DSC_N_STATS] =
104 {"MEAN", N_("Mean"), MOMENT_MEAN},
105 {"SEMEAN", N_("S E Mean"), MOMENT_VARIANCE},
106 {"STDDEV", N_("Std Dev"), MOMENT_VARIANCE},
107 {"VARIANCE", N_("Variance"), MOMENT_VARIANCE},
108 {"KURTOSIS", N_("Kurtosis"), MOMENT_KURTOSIS},
109 {"SEKURTOSIS", N_("S E Kurt"), MOMENT_NONE},
110 {"SKEWNESS", N_("Skewness"), MOMENT_SKEWNESS},
111 {"SESKEWNESS", N_("S E Skew"), MOMENT_NONE},
112 {"RANGE", N_("Range"), MOMENT_NONE},
113 {"MINIMUM", N_("Minimum"), MOMENT_NONE},
114 {"MAXIMUM", N_("Maximum"), MOMENT_NONE},
115 {"SUM", N_("Sum"), MOMENT_MEAN},
118 /* Statistics calculated by default if none are explicitly
120 #define DEFAULT_STATS \
121 ((1ul << DSC_MEAN) | (1ul << DSC_STDDEV) | (1ul << DSC_MIN) \
124 /* A variable specified on DESCRIPTIVES. */
127 const struct variable *v; /* Variable to calculate on. */
128 char z_name[LONG_NAME_LEN + 1]; /* Name for z-score variable. */
129 double valid, missing; /* Valid, missing counts. */
130 struct moments *moments; /* Moments. */
131 double min, max; /* Maximum and mimimum values. */
132 double stats[DSC_N_STATS]; /* All the stats' values. */
138 DSC_LINE, /* Abbreviated format. */
139 DSC_SERIAL /* Long format. */
142 /* A DESCRIPTIVES procedure. */
145 /* Per-variable info. */
146 struct dsc_var *vars; /* Variables. */
147 size_t var_cnt; /* Number of variables. */
150 enum dsc_missing_type missing_type; /* Treatment of missing values. */
151 enum mv_class exclude; /* Classes of missing values to exclude. */
152 int show_var_labels; /* Nonzero to show variable labels. */
153 int show_index; /* Nonzero to show variable index. */
154 enum dsc_format format; /* Output format. */
156 /* Accumulated results. */
157 double missing_listwise; /* Sum of weights of cases missing listwise. */
158 double valid; /* Sum of weights of valid cases. */
159 bool bad_warn; /* Warn if bad weight found. */
160 enum dsc_statistic sort_by_stat; /* Statistic to sort by; -1: name. */
161 int sort_ascending; /* !0: ascending order; 0: descending. */
162 unsigned long show_stats; /* Statistics to display. */
163 unsigned long calc_stats; /* Statistics to calculate. */
164 enum moment max_moment; /* Highest moment needed for stats. */
168 static enum dsc_statistic match_statistic (struct lexer *);
169 static void free_dsc_proc (struct dsc_proc *);
171 /* Z-score functions. */
172 static bool try_name (const struct dictionary *dict,
173 struct dsc_proc *dsc, const char *name);
174 static bool generate_z_varname (const struct dictionary *dict,
175 struct dsc_proc *dsc, char *z_name,
176 const char *name, int *z_cnt);
177 static void dump_z_table (struct dsc_proc *);
178 static void setup_z_trns (struct dsc_proc *, struct dataset *);
180 /* Procedure execution functions. */
181 static void calc_descriptives (struct dsc_proc *, struct casereader *,
183 static void display (struct dsc_proc *dsc);
185 /* Parser and outline. */
187 /* Handles DESCRIPTIVES. */
189 cmd_descriptives (struct lexer *lexer, struct dataset *ds)
191 struct dictionary *dict = dataset_dict (ds);
192 struct dsc_proc *dsc;
193 const struct variable **vars = NULL;
195 int save_z_scores = 0;
200 struct casegrouper *grouper;
201 struct casereader *group;
203 /* Create and initialize dsc. */
204 dsc = xmalloc (sizeof *dsc);
207 dsc->missing_type = DSC_VARIABLE;
208 dsc->exclude = MV_ANY;
209 dsc->show_var_labels = 1;
211 dsc->format = DSC_LINE;
212 dsc->missing_listwise = 0.;
215 dsc->sort_by_stat = DSC_NONE;
216 dsc->sort_ascending = 1;
217 dsc->show_stats = dsc->calc_stats = DEFAULT_STATS;
219 /* Parse DESCRIPTIVES. */
220 while (lex_token (lexer) != '.')
222 if (lex_match_id (lexer, "MISSING"))
224 lex_match (lexer, '=');
225 while (lex_token (lexer) != '.' && lex_token (lexer) != '/')
227 if (lex_match_id (lexer, "VARIABLE"))
228 dsc->missing_type = DSC_VARIABLE;
229 else if (lex_match_id (lexer, "LISTWISE"))
230 dsc->missing_type = DSC_LISTWISE;
231 else if (lex_match_id (lexer, "INCLUDE"))
232 dsc->exclude = MV_SYSTEM;
235 lex_error (lexer, NULL);
238 lex_match (lexer, ',');
241 else if (lex_match_id (lexer, "SAVE"))
243 else if (lex_match_id (lexer, "FORMAT"))
245 lex_match (lexer, '=');
246 while (lex_token (lexer) != '.' && lex_token (lexer) != '/')
248 if (lex_match_id (lexer, "LABELS"))
249 dsc->show_var_labels = 1;
250 else if (lex_match_id (lexer, "NOLABELS"))
251 dsc->show_var_labels = 0;
252 else if (lex_match_id (lexer, "INDEX"))
254 else if (lex_match_id (lexer, "NOINDEX"))
256 else if (lex_match_id (lexer, "LINE"))
257 dsc->format = DSC_LINE;
258 else if (lex_match_id (lexer, "SERIAL"))
259 dsc->format = DSC_SERIAL;
262 lex_error (lexer, NULL);
265 lex_match (lexer, ',');
268 else if (lex_match_id (lexer, "STATISTICS"))
270 lex_match (lexer, '=');
272 while (lex_token (lexer) != '.' && lex_token (lexer) != '/')
274 if (lex_match (lexer, T_ALL))
275 dsc->show_stats |= (1ul << DSC_N_STATS) - 1;
276 else if (lex_match_id (lexer, "DEFAULT"))
277 dsc->show_stats |= DEFAULT_STATS;
279 dsc->show_stats |= 1ul << (match_statistic (lexer));
280 lex_match (lexer, ',');
282 if (dsc->show_stats == 0)
283 dsc->show_stats = DEFAULT_STATS;
285 else if (lex_match_id (lexer, "SORT"))
287 lex_match (lexer, '=');
288 if (lex_match_id (lexer, "NAME"))
289 dsc->sort_by_stat = DSC_NAME;
292 dsc->sort_by_stat = match_statistic (lexer);
293 if (dsc->sort_by_stat == DSC_NONE )
294 dsc->sort_by_stat = DSC_MEAN;
296 if (lex_match (lexer, '('))
298 if (lex_match_id (lexer, "A"))
299 dsc->sort_ascending = 1;
300 else if (lex_match_id (lexer, "D"))
301 dsc->sort_ascending = 0;
303 lex_error (lexer, NULL);
304 lex_force_match (lexer, ')');
307 else if (var_cnt == 0)
309 if (lex_look_ahead (lexer) == '=')
311 lex_match_id (lexer, "VARIABLES");
312 lex_match (lexer, '=');
315 while (lex_token (lexer) != '.' && lex_token (lexer) != '/')
319 if (!parse_variables_const (lexer, dict, &vars, &var_cnt,
320 PV_APPEND | PV_NO_DUPLICATE | PV_NUMERIC))
323 dsc->vars = xnrealloc ((void *)dsc->vars, var_cnt, sizeof *dsc->vars);
324 for (i = dsc->var_cnt; i < var_cnt; i++)
326 struct dsc_var *dv = &dsc->vars[i];
328 dv->z_name[0] = '\0';
331 dsc->var_cnt = var_cnt;
333 if (lex_match (lexer, '('))
335 if (lex_token (lexer) != T_ID)
337 lex_error (lexer, NULL);
340 if (try_name (dict, dsc, lex_tokid (lexer)))
342 strcpy (dsc->vars[dsc->var_cnt - 1].z_name, lex_tokid (lexer));
346 msg (SE, _("Z-score variable name %s would be"
347 " a duplicate variable name."), lex_tokid (lexer));
349 if (!lex_force_match (lexer, ')'))
356 lex_error (lexer, NULL);
360 lex_match (lexer, '/');
364 msg (SE, _("No variables specified."));
368 /* Construct z-score varnames, show translation table. */
369 if (z_cnt || save_z_scores)
375 for (i = 0; i < dsc->var_cnt; i++)
376 if (dsc->vars[i].z_name[0] == 0)
378 if (!generate_z_varname (dict, dsc, dsc->vars[i].z_name,
379 var_get_name (dsc->vars[i].v),
388 /* Figure out statistics to display. */
389 if (dsc->show_stats & (1ul << DSC_SKEWNESS))
390 dsc->show_stats |= 1ul << DSC_SESKEW;
391 if (dsc->show_stats & (1ul << DSC_KURTOSIS))
392 dsc->show_stats |= 1ul << DSC_SEKURT;
394 /* Figure out which statistics to calculate. */
395 dsc->calc_stats = dsc->show_stats;
397 dsc->calc_stats |= (1ul << DSC_MEAN) | (1ul << DSC_STDDEV);
398 if (dsc->sort_by_stat >= 0)
399 dsc->calc_stats |= 1ul << dsc->sort_by_stat;
400 if (dsc->show_stats & (1ul << DSC_SESKEW))
401 dsc->calc_stats |= 1ul << DSC_SKEWNESS;
402 if (dsc->show_stats & (1ul << DSC_SEKURT))
403 dsc->calc_stats |= 1ul << DSC_KURTOSIS;
405 /* Figure out maximum moment needed and allocate moments for
407 dsc->max_moment = MOMENT_NONE;
408 for (i = 0; i < DSC_N_STATS; i++)
409 if (dsc->calc_stats & (1ul << i) && dsc_info[i].moment > dsc->max_moment)
410 dsc->max_moment = dsc_info[i].moment;
411 if (dsc->max_moment != MOMENT_NONE)
412 for (i = 0; i < dsc->var_cnt; i++)
413 dsc->vars[i].moments = moments_create (dsc->max_moment);
415 /* Data pass. FIXME: error handling. */
416 grouper = casegrouper_create_splits (proc_open (ds), dict);
417 while (casegrouper_get_next_group (grouper, &group))
418 calc_descriptives (dsc, group, ds);
419 ok = casegrouper_destroy (grouper);
420 ok = proc_commit (ds) && ok;
424 setup_z_trns (dsc, ds);
429 return ok ? CMD_SUCCESS : CMD_CASCADING_FAILURE;
437 /* Returns the statistic named by the current token and skips past the token.
438 Returns DSC_NONE if no statistic is given (e.g., subcommand with no
439 specifiers). Emits an error if the current token ID does not name a
441 static enum dsc_statistic
442 match_statistic (struct lexer *lexer)
444 if (lex_token (lexer) == T_ID)
446 enum dsc_statistic stat;
448 for (stat = 0; stat < DSC_N_STATS; stat++)
449 if (lex_match_id (lexer, dsc_info[stat].identifier))
453 lex_error (lexer, _("expecting statistic name: reverting to default"));
461 free_dsc_proc (struct dsc_proc *dsc)
468 for (i = 0; i < dsc->var_cnt; i++)
469 moments_destroy (dsc->vars[i].moments);
476 /* Returns false if NAME is a duplicate of any existing variable name or
477 of any previously-declared z-var name; otherwise returns true. */
479 try_name (const struct dictionary *dict, struct dsc_proc *dsc,
484 if (dict_lookup_var (dict, name) != NULL)
486 for (i = 0; i < dsc->var_cnt; i++)
487 if (!strcasecmp (dsc->vars[i].z_name, name))
492 /* Generates a name for a Z-score variable based on a variable
493 named VAR_NAME, given that *Z_CNT generated variable names are
494 known to already exist. If successful, returns true and
495 copies the new name into Z_NAME. On failure, returns false. */
497 generate_z_varname (const struct dictionary *dict, struct dsc_proc *dsc, char *z_name,
498 const char *var_name, int *z_cnt)
500 char name[LONG_NAME_LEN + 1];
502 /* Try a name based on the original variable name. */
504 str_copy_trunc (name + 1, sizeof name - 1, var_name);
505 if (try_name (dict, dsc, name))
507 strcpy (z_name, name);
511 /* Generate a synthetic name. */
517 sprintf (name, "ZSC%03d", *z_cnt);
518 else if (*z_cnt <= 108)
519 sprintf (name, "STDZ%02d", *z_cnt - 99);
520 else if (*z_cnt <= 117)
521 sprintf (name, "ZZZZ%02d", *z_cnt - 108);
522 else if (*z_cnt <= 126)
523 sprintf (name, "ZQZQ%02d", *z_cnt - 117);
526 msg (SE, _("Ran out of generic names for Z-score variables. "
527 "There are only 126 generic names: ZSC001-ZSC0999, "
528 "STDZ01-STDZ09, ZZZZ01-ZZZZ09, ZQZQ01-ZQZQ09."));
532 if (try_name (dict, dsc, name))
534 strcpy (z_name, name);
541 /* Outputs a table describing the mapping between source
542 variables and Z-score variables. */
544 dump_z_table (struct dsc_proc *dsc)
552 for (i = 0; i < dsc->var_cnt; i++)
553 if (dsc->vars[i].z_name[0] != '\0')
557 t = tab_create (2, cnt + 1, 0);
558 tab_title (t, _("Mapping of variables to corresponding Z-scores."));
559 tab_columns (t, SOM_COL_DOWN, 1);
560 tab_headers (t, 0, 0, 1, 0);
561 tab_box (t, TAL_1, TAL_1, TAL_0, TAL_1, 0, 0, 1, cnt);
562 tab_hline (t, TAL_2, 0, 1, 1);
563 tab_text (t, 0, 0, TAB_CENTER | TAT_TITLE, _("Source"));
564 tab_text (t, 1, 0, TAB_CENTER | TAT_TITLE, _("Target"));
565 tab_dim (t, tab_natural_dimensions);
570 for (i = 0, y = 1; i < dsc->var_cnt; i++)
571 if (dsc->vars[i].z_name[0] != '\0')
573 tab_text (t, 0, y, TAB_LEFT, var_get_name (dsc->vars[i].v));
574 tab_text (t, 1, y++, TAB_LEFT, dsc->vars[i].z_name);
581 /* Transformation function to calculate Z-scores. Will return SYSMIS if any of
582 the following are true: 1) mean or standard deviation is SYSMIS 2) score is
583 SYSMIS 3) score is user missing and they were not included in the original
584 analyis. 4) any of the variables in the original analysis were missing
585 (either system or user-missing values that weren't included).
588 descriptives_trns_proc (void *trns_, struct ccase * c,
589 casenumber case_idx UNUSED)
591 struct dsc_trns *t = trns_;
592 struct dsc_z_score *z;
593 const struct variable **vars;
596 if (t->missing_type == DSC_LISTWISE)
599 for (vars = t->vars; vars < t->vars + t->var_cnt; vars++)
601 double score = case_num (c, *vars);
602 if (var_is_num_missing (*vars, score, t->exclude))
610 for (z = t->z_scores; z < t->z_scores + t->z_score_cnt; z++)
612 double input = case_num (c, z->src_var);
613 double *output = &case_data_rw (c, z->z_var)->f;
615 if (z->mean == SYSMIS || z->std_dev == SYSMIS || all_sysmis
616 || var_is_num_missing (z->src_var, input, t->exclude))
619 *output = (input - z->mean) / z->std_dev;
621 return TRNS_CONTINUE;
624 /* Frees a descriptives_trns struct. */
626 descriptives_trns_free (void *trns_)
628 struct dsc_trns *t = trns_;
631 assert((t->missing_type != DSC_LISTWISE) ^ (t->vars != NULL));
636 /* Sets up a transformation to calculate Z scores. */
638 setup_z_trns (struct dsc_proc *dsc, struct dataset *ds)
643 for (cnt = i = 0; i < dsc->var_cnt; i++)
644 if (dsc->vars[i].z_name[0] != '\0')
647 t = xmalloc (sizeof *t);
648 t->z_scores = xnmalloc (cnt, sizeof *t->z_scores);
649 t->z_score_cnt = cnt;
650 t->missing_type = dsc->missing_type;
651 t->exclude = dsc->exclude;
652 if ( t->missing_type == DSC_LISTWISE )
654 t->var_cnt = dsc->var_cnt;
655 t->vars = xnmalloc (t->var_cnt, sizeof *t->vars);
656 for (i = 0; i < t->var_cnt; i++)
657 t->vars[i] = dsc->vars[i].v;
665 for (cnt = i = 0; i < dsc->var_cnt; i++)
667 struct dsc_var *dv = &dsc->vars[i];
668 if (dv->z_name[0] != '\0')
670 struct dsc_z_score *z;
671 struct variable *dst_var;
673 dst_var = dict_create_var_assert (dataset_dict (ds), dv->z_name, 0);
674 var_set_label (dst_var, xasprintf (_("Z-score of %s"),
675 var_to_string (dv->v)));
677 z = &t->z_scores[cnt++];
680 z->mean = dv->stats[DSC_MEAN];
681 z->std_dev = dv->stats[DSC_STDDEV];
685 add_transformation (ds,
686 descriptives_trns_proc, descriptives_trns_free, t);
689 /* Statistical calculation. */
691 static bool listwise_missing (struct dsc_proc *dsc, const struct ccase *c);
693 /* Calculates and displays descriptive statistics for the cases
696 calc_descriptives (struct dsc_proc *dsc, struct casereader *group,
699 struct casereader *pass1, *pass2;
703 if (!casereader_peek (group, 0, &c))
705 output_split_file_values (ds, &c);
708 group = casereader_create_filter_weight (group, dataset_dict (ds),
711 casereader_split (group, &pass1, &pass2);
712 if (dsc->max_moment <= MOMENT_MEAN)
713 casereader_destroy (pass2);
715 for (i = 0; i < dsc->var_cnt; i++)
717 struct dsc_var *dv = &dsc->vars[i];
719 dv->valid = dv->missing = 0.0;
720 if (dv->moments != NULL)
721 moments_clear (dv->moments);
725 dsc->missing_listwise = 0.;
728 /* First pass to handle most of the work. */
729 for (; casereader_read (pass1, &c); case_destroy (&c))
731 double weight = dict_get_case_weight (dataset_dict (ds), &c, NULL);
733 /* Check for missing values. */
734 if (listwise_missing (dsc, &c))
736 dsc->missing_listwise += weight;
737 if (dsc->missing_type == DSC_LISTWISE)
740 dsc->valid += weight;
742 for (i = 0; i < dsc->var_cnt; i++)
744 struct dsc_var *dv = &dsc->vars[i];
745 double x = case_num (&c, dv->v);
747 if (var_is_num_missing (dv->v, x, dsc->exclude))
749 dv->missing += weight;
753 if (dv->moments != NULL)
754 moments_pass_one (dv->moments, x, weight);
762 if (!casereader_destroy (pass1))
765 /* Second pass for higher-order moments. */
766 if (dsc->max_moment > MOMENT_MEAN)
768 for (; casereader_read (pass2, &c); case_destroy (&c))
770 double weight = dict_get_case_weight (dataset_dict (ds), &c, NULL);
772 /* Check for missing values. */
773 if (dsc->missing_type == DSC_LISTWISE && listwise_missing (dsc, &c))
776 for (i = 0; i < dsc->var_cnt; i++)
778 struct dsc_var *dv = &dsc->vars[i];
779 double x = case_num (&c, dv->v);
781 if (var_is_num_missing (dv->v, x, dsc->exclude))
784 if (dv->moments != NULL)
785 moments_pass_two (dv->moments, x, weight);
788 if (!casereader_destroy (pass2))
792 /* Calculate results. */
793 for (i = 0; i < dsc->var_cnt; i++)
795 struct dsc_var *dv = &dsc->vars[i];
799 for (j = 0; j < DSC_N_STATS; j++)
800 dv->stats[j] = SYSMIS;
802 dv->valid = W = dsc->valid - dv->missing;
804 if (dv->moments != NULL)
805 moments_calculate (dv->moments, NULL,
806 &dv->stats[DSC_MEAN], &dv->stats[DSC_VARIANCE],
807 &dv->stats[DSC_SKEWNESS], &dv->stats[DSC_KURTOSIS]);
808 if (dsc->calc_stats & (1ul << DSC_SEMEAN)
809 && dv->stats[DSC_VARIANCE] != SYSMIS && W > 0.)
810 dv->stats[DSC_SEMEAN] = sqrt (dv->stats[DSC_VARIANCE]) / sqrt (W);
811 if (dsc->calc_stats & (1ul << DSC_STDDEV)
812 && dv->stats[DSC_VARIANCE] != SYSMIS)
813 dv->stats[DSC_STDDEV] = sqrt (dv->stats[DSC_VARIANCE]);
814 if (dsc->calc_stats & (1ul << DSC_SEKURT))
815 if (dv->stats[DSC_KURTOSIS] != SYSMIS)
816 dv->stats[DSC_SEKURT] = calc_sekurt (W);
817 if (dsc->calc_stats & (1ul << DSC_SESKEW)
818 && dv->stats[DSC_SKEWNESS] != SYSMIS)
819 dv->stats[DSC_SESKEW] = calc_seskew (W);
820 dv->stats[DSC_RANGE] = ((dv->min == DBL_MAX || dv->max == -DBL_MAX)
821 ? SYSMIS : dv->max - dv->min);
822 dv->stats[DSC_MIN] = dv->min == DBL_MAX ? SYSMIS : dv->min;
823 dv->stats[DSC_MAX] = dv->max == -DBL_MAX ? SYSMIS : dv->max;
824 if (dsc->calc_stats & (1ul << DSC_SUM))
825 dv->stats[DSC_SUM] = W * dv->stats[DSC_MEAN];
828 /* Output results. */
832 /* Returns true if any of the descriptives variables in DSC's
833 variable list have missing values in case C, false otherwise. */
835 listwise_missing (struct dsc_proc *dsc, const struct ccase *c)
839 for (i = 0; i < dsc->var_cnt; i++)
841 struct dsc_var *dv = &dsc->vars[i];
842 double x = case_num (c, dv->v);
844 if (var_is_num_missing (dv->v, x, dsc->exclude))
850 /* Statistical display. */
852 static algo_compare_func descriptives_compare_dsc_vars;
854 /* Displays a table of descriptive statistics for DSC. */
856 display (struct dsc_proc *dsc)
862 nc = 1 + (dsc->format == DSC_SERIAL ? 2 : 1);
863 for (i = 0; i < DSC_N_STATS; i++)
864 if (dsc->show_stats & (1ul << i))
867 if (dsc->sort_by_stat != DSC_NONE)
868 sort (dsc->vars, dsc->var_cnt, sizeof *dsc->vars,
869 descriptives_compare_dsc_vars, dsc);
871 t = tab_create (nc, dsc->var_cnt + 1, 0);
872 tab_headers (t, 1, 0, 1, 0);
873 tab_box (t, TAL_1, TAL_1, -1, -1, 0, 0, nc - 1, dsc->var_cnt);
874 tab_box (t, -1, -1, -1, TAL_1, 1, 0, nc - 1, dsc->var_cnt);
875 tab_hline (t, TAL_2, 0, nc - 1, 1);
876 tab_vline (t, TAL_2, 1, 0, dsc->var_cnt);
877 tab_dim (t, tab_natural_dimensions);
880 tab_text (t, nc++, 0, TAB_LEFT | TAT_TITLE, _("Variable"));
881 if (dsc->format == DSC_SERIAL)
883 tab_text (t, nc++, 0, TAB_CENTER | TAT_TITLE, _("Valid N"));
884 tab_text (t, nc++, 0, TAB_CENTER | TAT_TITLE, _("Missing N"));
887 tab_text (t, nc++, 0, TAB_CENTER | TAT_TITLE, "N");
889 for (i = 0; i < DSC_N_STATS; i++)
890 if (dsc->show_stats & (1ul << i))
892 const char *title = gettext (dsc_info[i].name);
893 tab_text (t, nc++, 0, TAB_CENTER | TAT_TITLE, title);
896 for (i = 0; i < dsc->var_cnt; i++)
898 struct dsc_var *dv = &dsc->vars[i];
902 tab_text (t, nc++, i + 1, TAB_LEFT, var_get_name (dv->v));
903 tab_text (t, nc++, i + 1, TAT_PRINTF, "%g", dv->valid);
904 if (dsc->format == DSC_SERIAL)
905 tab_text (t, nc++, i + 1, TAT_PRINTF, "%g", dv->missing);
906 for (j = 0; j < DSC_N_STATS; j++)
907 if (dsc->show_stats & (1ul << j))
908 tab_float (t, nc++, i + 1, TAB_NONE, dv->stats[j], 10, 3);
911 tab_title (t, _("Valid cases = %g; cases with missing value(s) = %g."),
912 dsc->valid, dsc->missing_listwise);
917 /* Compares `struct dsc_var's A and B according to the ordering
920 descriptives_compare_dsc_vars (const void *a_, const void *b_, const void *dsc_)
922 const struct dsc_var *a = a_;
923 const struct dsc_var *b = b_;
924 const struct dsc_proc *dsc = dsc_;
928 if (dsc->sort_by_stat == DSC_NAME)
929 result = strcasecmp (var_get_name (a->v), var_get_name (b->v));
932 double as = a->stats[dsc->sort_by_stat];
933 double bs = b->stats[dsc->sort_by_stat];
935 result = as < bs ? -1 : as > bs;
938 if (!dsc->sort_ascending)