1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
3 Written by Ben Pfaff <blp@gnu.org>.
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
20 /* FIXME: Many possible optimizations. */
28 #include <data/case.h>
29 #include <data/casefile.h>
30 #include <data/dictionary.h>
31 #include <data/procedure.h>
32 #include <data/transformations.h>
33 #include <data/variable.h>
34 #include <language/command.h>
35 #include <language/dictionary/split-file.h>
36 #include <language/lexer/lexer.h>
37 #include <language/lexer/variable-parser.h>
38 #include <libpspp/alloc.h>
39 #include <libpspp/array.h>
40 #include <libpspp/compiler.h>
41 #include <libpspp/magic.h>
42 #include <libpspp/message.h>
43 #include <libpspp/assertion.h>
44 #include <math/moments.h>
45 #include <output/manager.h>
46 #include <output/table.h>
49 #define _(msgid) gettext (msgid)
50 #define N_(msgid) msgid
52 /* DESCRIPTIVES private data. */
56 /* Handling of missing values. */
59 DSC_VARIABLE, /* Handle missing values on a per-variable basis. */
60 DSC_LISTWISE /* Discard entire case if any variable is missing. */
63 /* Describes properties of a distribution for the purpose of
64 calculating a Z-score. */
67 int src_idx; /* Source index into case data. */
68 int dst_idx; /* Destination index into case data. */
69 double mean; /* Distribution mean. */
70 double std_dev; /* Distribution standard deviation. */
71 struct variable *v; /* Variable on which z-score is based. */
74 /* DESCRIPTIVES transformation (for calculating Z-scores). */
77 struct dsc_z_score *z_scores; /* Array of Z-scores. */
78 int z_score_cnt; /* Number of Z-scores. */
79 struct variable **vars; /* Variables for listwise missing checks. */
80 size_t var_cnt; /* Number of variables. */
81 enum dsc_missing_type missing_type; /* Treatment of missing values. */
82 int include_user_missing; /* Nonzero to include user-missing values. */
85 /* Statistics. Used as bit indexes, so must be 32 or fewer. */
88 DSC_MEAN = 0, DSC_SEMEAN, DSC_STDDEV, DSC_VARIANCE, DSC_KURTOSIS,
89 DSC_SEKURT, DSC_SKEWNESS, DSC_SESKEW, DSC_RANGE, DSC_MIN,
90 DSC_MAX, DSC_SUM, DSC_N_STATS,
92 /* Only valid as sort criteria. */
93 DSC_NAME = -2, /* Sort by name. */
94 DSC_NONE = -1 /* Unsorted. */
97 /* Describes one statistic. */
98 struct dsc_statistic_info
100 const char *identifier; /* Identifier. */
101 const char *name; /* Full name. */
102 enum moment moment; /* Highest moment needed to calculate. */
105 /* Table of statistics, indexed by DSC_*.
Each row gives the keyword that match_statistic compares against the
current token, the translatable column title that display passes to
gettext, and the highest moment the statistic needs.
SEKURTOSIS and SESKEWNESS are listed as MOMENT_NONE because
calc_descriptives computes them from the valid weight W alone
(calc_sekurt and calc_seskew); cmd_descriptives adds KURTOSIS or
SKEWNESS to calc_stats whenever the matching standard error is shown,
which is what raises max_moment for them. */
106 static const struct dsc_statistic_info dsc_info[DSC_N_STATS] =
108 {"MEAN", N_("Mean"), MOMENT_MEAN},
109 {"SEMEAN", N_("S E Mean"), MOMENT_VARIANCE},
110 {"STDDEV", N_("Std Dev"), MOMENT_VARIANCE},
111 {"VARIANCE", N_("Variance"), MOMENT_VARIANCE},
112 {"KURTOSIS", N_("Kurtosis"), MOMENT_KURTOSIS},
113 {"SEKURTOSIS", N_("S E Kurt"), MOMENT_NONE},
114 {"SKEWNESS", N_("Skewness"), MOMENT_SKEWNESS},
115 {"SESKEWNESS", N_("S E Skew"), MOMENT_NONE},
116 {"RANGE", N_("Range"), MOMENT_NONE},
117 {"MINIMUM", N_("Minimum"), MOMENT_NONE},
118 {"MAXIMUM", N_("Maximum"), MOMENT_NONE},
119 {"SUM", N_("Sum"), MOMENT_MEAN},
122 /* Statistics calculated by default if none are explicitly requested. */
124 #define DEFAULT_STATS \
125 ((1ul << DSC_MEAN) | (1ul << DSC_STDDEV) | (1ul << DSC_MIN) \
128 /* A variable specified on DESCRIPTIVES. */
131 struct variable *v; /* Variable to calculate on. */
132 char z_name[LONG_NAME_LEN + 1]; /* Name for z-score variable; empty string if none. */
133 double valid, missing; /* Sums of case weights for valid and missing values. */
134 struct moments *moments; /* Moments accumulator; tested against NULL before use. */
135 double min, max; /* Minimum and maximum values. */
136 double stats[DSC_N_STATS]; /* All the stats' values, indexed by DSC_*. */
142 DSC_LINE, /* Abbreviated format. */
143 DSC_SERIAL /* Long format. */
146 /* A DESCRIPTIVES procedure. */
149 /* Per-variable info. */
150 struct dsc_var *vars; /* Variables. */
151 size_t var_cnt; /* Number of variables. */
154 enum dsc_missing_type missing_type; /* Treatment of missing values. */
155 int include_user_missing; /* Nonzero to include user-missing values. */
156 int show_var_labels; /* Nonzero to show variable labels. */
157 int show_index; /* Nonzero to show variable index. */
158 enum dsc_format format; /* Output format. */
160 /* Accumulated results. */
161 double missing_listwise; /* Sum of weights of cases missing listwise. */
162 double valid; /* Sum of weights of valid cases. */
163 bool bad_warn; /* Warn if bad weight found. */
164 enum dsc_statistic sort_by_stat; /* Statistic to sort by, or DSC_NAME (-2) to sort by name, or DSC_NONE (-1) for no sorting. */
165 int sort_ascending; /* !0: ascending order; 0: descending. */
166 unsigned long show_stats; /* Bit mask (1ul << DSC_*) of statistics to display. */
167 unsigned long calc_stats; /* Bit mask (1ul << DSC_*) of statistics to calculate. */
168 enum moment max_moment; /* Highest moment needed for stats. */
172 static enum dsc_statistic match_statistic (struct lexer *);
173 static void free_dsc_proc (struct dsc_proc *);
175 /* Z-score functions. */
176 static bool try_name (const struct dictionary *dict,
177 struct dsc_proc *dsc, const char *name);
178 static bool generate_z_varname (const struct dictionary *dict,
179 struct dsc_proc *dsc, char *z_name,
180 const char *name, size_t *z_cnt);
181 static void dump_z_table (struct dsc_proc *);
182 static void setup_z_trns (struct dsc_proc *, struct dataset *);
184 /* Procedure execution functions. */
185 static bool calc_descriptives (const struct ccase *first,
186 const struct casefile *, void *dsc_,
187 const struct dataset *);
188 static void display (struct dsc_proc *dsc);
190 /* Parser and outline. */
192 /* Handles DESCRIPTIVES. */
194 cmd_descriptives (struct lexer *lexer, struct dataset *ds)
196 struct dictionary *dict = dataset_dict (ds);
197 struct dsc_proc *dsc;
198 struct variable **vars = NULL;
200 int save_z_scores = 0;
205 /* Create and initialize dsc. */
206 dsc = xmalloc (sizeof *dsc);
209 dsc->missing_type = DSC_VARIABLE;
210 dsc->include_user_missing = 0;
211 dsc->show_var_labels = 1;
213 dsc->format = DSC_LINE;
214 dsc->missing_listwise = 0.;
217 dsc->sort_by_stat = DSC_NONE;
218 dsc->sort_ascending = 1;
219 dsc->show_stats = dsc->calc_stats = DEFAULT_STATS;
221 /* Parse DESCRIPTIVES. */
222 while (lex_token (lexer) != '.')
224 if (lex_match_id (lexer, "MISSING"))
226 lex_match (lexer, '=');
227 while (lex_token (lexer) != '.' && lex_token (lexer) != '/')
229 if (lex_match_id (lexer, "VARIABLE"))
230 dsc->missing_type = DSC_VARIABLE;
231 else if (lex_match_id (lexer, "LISTWISE"))
232 dsc->missing_type = DSC_LISTWISE;
233 else if (lex_match_id (lexer, "INCLUDE"))
234 dsc->include_user_missing = 1;
237 lex_error (lexer, NULL);
240 lex_match (lexer, ',');
243 else if (lex_match_id (lexer, "SAVE"))
245 else if (lex_match_id (lexer, "FORMAT"))
247 lex_match (lexer, '=');
248 while (lex_token (lexer) != '.' && lex_token (lexer) != '/')
250 if (lex_match_id (lexer, "LABELS"))
251 dsc->show_var_labels = 1;
252 else if (lex_match_id (lexer, "NOLABELS"))
253 dsc->show_var_labels = 0;
254 else if (lex_match_id (lexer, "INDEX"))
256 else if (lex_match_id (lexer, "NOINDEX"))
258 else if (lex_match_id (lexer, "LINE"))
259 dsc->format = DSC_LINE;
260 else if (lex_match_id (lexer, "SERIAL"))
261 dsc->format = DSC_SERIAL;
264 lex_error (lexer, NULL);
267 lex_match (lexer, ',');
270 else if (lex_match_id (lexer, "STATISTICS"))
272 lex_match (lexer, '=');
274 while (lex_token (lexer) != '.' && lex_token (lexer) != '/')
276 if (lex_match (lexer, T_ALL))
277 dsc->show_stats |= (1ul << DSC_N_STATS) - 1;
278 else if (lex_match_id (lexer, "DEFAULT"))
279 dsc->show_stats |= DEFAULT_STATS;
281 dsc->show_stats |= 1ul << (match_statistic (lexer));
282 lex_match (lexer, ',');
284 if (dsc->show_stats == 0)
285 dsc->show_stats = DEFAULT_STATS;
287 else if (lex_match_id (lexer, "SORT"))
289 lex_match (lexer, '=');
290 if (lex_match_id (lexer, "NAME"))
291 dsc->sort_by_stat = DSC_NAME;
294 dsc->sort_by_stat = match_statistic (lexer);
295 if (dsc->sort_by_stat == DSC_NONE )
296 dsc->sort_by_stat = DSC_MEAN;
298 if (lex_match (lexer, '('))
300 if (lex_match_id (lexer, "A"))
301 dsc->sort_ascending = 1;
302 else if (lex_match_id (lexer, "D"))
303 dsc->sort_ascending = 0;
305 lex_error (lexer, NULL);
306 lex_force_match (lexer, ')');
309 else if (var_cnt == 0)
311 if (lex_look_ahead (lexer) == '=')
313 lex_match_id (lexer, "VARIABLES");
314 lex_match (lexer, '=');
317 while (lex_token (lexer) != '.' && lex_token (lexer) != '/')
321 if (!parse_variables (lexer, dataset_dict (ds), &vars, &var_cnt,
322 PV_APPEND | PV_NO_DUPLICATE | PV_NUMERIC))
325 dsc->vars = xnrealloc (dsc->vars, var_cnt, sizeof *dsc->vars);
326 for (i = dsc->var_cnt; i < var_cnt; i++)
328 struct dsc_var *dv = &dsc->vars[i];
330 dv->z_name[0] = '\0';
333 dsc->var_cnt = var_cnt;
335 if (lex_match (lexer, '('))
337 if (lex_token (lexer) != T_ID)
339 lex_error (lexer, NULL);
342 if (try_name (dict, dsc, lex_tokid (lexer)))
344 strcpy (dsc->vars[dsc->var_cnt - 1].z_name, lex_tokid (lexer));
348 msg (SE, _("Z-score variable name %s would be"
349 " a duplicate variable name."), lex_tokid (lexer));
351 if (!lex_force_match (lexer, ')'))
358 lex_error (lexer, NULL);
362 lex_match (lexer, '/');
366 msg (SE, _("No variables specified."));
370 /* Construct z-score varnames, show translation table. */
371 if (z_cnt || save_z_scores)
377 for (i = 0; i < dsc->var_cnt; i++)
378 if (dsc->vars[i].z_name[0] == 0)
380 if (!generate_z_varname (dict, dsc, dsc->vars[i].z_name,
381 var_get_name (dsc->vars[i].v),
390 /* Figure out statistics to display. */
391 if (dsc->show_stats & (1ul << DSC_SKEWNESS))
392 dsc->show_stats |= 1ul << DSC_SESKEW;
393 if (dsc->show_stats & (1ul << DSC_KURTOSIS))
394 dsc->show_stats |= 1ul << DSC_SEKURT;
396 /* Figure out which statistics to calculate. */
397 dsc->calc_stats = dsc->show_stats;
399 dsc->calc_stats |= (1ul << DSC_MEAN) | (1ul << DSC_STDDEV);
400 if (dsc->sort_by_stat >= 0)
401 dsc->calc_stats |= 1ul << dsc->sort_by_stat;
402 if (dsc->show_stats & (1ul << DSC_SESKEW))
403 dsc->calc_stats |= 1ul << DSC_SKEWNESS;
404 if (dsc->show_stats & (1ul << DSC_SEKURT))
405 dsc->calc_stats |= 1ul << DSC_KURTOSIS;
407 /* Figure out maximum moment needed and allocate moments for the variables. */
409 dsc->max_moment = MOMENT_NONE;
410 for (i = 0; i < DSC_N_STATS; i++)
411 if (dsc->calc_stats & (1ul << i) && dsc_info[i].moment > dsc->max_moment)
412 dsc->max_moment = dsc_info[i].moment;
413 if (dsc->max_moment != MOMENT_NONE)
414 for (i = 0; i < dsc->var_cnt; i++)
415 dsc->vars[i].moments = moments_create (dsc->max_moment);
418 ok = multipass_procedure_with_splits (ds, calc_descriptives, dsc);
422 setup_z_trns (dsc, ds);
427 return ok ? CMD_SUCCESS : CMD_CASCADING_FAILURE;
435 /* Returns the statistic named by the current token and skips past the token.
436 Returns DSC_NONE if no statistic is given (e.g., subcommand with no
437 specifiers). Emits an error if the current token ID does not name a
statistic.

Statistic names are the identifier fields of dsc_info, tried in DSC_*
order with lex_match_id.

NOTE(review): DSC_NONE is -1, and the STATISTICS subcommand shifts 1ul
by this function's return value; confirm that path can never see
DSC_NONE, since shifting by a negative count is undefined behavior. */
439 static enum dsc_statistic
440 match_statistic (struct lexer *lexer)
442 if (lex_token (lexer) == T_ID)
444 enum dsc_statistic stat;
446 for (stat = 0; stat < DSC_N_STATS; stat++)
447 if (lex_match_id (lexer, dsc_info[stat].identifier))
451 lex_error (lexer, _("expecting statistic name: reverting to default"));
/* Releases resources owned by DSC; every variable's moments
accumulator is handed to moments_destroy. */
459 free_dsc_proc (struct dsc_proc *dsc)
466 for (i = 0; i < dsc->var_cnt; i++)
467 moments_destroy (dsc->vars[i].moments);
474 /* Returns false if NAME is a duplicate of any existing variable name or
475 of any previously-declared z-var name; otherwise returns true.
Existing variables are looked up in DICT; z-var names are the z_name
fields of DSC's variables and are compared without regard to case. */
477 try_name (const struct dictionary *dict, struct dsc_proc *dsc,
482 if (dict_lookup_var (dict, name) != NULL)
484 for (i = 0; i < dsc->var_cnt; i++)
485 if (!strcasecmp (dsc->vars[i].z_name, name))
490 /* Generates a name for a Z-score variable based on a variable
491 named VAR_NAME, given that *Z_CNT generated variable names are
492 known to already exist. If successful, returns true and
493 copies the new name into Z_NAME. On failure, returns false.

Every candidate is checked with try_name against DICT and against
the z-score names already recorded in DSC.

NOTE(review): *Z_CNT is a size_t but is formatted with %d below; the
conversion specifier should match the argument type (for example by
casting the argument to int).
NOTE(review): the out-of-names message says "ZSC001-ZSC0999"; the
name is produced with ZSC%03d and 126 names are promised in total, so
the intended upper bound is presumably ZSC099 -- confirm and correct
the message text. */
495 generate_z_varname (const struct dictionary *dict, struct dsc_proc *dsc, char *z_name,
496 const char *var_name, size_t *z_cnt)
498 char name[LONG_NAME_LEN + 1];
500 /* Try a name based on the original variable name. */
/* VAR_NAME is copied starting at name[1], truncated to the space left. */
502 str_copy_trunc (name + 1, sizeof name - 1, var_name);
503 if (try_name (dict, dsc, name))
505 strcpy (z_name, name);
509 /* Generate a synthetic name. */
515 sprintf (name, "ZSC%03d", *z_cnt);
516 else if (*z_cnt <= 108)
517 sprintf (name, "STDZ%02d", *z_cnt - 99);
518 else if (*z_cnt <= 117)
519 sprintf (name, "ZZZZ%02d", *z_cnt - 108);
520 else if (*z_cnt <= 126)
521 sprintf (name, "ZQZQ%02d", *z_cnt - 117);
524 msg (SE, _("Ran out of generic names for Z-score variables. "
525 "There are only 126 generic names: ZSC001-ZSC0999, "
526 "STDZ01-STDZ09, ZZZZ01-ZZZZ09, ZQZQ01-ZQZQ09."));
530 if (try_name (dict, dsc, name))
532 strcpy (z_name, name);
539 /* Outputs a table describing the mapping between source
540 variables and Z-score variables.
The table has two columns, "Source" and "Target", a title row, and
one row for each variable in DSC whose z_name is not empty. */
542 dump_z_table (struct dsc_proc *dsc)
/* Look at every variable that has a z-score name. */
550 for (i = 0; i < dsc->var_cnt; i++)
551 if (dsc->vars[i].z_name[0] != '\0')
/* cnt data rows plus one heading row. */
555 t = tab_create (2, cnt + 1, 0);
556 tab_title (t, _("Mapping of variables to corresponding Z-scores."));
557 tab_columns (t, SOM_COL_DOWN, 1);
558 tab_headers (t, 0, 0, 1, 0);
559 tab_box (t, TAL_1, TAL_1, TAL_0, TAL_1, 0, 0, 1, cnt);
560 tab_hline (t, TAL_2, 0, 1, 1);
561 tab_text (t, 0, 0, TAB_CENTER | TAT_TITLE, _("Source"));
562 tab_text (t, 1, 0, TAB_CENTER | TAT_TITLE, _("Target"));
563 tab_dim (t, tab_natural_dimensions);
/* Row 0 holds the headings, so data rows start at y = 1. */
568 for (i = 0, y = 1; i < dsc->var_cnt; i++)
569 if (dsc->vars[i].z_name[0] != '\0')
571 tab_text (t, 0, y, TAB_LEFT, var_get_name (dsc->vars[i].v));
572 tab_text (t, 1, y++, TAB_LEFT, dsc->vars[i].z_name);
579 /* Transformation function to calculate Z-scores. Will return SYSMIS if any of
580 the following are true: 1) mean or standard deviation is SYSMIS 2) score is
581 SYSMIS 3) score is user missing and they were not included in the original
582 analysis. 4) any of the variables in the original analysis were missing
583 (either system or user-missing values that weren't included).

TRNS_ is the struct dsc_trns built by setup_z_trns; C is the case
being transformed and is modified in place.

NOTE(review): only SYSMIS is screened for std_dev; a standard
deviation of zero is not checked before the division below. */
586 descriptives_trns_proc (void *trns_, struct ccase * c,
587 casenumber case_idx UNUSED)
589 struct dsc_trns *t = trns_;
590 struct dsc_z_score *z;
591 struct variable **vars;
/* Listwise treatment: examine every analysis variable in this case. */
594 if (t->missing_type == DSC_LISTWISE)
597 for (vars = t->vars; vars < t->vars + t->var_cnt; vars++)
599 double score = case_num (c, (*vars)->fv);
601 || (!t->include_user_missing
602 && var_is_num_user_missing (*vars, score)))
/* Compute one destination value per Z-score variable. */
610 for (z = t->z_scores; z < t->z_scores + t->z_score_cnt; z++)
612 double input = case_num (c, z->src_idx);
613 double *output = &case_data_rw (c, z->dst_idx)->f;
615 if (z->mean == SYSMIS || z->std_dev == SYSMIS
616 || all_sysmis || input == SYSMIS
617 || (!t->include_user_missing
618 && var_is_num_user_missing (z->v, input)))
621 *output = (input - z->mean) / z->std_dev;
623 return TRNS_CONTINUE;
626 /* Frees a descriptives_trns struct.
TRNS_ is the struct dsc_trns that setup_z_trns registered with
add_transformation. */
628 descriptives_trns_free (void *trns_)
630 struct dsc_trns *t = trns_;
/* Exactly one of the two conditions may hold: either the
transformation is not listwise, or it carries a variable list
(setup_z_trns allocates t->vars for DSC_LISTWISE). */
633 assert((t->missing_type != DSC_LISTWISE) ^ (t->vars != NULL));
638 /* Sets up a transformation to calculate Z scores.
For every variable in DSC that has a z_name, a new variable of that
name is created in DS's dictionary and paired with the source
variable's mean and standard deviation taken from dv->stats. The
resulting struct dsc_trns is registered with add_transformation. */
640 setup_z_trns (struct dsc_proc *dsc, struct dataset *ds)
/* First pass: visit the variables that have a Z-score name. */
645 for (cnt = i = 0; i < dsc->var_cnt; i++)
646 if (dsc->vars[i].z_name[0] != '\0')
649 t = xmalloc (sizeof *t);
650 t->z_scores = xnmalloc (cnt, sizeof *t->z_scores);
651 t->z_score_cnt = cnt;
652 t->missing_type = dsc->missing_type;
653 t->include_user_missing = dsc->include_user_missing;
/* Listwise treatment needs every analysis variable at transformation
time, not just those with Z-scores, so copy the whole list. */
654 if ( t->missing_type == DSC_LISTWISE )
656 t->var_cnt = dsc->var_cnt;
657 t->vars = xnmalloc (t->var_cnt, sizeof *t->vars);
658 for (i = 0; i < t->var_cnt; i++)
659 t->vars[i] = dsc->vars[i].v;
/* Second pass: create each destination variable and fill in its
dsc_z_score entry; cnt is reused as the index into t->z_scores. */
667 for (cnt = i = 0; i < dsc->var_cnt; i++)
669 struct dsc_var *dv = &dsc->vars[i];
670 if (dv->z_name[0] != '\0')
672 struct dsc_z_score *z;
673 struct variable *dst_var;
675 dst_var = dict_create_var_assert (dataset_dict (ds), dv->z_name, 0);
/* NOTE(review): xasprintf allocates the label string; confirm whether
var_set_label takes ownership or copies it (in which case the
string would need to be freed here). */
676 var_set_label (dst_var, xasprintf (_("Z-score of %s"),
677 var_to_string (dv->v)));
679 z = &t->z_scores[cnt++];
680 z->src_idx = dv->v->fv;
681 z->dst_idx = dst_var->fv;
682 z->mean = dv->stats[DSC_MEAN];
683 z->std_dev = dv->stats[DSC_STDDEV];
688 add_transformation (ds,
689 descriptives_trns_proc, descriptives_trns_free, t);
692 /* Statistical calculation. */
694 static bool listwise_missing (struct dsc_proc *dsc, const struct ccase *c);
696 /* Calculates and displays descriptive statistics for the cases in casefile CF. */
699 calc_descriptives (const struct ccase *first,
700 const struct casefile *cf, void *dsc_,
701 const struct dataset *ds)
703 struct dsc_proc *dsc = dsc_;
704 struct casereader *reader;
708 output_split_file_values (ds, first);
710 for (i = 0; i < dsc->var_cnt; i++)
712 struct dsc_var *dv = &dsc->vars[i];
714 dv->valid = dv->missing = 0.0;
715 if (dv->moments != NULL)
716 moments_clear (dv->moments);
720 dsc->missing_listwise = 0.;
723 /* First pass to handle most of the work. */
724 for (reader = casefile_get_reader (cf, NULL);
725 casereader_read (reader, &c);
728 double weight = dict_get_case_weight (dataset_dict (ds), &c, &dsc->bad_warn);
732 /* Check for missing values. */
733 if (listwise_missing (dsc, &c))
735 dsc->missing_listwise += weight;
736 if (dsc->missing_type == DSC_LISTWISE)
739 dsc->valid += weight;
741 for (i = 0; i < dsc->var_cnt; i++)
743 struct dsc_var *dv = &dsc->vars[i];
744 double x = case_num (&c, dv->v->fv);
746 if (dsc->missing_type != DSC_LISTWISE
748 || (!dsc->include_user_missing
749 && var_is_num_user_missing (dv->v, x))))
751 dv->missing += weight;
755 if (dv->moments != NULL)
756 moments_pass_one (dv->moments, x, weight);
764 casereader_destroy (reader);
766 /* Second pass for higher-order moments. */
767 if (dsc->max_moment > MOMENT_MEAN)
769 for (reader = casefile_get_reader (cf, NULL);
770 casereader_read (reader, &c);
773 double weight = dict_get_case_weight (dataset_dict (ds), &c,
778 /* Check for missing values. */
779 if (listwise_missing (dsc, &c)
780 && dsc->missing_type == DSC_LISTWISE)
783 for (i = 0; i < dsc->var_cnt; i++)
785 struct dsc_var *dv = &dsc->vars[i];
786 double x = case_num (&c, dv->v->fv);
788 if (dsc->missing_type != DSC_LISTWISE
790 || (!dsc->include_user_missing
791 && var_is_num_user_missing (dv->v, x))))
794 if (dv->moments != NULL)
795 moments_pass_two (dv->moments, x, weight);
798 casereader_destroy (reader);
801 /* Calculate results. */
802 for (i = 0; i < dsc->var_cnt; i++)
804 struct dsc_var *dv = &dsc->vars[i];
808 for (j = 0; j < DSC_N_STATS; j++)
809 dv->stats[j] = SYSMIS;
811 dv->valid = W = dsc->valid - dv->missing;
813 if (dv->moments != NULL)
814 moments_calculate (dv->moments, NULL,
815 &dv->stats[DSC_MEAN], &dv->stats[DSC_VARIANCE],
816 &dv->stats[DSC_SKEWNESS], &dv->stats[DSC_KURTOSIS]);
817 if (dsc->calc_stats & (1ul << DSC_SEMEAN)
818 && dv->stats[DSC_VARIANCE] != SYSMIS && W > 0.)
819 dv->stats[DSC_SEMEAN] = sqrt (dv->stats[DSC_VARIANCE]) / sqrt (W);
820 if (dsc->calc_stats & (1ul << DSC_STDDEV)
821 && dv->stats[DSC_VARIANCE] != SYSMIS)
822 dv->stats[DSC_STDDEV] = sqrt (dv->stats[DSC_VARIANCE]);
823 if (dsc->calc_stats & (1ul << DSC_SEKURT))
824 if (dv->stats[DSC_KURTOSIS] != SYSMIS)
825 dv->stats[DSC_SEKURT] = calc_sekurt (W);
826 if (dsc->calc_stats & (1ul << DSC_SESKEW)
827 && dv->stats[DSC_SKEWNESS] != SYSMIS)
828 dv->stats[DSC_SESKEW] = calc_seskew (W);
829 dv->stats[DSC_RANGE] = ((dv->min == DBL_MAX || dv->max == -DBL_MAX)
830 ? SYSMIS : dv->max - dv->min);
831 dv->stats[DSC_MIN] = dv->min == DBL_MAX ? SYSMIS : dv->min;
832 dv->stats[DSC_MAX] = dv->max == -DBL_MAX ? SYSMIS : dv->max;
833 if (dsc->calc_stats & (1ul << DSC_SUM))
834 dv->stats[DSC_SUM] = W * dv->stats[DSC_MEAN];
837 /* Output results. */
843 /* Returns true if any of the descriptives variables in DSC's
844 variable list have missing values in case C, false otherwise.
A user-missing value counts as missing only when
DSC's include_user_missing is zero. */
846 listwise_missing (struct dsc_proc *dsc, const struct ccase *c)
850 for (i = 0; i < dsc->var_cnt; i++)
852 struct dsc_var *dv = &dsc->vars[i];
853 double x = case_num (c, dv->v->fv);
856 || (!dsc->include_user_missing
857 && var_is_num_user_missing (dv->v, x)))
863 /* Statistical display. */
865 static algo_compare_func descriptives_compare_dsc_vars;
867 /* Displays a table of descriptive statistics for DSC.
The table has one heading row plus one row per variable. Its columns
are the variable name, N (or "Valid N" and "Missing N" in DSC_SERIAL
format), and one column for each statistic selected in show_stats.
When a sort criterion other than DSC_NONE is set, DSC's variable
array is sorted in place before the table is built. */
869 display (struct dsc_proc *dsc)
/* Name column plus one count column, or two in DSC_SERIAL format. */
875 nc = 1 + (dsc->format == DSC_SERIAL ? 2 : 1);
876 for (i = 0; i < DSC_N_STATS; i++)
877 if (dsc->show_stats & (1ul << i))
880 if (dsc->sort_by_stat != DSC_NONE)
881 sort (dsc->vars, dsc->var_cnt, sizeof *dsc->vars,
882 descriptives_compare_dsc_vars, dsc);
884 t = tab_create (nc, dsc->var_cnt + 1, 0);
885 tab_headers (t, 1, 0, 1, 0);
886 tab_box (t, TAL_1, TAL_1, -1, -1, 0, 0, nc - 1, dsc->var_cnt);
887 tab_box (t, -1, -1, -1, TAL_1, 1, 0, nc - 1, dsc->var_cnt);
888 tab_hline (t, TAL_2, 0, nc - 1, 1);
889 tab_vline (t, TAL_2, 1, 0, dsc->var_cnt);
890 tab_dim (t, tab_natural_dimensions);
/* Heading row; nc now advances as the current column. */
893 tab_text (t, nc++, 0, TAB_LEFT | TAT_TITLE, _("Variable"));
894 if (dsc->format == DSC_SERIAL)
896 tab_text (t, nc++, 0, TAB_CENTER | TAT_TITLE, _("Valid N"));
897 tab_text (t, nc++, 0, TAB_CENTER | TAT_TITLE, _("Missing N"));
900 tab_text (t, nc++, 0, TAB_CENTER | TAT_TITLE, "N");
902 for (i = 0; i < DSC_N_STATS; i++)
903 if (dsc->show_stats & (1ul << i))
/* dsc_info names are marked with N_, so translate them here. */
905 const char *title = gettext (dsc_info[i].name);
906 tab_text (t, nc++, 0, TAB_CENTER | TAT_TITLE, title);
/* One row per variable, in the same column order as the headings. */
909 for (i = 0; i < dsc->var_cnt; i++)
911 struct dsc_var *dv = &dsc->vars[i];
915 tab_text (t, nc++, i + 1, TAB_LEFT, var_get_name (dv->v));
916 tab_text (t, nc++, i + 1, TAT_PRINTF, "%g", dv->valid);
917 if (dsc->format == DSC_SERIAL)
918 tab_text (t, nc++, i + 1, TAT_PRINTF, "%g", dv->missing);
919 for (j = 0; j < DSC_N_STATS; j++)
920 if (dsc->show_stats & (1ul << j))
921 tab_float (t, nc++, i + 1, TAB_NONE, dv->stats[j], 10, 3);
924 tab_title (t, _("Valid cases = %g; cases with missing value(s) = %g."),
925 dsc->valid, dsc->missing_listwise);
930 /* Compares `struct dsc_var's A and B according to the ordering specified by DSC (its sort_by_stat and sort_ascending settings). */
933 descriptives_compare_dsc_vars (const void *a_, const void *b_, const void *dsc_)
935 const struct dsc_var *a = a_;
936 const struct dsc_var *b = b_;
937 const struct dsc_proc *dsc = dsc_;
941 if (dsc->sort_by_stat == DSC_NAME)
942 result = strcasecmp (var_get_name (a->v), var_get_name (b->v));
945 double as = a->stats[dsc->sort_by_stat];
946 double bs = b->stats[dsc->sort_by_stat];
948 result = as < bs ? -1 : as > bs;
951 if (!dsc->sort_ascending)