1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
3 Written by Ben Pfaff <blp@gnu.org>.
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
20 /* FIXME: Many possible optimizations. */
28 #include <data/case.h>
29 #include <data/casefile.h>
30 #include <data/dictionary.h>
31 #include <data/procedure.h>
32 #include <data/transformations.h>
33 #include <data/variable.h>
34 #include <language/command.h>
35 #include <language/dictionary/split-file.h>
36 #include <language/lexer/lexer.h>
37 #include <language/lexer/variable-parser.h>
38 #include <libpspp/alloc.h>
39 #include <libpspp/array.h>
40 #include <libpspp/compiler.h>
41 #include <libpspp/magic.h>
42 #include <libpspp/message.h>
43 #include <libpspp/assertion.h>
44 #include <math/moments.h>
45 #include <output/manager.h>
46 #include <output/table.h>
49 #define _(msgid) gettext (msgid)
50 #define N_(msgid) msgid
52 /* DESCRIPTIVES private data. */
56 /* Handling of missing values. */
59 DSC_VARIABLE, /* Handle missing values on a per-variable basis. */
60 DSC_LISTWISE /* Discard entire case if any variable is missing. */
63 /* Describes properties of a distribution for the purpose of
64 calculating a Z-score. */
67 int src_idx; /* Source index into case data. */
68 int dst_idx; /* Destination index into case data. */
69 double mean; /* Distribution mean. */
70 double std_dev; /* Distribution standard deviation. */
71 struct variable *v; /* Variable on which z-score is based. */
74 /* DESCRIPTIVES transformation (for calculating Z-scores). */
77 struct dsc_z_score *z_scores; /* Array of Z-scores. */
78 int z_score_cnt; /* Number of Z-scores. */
79 struct variable **vars; /* Variables for listwise missing checks. */
80 size_t var_cnt; /* Number of variables. */
81 enum dsc_missing_type missing_type; /* Treatment of missing values. */
82 int include_user_missing; /* Nonzero to include user-missing values. */
85 /* Statistics. Used as bit indexes, so must be 32 or fewer. */
88 DSC_MEAN = 0, DSC_SEMEAN, DSC_STDDEV, DSC_VARIANCE, DSC_KURTOSIS,
89 DSC_SEKURT, DSC_SKEWNESS, DSC_SESKEW, DSC_RANGE, DSC_MIN,
90 DSC_MAX, DSC_SUM, DSC_N_STATS,
92 /* Only valid as sort criteria. */
93 DSC_NAME = -2, /* Sort by name. */
94 DSC_NONE = -1 /* Unsorted. */
97 /* Describes one statistic. */
98 struct dsc_statistic_info
100 const char *identifier; /* Identifier. */
101 const char *name; /* Full name. */
102 enum moment moment; /* Highest moment needed to calculate. */
105 /* Table of statistics, indexed by DSC_*. */
106 static const struct dsc_statistic_info dsc_info[DSC_N_STATS] =
108 {"MEAN", N_("Mean"), MOMENT_MEAN},
109 {"SEMEAN", N_("S E Mean"), MOMENT_VARIANCE},
110 {"STDDEV", N_("Std Dev"), MOMENT_VARIANCE},
111 {"VARIANCE", N_("Variance"), MOMENT_VARIANCE},
112 {"KURTOSIS", N_("Kurtosis"), MOMENT_KURTOSIS},
113 {"SEKURTOSIS", N_("S E Kurt"), MOMENT_NONE},
114 {"SKEWNESS", N_("Skewness"), MOMENT_SKEWNESS},
115 {"SESKEWNESS", N_("S E Skew"), MOMENT_NONE},
116 {"RANGE", N_("Range"), MOMENT_NONE},
117 {"MINIMUM", N_("Minimum"), MOMENT_NONE},
118 {"MAXIMUM", N_("Maximum"), MOMENT_NONE},
119 {"SUM", N_("Sum"), MOMENT_MEAN},
122 /* Statistics calculated by default if none are explicitly
124 #define DEFAULT_STATS \
125 ((1ul << DSC_MEAN) | (1ul << DSC_STDDEV) | (1ul << DSC_MIN) \
128 /* A variable specified on DESCRIPTIVES. */
131 struct variable *v; /* Variable to calculate on. */
132 char z_name[LONG_NAME_LEN + 1]; /* Name for z-score variable. */
133 double valid, missing; /* Valid, missing counts. */
134 struct moments *moments; /* Moments. */
135 double min, max; /* Maximum and mimimum values. */
136 double stats[DSC_N_STATS]; /* All the stats' values. */
142 DSC_LINE, /* Abbreviated format. */
143 DSC_SERIAL /* Long format. */
146 /* A DESCRIPTIVES procedure. */
149 /* Per-variable info. */
150 struct dsc_var *vars; /* Variables. */
151 size_t var_cnt; /* Number of variables. */
154 enum dsc_missing_type missing_type; /* Treatment of missing values. */
155 int include_user_missing; /* Nonzero to include user-missing values. */
156 int show_var_labels; /* Nonzero to show variable labels. */
157 int show_index; /* Nonzero to show variable index. */
158 enum dsc_format format; /* Output format. */
160 /* Accumulated results. */
161 double missing_listwise; /* Sum of weights of cases missing listwise. */
162 double valid; /* Sum of weights of valid cases. */
163 bool bad_warn; /* Warn if bad weight found. */
164 enum dsc_statistic sort_by_stat; /* Statistic to sort by; -1: name. */
165 int sort_ascending; /* !0: ascending order; 0: descending. */
166 unsigned long show_stats; /* Statistics to display. */
167 unsigned long calc_stats; /* Statistics to calculate. */
168 enum moment max_moment; /* Highest moment needed for stats. */
172 static enum dsc_statistic match_statistic (struct lexer *);
173 static void free_dsc_proc (struct dsc_proc *);
175 /* Z-score functions. */
176 static bool try_name (const struct dictionary *dict,
177 struct dsc_proc *dsc, const char *name);
178 static bool generate_z_varname (const struct dictionary *dict,
179 struct dsc_proc *dsc, char *z_name,
180 const char *name, size_t *z_cnt);
181 static void dump_z_table (struct dsc_proc *);
182 static void setup_z_trns (struct dsc_proc *, struct dataset *);
184 /* Procedure execution functions. */
185 static bool calc_descriptives (const struct ccase *first,
186 const struct casefile *, void *dsc_,
187 const struct dataset *);
188 static void display (struct dsc_proc *dsc);
190 /* Parser and outline. */
192 /* Handles DESCRIPTIVES. */
194 cmd_descriptives (struct lexer *lexer, struct dataset *ds)
196 struct dictionary *dict = dataset_dict (ds);
197 struct dsc_proc *dsc;
198 struct variable **vars = NULL;
200 int save_z_scores = 0;
205 /* Create and initialize dsc. */
206 dsc = xmalloc (sizeof *dsc);
209 dsc->missing_type = DSC_VARIABLE;
210 dsc->include_user_missing = 0;
211 dsc->show_var_labels = 1;
213 dsc->format = DSC_LINE;
214 dsc->missing_listwise = 0.;
217 dsc->sort_by_stat = DSC_NONE;
218 dsc->sort_ascending = 1;
219 dsc->show_stats = dsc->calc_stats = DEFAULT_STATS;
221 /* Parse DESCRIPTIVES. */
222 while (lex_token (lexer) != '.')
224 if (lex_match_id (lexer, "MISSING"))
226 lex_match (lexer, '=');
227 while (lex_token (lexer) != '.' && lex_token (lexer) != '/')
229 if (lex_match_id (lexer, "VARIABLE"))
230 dsc->missing_type = DSC_VARIABLE;
231 else if (lex_match_id (lexer, "LISTWISE"))
232 dsc->missing_type = DSC_LISTWISE;
233 else if (lex_match_id (lexer, "INCLUDE"))
234 dsc->include_user_missing = 1;
237 lex_error (lexer, NULL);
240 lex_match (lexer, ',');
243 else if (lex_match_id (lexer, "SAVE"))
245 else if (lex_match_id (lexer, "FORMAT"))
247 lex_match (lexer, '=');
248 while (lex_token (lexer) != '.' && lex_token (lexer) != '/')
250 if (lex_match_id (lexer, "LABELS"))
251 dsc->show_var_labels = 1;
252 else if (lex_match_id (lexer, "NOLABELS"))
253 dsc->show_var_labels = 0;
254 else if (lex_match_id (lexer, "INDEX"))
256 else if (lex_match_id (lexer, "NOINDEX"))
258 else if (lex_match_id (lexer, "LINE"))
259 dsc->format = DSC_LINE;
260 else if (lex_match_id (lexer, "SERIAL"))
261 dsc->format = DSC_SERIAL;
264 lex_error (lexer, NULL);
267 lex_match (lexer, ',');
270 else if (lex_match_id (lexer, "STATISTICS"))
272 lex_match (lexer, '=');
274 while (lex_token (lexer) != '.' && lex_token (lexer) != '/')
276 if (lex_match (lexer, T_ALL))
277 dsc->show_stats |= (1ul << DSC_N_STATS) - 1;
278 else if (lex_match_id (lexer, "DEFAULT"))
279 dsc->show_stats |= DEFAULT_STATS;
281 dsc->show_stats |= 1ul << (match_statistic (lexer));
282 lex_match (lexer, ',');
284 if (dsc->show_stats == 0)
285 dsc->show_stats = DEFAULT_STATS;
287 else if (lex_match_id (lexer, "SORT"))
289 lex_match (lexer, '=');
290 if (lex_match_id (lexer, "NAME"))
291 dsc->sort_by_stat = DSC_NAME;
294 dsc->sort_by_stat = match_statistic (lexer);
295 if (dsc->sort_by_stat == DSC_NONE )
296 dsc->sort_by_stat = DSC_MEAN;
298 if (lex_match (lexer, '('))
300 if (lex_match_id (lexer, "A"))
301 dsc->sort_ascending = 1;
302 else if (lex_match_id (lexer, "D"))
303 dsc->sort_ascending = 0;
305 lex_error (lexer, NULL);
306 lex_force_match (lexer, ')');
309 else if (var_cnt == 0)
311 if (lex_look_ahead (lexer) == '=')
313 lex_match_id (lexer, "VARIABLES");
314 lex_match (lexer, '=');
317 while (lex_token (lexer) != '.' && lex_token (lexer) != '/')
321 if (!parse_variables (lexer, dataset_dict (ds), &vars, &var_cnt,
322 PV_APPEND | PV_NO_DUPLICATE | PV_NUMERIC))
325 dsc->vars = xnrealloc (dsc->vars, var_cnt, sizeof *dsc->vars);
326 for (i = dsc->var_cnt; i < var_cnt; i++)
328 struct dsc_var *dv = &dsc->vars[i];
330 dv->z_name[0] = '\0';
333 dsc->var_cnt = var_cnt;
335 if (lex_match (lexer, '('))
337 if (lex_token (lexer) != T_ID)
339 lex_error (lexer, NULL);
342 if (try_name (dict, dsc, lex_tokid (lexer)))
344 strcpy (dsc->vars[dsc->var_cnt - 1].z_name, lex_tokid (lexer));
348 msg (SE, _("Z-score variable name %s would be"
349 " a duplicate variable name."), lex_tokid (lexer));
351 if (!lex_force_match (lexer, ')'))
358 lex_error (lexer, NULL);
362 lex_match (lexer, '/');
366 msg (SE, _("No variables specified."));
370 /* Construct z-score varnames, show translation table. */
371 if (z_cnt || save_z_scores)
377 for (i = 0; i < dsc->var_cnt; i++)
378 if (dsc->vars[i].z_name[0] == 0)
380 if (!generate_z_varname (dict, dsc, dsc->vars[i].z_name,
381 dsc->vars[i].v->name, &gen_cnt))
389 /* Figure out statistics to display. */
390 if (dsc->show_stats & (1ul << DSC_SKEWNESS))
391 dsc->show_stats |= 1ul << DSC_SESKEW;
392 if (dsc->show_stats & (1ul << DSC_KURTOSIS))
393 dsc->show_stats |= 1ul << DSC_SEKURT;
395 /* Figure out which statistics to calculate. */
396 dsc->calc_stats = dsc->show_stats;
398 dsc->calc_stats |= (1ul << DSC_MEAN) | (1ul << DSC_STDDEV);
399 if (dsc->sort_by_stat >= 0)
400 dsc->calc_stats |= 1ul << dsc->sort_by_stat;
401 if (dsc->show_stats & (1ul << DSC_SESKEW))
402 dsc->calc_stats |= 1ul << DSC_SKEWNESS;
403 if (dsc->show_stats & (1ul << DSC_SEKURT))
404 dsc->calc_stats |= 1ul << DSC_KURTOSIS;
406 /* Figure out maximum moment needed and allocate moments for
408 dsc->max_moment = MOMENT_NONE;
409 for (i = 0; i < DSC_N_STATS; i++)
410 if (dsc->calc_stats & (1ul << i) && dsc_info[i].moment > dsc->max_moment)
411 dsc->max_moment = dsc_info[i].moment;
412 if (dsc->max_moment != MOMENT_NONE)
413 for (i = 0; i < dsc->var_cnt; i++)
414 dsc->vars[i].moments = moments_create (dsc->max_moment);
417 ok = multipass_procedure_with_splits (ds, calc_descriptives, dsc);
421 setup_z_trns (dsc, ds);
426 return ok ? CMD_SUCCESS : CMD_CASCADING_FAILURE;
434 /* Returns the statistic named by the current token and skips past the token.
435 Returns DSC_NONE if no statistic is given (e.g., subcommand with no
436 specifiers). Emits an error if the current token ID does not name a
438 static enum dsc_statistic
439 match_statistic (struct lexer *lexer)
441 if (lex_token (lexer) == T_ID)
443 enum dsc_statistic stat;
445 for (stat = 0; stat < DSC_N_STATS; stat++)
446 if (lex_match_id (lexer, dsc_info[stat].identifier))
450 lex_error (lexer, _("expecting statistic name: reverting to default"));
458 free_dsc_proc (struct dsc_proc *dsc)
465 for (i = 0; i < dsc->var_cnt; i++)
466 moments_destroy (dsc->vars[i].moments);
473 /* Returns false if NAME is a duplicate of any existing variable name or
474 of any previously-declared z-var name; otherwise returns true. */
476 try_name (const struct dictionary *dict, struct dsc_proc *dsc,
481 if (dict_lookup_var (dict, name) != NULL)
483 for (i = 0; i < dsc->var_cnt; i++)
484 if (!strcasecmp (dsc->vars[i].z_name, name))
489 /* Generates a name for a Z-score variable based on a variable
490 named VAR_NAME, given that *Z_CNT generated variable names are
491 known to already exist. If successful, returns true and
492 copies the new name into Z_NAME. On failure, returns false. */
494 generate_z_varname (const struct dictionary *dict, struct dsc_proc *dsc, char *z_name,
495 const char *var_name, size_t *z_cnt)
497 char name[LONG_NAME_LEN + 1];
499 /* Try a name based on the original variable name. */
501 str_copy_trunc (name + 1, sizeof name - 1, var_name);
502 if (try_name (dict, dsc, name))
504 strcpy (z_name, name);
508 /* Generate a synthetic name. */
514 sprintf (name, "ZSC%03d", *z_cnt);
515 else if (*z_cnt <= 108)
516 sprintf (name, "STDZ%02d", *z_cnt - 99);
517 else if (*z_cnt <= 117)
518 sprintf (name, "ZZZZ%02d", *z_cnt - 108);
519 else if (*z_cnt <= 126)
520 sprintf (name, "ZQZQ%02d", *z_cnt - 117);
523 msg (SE, _("Ran out of generic names for Z-score variables. "
524 "There are only 126 generic names: ZSC001-ZSC0999, "
525 "STDZ01-STDZ09, ZZZZ01-ZZZZ09, ZQZQ01-ZQZQ09."));
529 if (try_name (dict, dsc, name))
531 strcpy (z_name, name);
538 /* Outputs a table describing the mapping between source
539 variables and Z-score variables. */
541 dump_z_table (struct dsc_proc *dsc)
549 for (i = 0; i < dsc->var_cnt; i++)
550 if (dsc->vars[i].z_name[0] != '\0')
554 t = tab_create (2, cnt + 1, 0);
555 tab_title (t, _("Mapping of variables to corresponding Z-scores."));
556 tab_columns (t, SOM_COL_DOWN, 1);
557 tab_headers (t, 0, 0, 1, 0);
558 tab_box (t, TAL_1, TAL_1, TAL_0, TAL_1, 0, 0, 1, cnt);
559 tab_hline (t, TAL_2, 0, 1, 1);
560 tab_text (t, 0, 0, TAB_CENTER | TAT_TITLE, _("Source"));
561 tab_text (t, 1, 0, TAB_CENTER | TAT_TITLE, _("Target"));
562 tab_dim (t, tab_natural_dimensions);
567 for (i = 0, y = 1; i < dsc->var_cnt; i++)
568 if (dsc->vars[i].z_name[0] != '\0')
570 tab_text (t, 0, y, TAB_LEFT, dsc->vars[i].v->name);
571 tab_text (t, 1, y++, TAB_LEFT, dsc->vars[i].z_name);
578 /* Transformation function to calculate Z-scores. Will return SYSMIS if any of
579 the following are true: 1) mean or standard deviation is SYSMIS 2) score is
580 SYSMIS 3) score is user missing and they were not included in the original
581 analyis. 4) any of the variables in the original analysis were missing
582 (either system or user-missing values that weren't included).
585 descriptives_trns_proc (void *trns_, struct ccase * c,
586 casenumber case_idx UNUSED)
588 struct dsc_trns *t = trns_;
589 struct dsc_z_score *z;
590 struct variable **vars;
593 if (t->missing_type == DSC_LISTWISE)
596 for (vars = t->vars; vars < t->vars + t->var_cnt; vars++)
598 double score = case_num (c, (*vars)->fv);
600 || (!t->include_user_missing
601 && mv_is_num_user_missing (&(*vars)->miss, score)))
609 for (z = t->z_scores; z < t->z_scores + t->z_score_cnt; z++)
611 double input = case_num (c, z->src_idx);
612 double *output = &case_data_rw (c, z->dst_idx)->f;
614 if (z->mean == SYSMIS || z->std_dev == SYSMIS
615 || all_sysmis || input == SYSMIS
616 || (!t->include_user_missing
617 && mv_is_num_user_missing (&z->v->miss, input)))
620 *output = (input - z->mean) / z->std_dev;
622 return TRNS_CONTINUE;
625 /* Frees a descriptives_trns struct. */
627 descriptives_trns_free (void *trns_)
629 struct dsc_trns *t = trns_;
632 assert((t->missing_type != DSC_LISTWISE) ^ (t->vars != NULL));
637 /* Sets up a transformation to calculate Z scores. */
639 setup_z_trns (struct dsc_proc *dsc, struct dataset *ds)
644 for (cnt = i = 0; i < dsc->var_cnt; i++)
645 if (dsc->vars[i].z_name[0] != '\0')
648 t = xmalloc (sizeof *t);
649 t->z_scores = xnmalloc (cnt, sizeof *t->z_scores);
650 t->z_score_cnt = cnt;
651 t->missing_type = dsc->missing_type;
652 t->include_user_missing = dsc->include_user_missing;
653 if ( t->missing_type == DSC_LISTWISE )
655 t->var_cnt = dsc->var_cnt;
656 t->vars = xnmalloc (t->var_cnt, sizeof *t->vars);
657 for (i = 0; i < t->var_cnt; i++)
658 t->vars[i] = dsc->vars[i].v;
666 for (cnt = i = 0; i < dsc->var_cnt; i++)
668 struct dsc_var *dv = &dsc->vars[i];
669 if (dv->z_name[0] != '\0')
671 struct dsc_z_score *z;
673 struct variable *dst_var;
675 dst_var = dict_create_var_assert (dataset_dict (ds), dv->z_name, 0);
678 dst_var->label = xmalloc (strlen (dv->v->label) + 12);
679 cp = stpcpy (dst_var->label, _("Z-score of "));
680 strcpy (cp, dv->v->label);
684 dst_var->label = xmalloc (strlen (dv->v->name) + 12);
685 cp = stpcpy (dst_var->label, _("Z-score of "));
686 strcpy (cp, dv->v->name);
689 z = &t->z_scores[cnt++];
690 z->src_idx = dv->v->fv;
691 z->dst_idx = dst_var->fv;
692 z->mean = dv->stats[DSC_MEAN];
693 z->std_dev = dv->stats[DSC_STDDEV];
698 add_transformation (ds,
699 descriptives_trns_proc, descriptives_trns_free, t);
702 /* Statistical calculation. */
704 static bool listwise_missing (struct dsc_proc *dsc, const struct ccase *c);
706 /* Calculates and displays descriptive statistics for the cases
709 calc_descriptives (const struct ccase *first,
710 const struct casefile *cf, void *dsc_,
711 const struct dataset *ds)
713 struct dsc_proc *dsc = dsc_;
714 struct casereader *reader;
718 output_split_file_values (ds, first);
720 for (i = 0; i < dsc->var_cnt; i++)
722 struct dsc_var *dv = &dsc->vars[i];
724 dv->valid = dv->missing = 0.0;
725 if (dv->moments != NULL)
726 moments_clear (dv->moments);
730 dsc->missing_listwise = 0.;
733 /* First pass to handle most of the work. */
734 for (reader = casefile_get_reader (cf, NULL);
735 casereader_read (reader, &c);
738 double weight = dict_get_case_weight (dataset_dict (ds), &c, &dsc->bad_warn);
742 /* Check for missing values. */
743 if (listwise_missing (dsc, &c))
745 dsc->missing_listwise += weight;
746 if (dsc->missing_type == DSC_LISTWISE)
749 dsc->valid += weight;
751 for (i = 0; i < dsc->var_cnt; i++)
753 struct dsc_var *dv = &dsc->vars[i];
754 double x = case_num (&c, dv->v->fv);
756 if (dsc->missing_type != DSC_LISTWISE
758 || (!dsc->include_user_missing
759 && mv_is_num_user_missing (&dv->v->miss, x))))
761 dv->missing += weight;
765 if (dv->moments != NULL)
766 moments_pass_one (dv->moments, x, weight);
774 casereader_destroy (reader);
776 /* Second pass for higher-order moments. */
777 if (dsc->max_moment > MOMENT_MEAN)
779 for (reader = casefile_get_reader (cf, NULL);
780 casereader_read (reader, &c);
783 double weight = dict_get_case_weight (dataset_dict (ds), &c,
788 /* Check for missing values. */
789 if (listwise_missing (dsc, &c)
790 && dsc->missing_type == DSC_LISTWISE)
793 for (i = 0; i < dsc->var_cnt; i++)
795 struct dsc_var *dv = &dsc->vars[i];
796 double x = case_num (&c, dv->v->fv);
798 if (dsc->missing_type != DSC_LISTWISE
800 || (!dsc->include_user_missing
801 && mv_is_num_user_missing (&dv->v->miss, x))))
804 if (dv->moments != NULL)
805 moments_pass_two (dv->moments, x, weight);
808 casereader_destroy (reader);
811 /* Calculate results. */
812 for (i = 0; i < dsc->var_cnt; i++)
814 struct dsc_var *dv = &dsc->vars[i];
818 for (j = 0; j < DSC_N_STATS; j++)
819 dv->stats[j] = SYSMIS;
821 dv->valid = W = dsc->valid - dv->missing;
823 if (dv->moments != NULL)
824 moments_calculate (dv->moments, NULL,
825 &dv->stats[DSC_MEAN], &dv->stats[DSC_VARIANCE],
826 &dv->stats[DSC_SKEWNESS], &dv->stats[DSC_KURTOSIS]);
827 if (dsc->calc_stats & (1ul << DSC_SEMEAN)
828 && dv->stats[DSC_VARIANCE] != SYSMIS && W > 0.)
829 dv->stats[DSC_SEMEAN] = sqrt (dv->stats[DSC_VARIANCE]) / sqrt (W);
830 if (dsc->calc_stats & (1ul << DSC_STDDEV)
831 && dv->stats[DSC_VARIANCE] != SYSMIS)
832 dv->stats[DSC_STDDEV] = sqrt (dv->stats[DSC_VARIANCE]);
833 if (dsc->calc_stats & (1ul << DSC_SEKURT))
834 if (dv->stats[DSC_KURTOSIS] != SYSMIS)
835 dv->stats[DSC_SEKURT] = calc_sekurt (W);
836 if (dsc->calc_stats & (1ul << DSC_SESKEW)
837 && dv->stats[DSC_SKEWNESS] != SYSMIS)
838 dv->stats[DSC_SESKEW] = calc_seskew (W);
839 dv->stats[DSC_RANGE] = ((dv->min == DBL_MAX || dv->max == -DBL_MAX)
840 ? SYSMIS : dv->max - dv->min);
841 dv->stats[DSC_MIN] = dv->min == DBL_MAX ? SYSMIS : dv->min;
842 dv->stats[DSC_MAX] = dv->max == -DBL_MAX ? SYSMIS : dv->max;
843 if (dsc->calc_stats & (1ul << DSC_SUM))
844 dv->stats[DSC_SUM] = W * dv->stats[DSC_MEAN];
847 /* Output results. */
853 /* Returns true if any of the descriptives variables in DSC's
854 variable list have missing values in case C, false otherwise. */
856 listwise_missing (struct dsc_proc *dsc, const struct ccase *c)
860 for (i = 0; i < dsc->var_cnt; i++)
862 struct dsc_var *dv = &dsc->vars[i];
863 double x = case_num (c, dv->v->fv);
866 || (!dsc->include_user_missing
867 && mv_is_num_user_missing (&dv->v->miss, x)))
873 /* Statistical display. */
875 static algo_compare_func descriptives_compare_dsc_vars;
877 /* Displays a table of descriptive statistics for DSC. */
879 display (struct dsc_proc *dsc)
885 nc = 1 + (dsc->format == DSC_SERIAL ? 2 : 1);
886 for (i = 0; i < DSC_N_STATS; i++)
887 if (dsc->show_stats & (1ul << i))
890 if (dsc->sort_by_stat != DSC_NONE)
891 sort (dsc->vars, dsc->var_cnt, sizeof *dsc->vars,
892 descriptives_compare_dsc_vars, dsc);
894 t = tab_create (nc, dsc->var_cnt + 1, 0);
895 tab_headers (t, 1, 0, 1, 0);
896 tab_box (t, TAL_1, TAL_1, -1, -1, 0, 0, nc - 1, dsc->var_cnt);
897 tab_box (t, -1, -1, -1, TAL_1, 1, 0, nc - 1, dsc->var_cnt);
898 tab_hline (t, TAL_2, 0, nc - 1, 1);
899 tab_vline (t, TAL_2, 1, 0, dsc->var_cnt);
900 tab_dim (t, tab_natural_dimensions);
903 tab_text (t, nc++, 0, TAB_LEFT | TAT_TITLE, _("Variable"));
904 if (dsc->format == DSC_SERIAL)
906 tab_text (t, nc++, 0, TAB_CENTER | TAT_TITLE, _("Valid N"));
907 tab_text (t, nc++, 0, TAB_CENTER | TAT_TITLE, _("Missing N"));
910 tab_text (t, nc++, 0, TAB_CENTER | TAT_TITLE, "N");
912 for (i = 0; i < DSC_N_STATS; i++)
913 if (dsc->show_stats & (1ul << i))
915 const char *title = gettext (dsc_info[i].name);
916 tab_text (t, nc++, 0, TAB_CENTER | TAT_TITLE, title);
919 for (i = 0; i < dsc->var_cnt; i++)
921 struct dsc_var *dv = &dsc->vars[i];
925 tab_text (t, nc++, i + 1, TAB_LEFT, dv->v->name);
926 tab_text (t, nc++, i + 1, TAT_PRINTF, "%g", dv->valid);
927 if (dsc->format == DSC_SERIAL)
928 tab_text (t, nc++, i + 1, TAT_PRINTF, "%g", dv->missing);
929 for (j = 0; j < DSC_N_STATS; j++)
930 if (dsc->show_stats & (1ul << j))
931 tab_float (t, nc++, i + 1, TAB_NONE, dv->stats[j], 10, 3);
934 tab_title (t, _("Valid cases = %g; cases with missing value(s) = %g."),
935 dsc->valid, dsc->missing_listwise);
940 /* Compares `struct dsc_var's A and B according to the ordering
943 descriptives_compare_dsc_vars (const void *a_, const void *b_, const void *dsc_)
945 const struct dsc_var *a = a_;
946 const struct dsc_var *b = b_;
947 const struct dsc_proc *dsc = dsc_;
951 if (dsc->sort_by_stat == DSC_NAME)
952 result = strcasecmp (a->v->name, b->v->name);
955 double as = a->stats[dsc->sort_by_stat];
956 double bs = b->stats[dsc->sort_by_stat];
958 result = as < bs ? -1 : as > bs;
961 if (!dsc->sort_ascending)