1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 1997-9, 2000, 2009, 2010, 2011 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
23 #include "data/casegrouper.h"
24 #include "data/casereader.h"
25 #include "data/dataset.h"
26 #include "data/dictionary.h"
27 #include "data/transformations.h"
28 #include "data/variable.h"
29 #include "language/command.h"
30 #include "language/dictionary/split-file.h"
31 #include "language/lexer/lexer.h"
32 #include "language/lexer/variable-parser.h"
33 #include "libpspp/array.h"
34 #include "libpspp/assertion.h"
35 #include "libpspp/compiler.h"
36 #include "libpspp/i18n.h"
37 #include "libpspp/message.h"
38 #include "math/moments.h"
39 #include "output/tab.h"
41 #include "gl/xalloc.h"
44 #define _(msgid) gettext (msgid)
45 #define N_(msgid) msgid
47 /* DESCRIPTIVES private data. */
51 /* Handling of missing values. */
54 DSC_VARIABLE, /* Handle missing values on a per-variable basis. */
55 DSC_LISTWISE /* Discard entire case if any variable is missing. */
58 /* Describes properties of a distribution for the purpose of
59 calculating a Z-score. */
62 const struct variable *src_var; /* Variable on which z-score is based. */
63 struct variable *z_var; /* New z-score variable. */
64 double mean; /* Distribution mean (copied from stats[DSC_MEAN]; may be SYSMIS). */
65 double std_dev; /* Distribution standard deviation (copied from stats[DSC_STDDEV]; may be SYSMIS). */
68 /* DESCRIPTIVES transformation (for calculating Z-scores). */
71 struct dsc_z_score *z_scores; /* Array of Z-scores (z_score_cnt elements). */
72 int z_score_cnt; /* Number of Z-scores. */
/* vars and var_cnt are filled in by setup_z_trns only when missing_type is
   DSC_LISTWISE; descriptives_trns_free asserts that vars is non-NULL exactly
   in that case. */
73 const struct variable **vars; /* Variables for listwise missing checks. */
74 size_t var_cnt; /* Number of variables in vars. */
75 enum dsc_missing_type missing_type; /* Treatment of missing values. */
76 enum mv_class exclude; /* Classes of missing values to exclude. */
79 /* Statistics. Used as bit indexes, so must be 32 or fewer. */
/* The non-negative enumerators index dsc_info[] and the per-variable stats[]
   array, and serve as bit positions (1ul << DSC_x) in the show_stats and
   calc_stats masks.  DSC_N_STATS is the number of real statistics. */
82 DSC_MEAN = 0, DSC_SEMEAN, DSC_STDDEV, DSC_VARIANCE, DSC_KURTOSIS,
83 DSC_SEKURT, DSC_SKEWNESS, DSC_SESKEW, DSC_RANGE, DSC_MIN,
84 DSC_MAX, DSC_SUM, DSC_N_STATS,
86 /* Only valid as sort criteria.  Both are negative, so they must never be
   used as a shift count or array index. */
87 DSC_NAME = -2, /* Sort by name. */
88 DSC_NONE = -1 /* Unsorted. */
91 /* Describes one statistic. */
92 struct dsc_statistic_info
94 const char *identifier; /* Identifier. */
95 const char *name; /* Full name. */
96 enum moment moment; /* Highest moment needed to calculate. */
99 /* Table of statistics, indexed by DSC_*. */
/* Each row gives the syntax keyword, the untranslated display name (marked
   with N_() and translated with gettext at display time), and the highest
   moment the statistic needs.  Row order must match enum dsc_statistic,
   because the table is indexed by DSC_* values. */
100 static const struct dsc_statistic_info dsc_info[DSC_N_STATS] =
102 {"MEAN", N_("Mean"), MOMENT_MEAN},
103 {"SEMEAN", N_("S.E. Mean"), MOMENT_VARIANCE},
104 {"STDDEV", N_("Std Dev"), MOMENT_VARIANCE},
105 {"VARIANCE", N_("Variance"), MOMENT_VARIANCE},
106 {"KURTOSIS", N_("Kurtosis"), MOMENT_KURTOSIS},
/* The standard errors of kurtosis and skewness list MOMENT_NONE: whenever
   one of them is shown, cmd_descriptives also adds DSC_KURTOSIS or
   DSC_SKEWNESS to calc_stats, and that entry supplies the moment. */
107 {"SEKURTOSIS", N_("S.E. Kurt"), MOMENT_NONE},
108 {"SKEWNESS", N_("Skewness"), MOMENT_SKEWNESS},
109 {"SESKEWNESS", N_("S.E. Skew"), MOMENT_NONE},
110 {"RANGE", N_("Range"), MOMENT_NONE},
111 {"MINIMUM", N_("Minimum"), MOMENT_NONE},
112 {"MAXIMUM", N_("Maximum"), MOMENT_NONE},
113 {"SUM", N_("Sum"), MOMENT_MEAN},
116 /* Statistics calculated by default if none are explicitly
118 #define DEFAULT_STATS \
119 ((1ul << DSC_MEAN) | (1ul << DSC_STDDEV) | (1ul << DSC_MIN) \
122 /* A variable specified on DESCRIPTIVES. */
125 const struct variable *v; /* Variable to calculate on. */
126 char *z_name; /* Name for z-score variable, or NULL if none. */
127 double valid, missing; /* Valid, missing counts (sums of case weights). */
128 struct moments *moments; /* Moments; may be NULL (checked before each use). */
129 double min, max; /* Minimum and maximum values; DBL_MAX and -DBL_MAX are reported as SYSMIS. */
130 double stats[DSC_N_STATS]; /* All the stats' values, indexed by DSC_*; SYSMIS if not calculated. */
136 DSC_LINE, /* Abbreviated format. */
137 DSC_SERIAL /* Long format. */
140 /* A DESCRIPTIVES procedure. */
143 /* Per-variable info. */
144 struct dsc_var *vars; /* Variables. */
145 size_t var_cnt; /* Number of variables. */
148 enum dsc_missing_type missing_type; /* Treatment of missing values. */
149 enum mv_class exclude; /* Classes of missing values to exclude. */
150 int show_var_labels; /* Nonzero to show variable labels. */
151 int show_index; /* Nonzero to show variable index. */
152 enum dsc_format format; /* Output format. */
154 /* Accumulated results. */
155 double missing_listwise; /* Sum of weights of cases missing listwise. */
156 double valid; /* Sum of weights of valid cases. */
157 bool bad_warn; /* Warn if bad weight found. */
158 enum dsc_statistic sort_by_stat; /* Statistic to sort by; DSC_NAME (-2): by name; DSC_NONE (-1): unsorted. */
159 int sort_ascending; /* !0: ascending order; 0: descending. */
160 unsigned long show_stats; /* Statistics to display (bit mask of 1ul << DSC_*). */
161 unsigned long calc_stats; /* Statistics to calculate (bit mask of 1ul << DSC_*). */
162 enum moment max_moment; /* Highest moment needed for stats. */
166 static enum dsc_statistic match_statistic (struct lexer *);
167 static void free_dsc_proc (struct dsc_proc *);
169 /* Z-score functions. */
170 static bool try_name (const struct dictionary *dict,
171 struct dsc_proc *dsc, const char *name);
172 static char *generate_z_varname (const struct dictionary *dict,
173 struct dsc_proc *dsc,
174 const char *name, int *z_cnt);
175 static void dump_z_table (struct dsc_proc *);
176 static void setup_z_trns (struct dsc_proc *, struct dataset *);
178 /* Procedure execution functions. */
179 static void calc_descriptives (struct dsc_proc *, struct casereader *,
181 static void display (struct dsc_proc *dsc);
183 /* Parser and outline. */
185 /* Handles DESCRIPTIVES. */
/* Parses and runs the DESCRIPTIVES command.

   LEXER supplies the command's tokens; DS is the dataset whose dictionary
   provides the variables and whose cases are analyzed.

   The parse loop accepts the MISSING, SAVE, FORMAT, STATISTICS and SORT
   subcommands and a variable list (optionally introduced by "VARIABLES="),
   where a variable may be followed by a parenthesized z-score variable name.
   After parsing, the function works out which statistics to show and to
   calculate, allocates moments for the variables, runs calc_descriptives on
   each split-file group, and calls setup_z_trns.

   Returns CMD_SUCCESS if the procedure ran without error, otherwise
   CMD_CASCADING_FAILURE. */
187 cmd_descriptives (struct lexer *lexer, struct dataset *ds)
189 struct dictionary *dict = dataset_dict (ds);
190 struct dsc_proc *dsc;
191 const struct variable **vars = NULL;
193 int save_z_scores = 0;
198 struct casegrouper *grouper;
199 struct casereader *group;
201 /* Create and initialize dsc. */
202 dsc = xmalloc (sizeof *dsc);
205 dsc->missing_type = DSC_VARIABLE;
206 dsc->exclude = MV_ANY;
207 dsc->show_var_labels = 1;
209 dsc->format = DSC_LINE;
210 dsc->missing_listwise = 0.;
213 dsc->sort_by_stat = DSC_NONE;
214 dsc->sort_ascending = 1;
215 dsc->show_stats = dsc->calc_stats = DEFAULT_STATS;
217 /* Parse DESCRIPTIVES. */
218 while (lex_token (lexer) != T_ENDCMD)
220 if (lex_match_id (lexer, "MISSING"))
222 lex_match (lexer, T_EQUALS);
223 while (lex_token (lexer) != T_ENDCMD && lex_token (lexer) != T_SLASH)
225 if (lex_match_id (lexer, "VARIABLE"))
226 dsc->missing_type = DSC_VARIABLE;
227 else if (lex_match_id (lexer, "LISTWISE"))
228 dsc->missing_type = DSC_LISTWISE;
/* INCLUDE narrows the excluded classes from the default MV_ANY to
   MV_SYSTEM. */
229 else if (lex_match_id (lexer, "INCLUDE"))
230 dsc->exclude = MV_SYSTEM;
233 lex_error (lexer, NULL);
236 lex_match (lexer, T_COMMA);
239 else if (lex_match_id (lexer, "SAVE"))
241 else if (lex_match_id (lexer, "FORMAT"))
243 lex_match (lexer, T_EQUALS);
244 while (lex_token (lexer) != T_ENDCMD && lex_token (lexer) != T_SLASH)
246 if (lex_match_id (lexer, "LABELS"))
247 dsc->show_var_labels = 1;
248 else if (lex_match_id (lexer, "NOLABELS"))
249 dsc->show_var_labels = 0;
250 else if (lex_match_id (lexer, "INDEX"))
252 else if (lex_match_id (lexer, "NOINDEX"))
254 else if (lex_match_id (lexer, "LINE"))
255 dsc->format = DSC_LINE;
256 else if (lex_match_id (lexer, "SERIAL"))
257 dsc->format = DSC_SERIAL;
260 lex_error (lexer, NULL);
263 lex_match (lexer, T_COMMA);
266 else if (lex_match_id (lexer, "STATISTICS"))
268 lex_match (lexer, T_EQUALS);
270 while (lex_token (lexer) != T_ENDCMD && lex_token (lexer) != T_SLASH)
272 if (lex_match (lexer, T_ALL))
273 dsc->show_stats |= (1ul << DSC_N_STATS) - 1;
274 else if (lex_match_id (lexer, "DEFAULT"))
275 dsc->show_stats |= DEFAULT_STATS;
/* NOTE(review): match_statistic is documented to return DSC_NONE (-1) when
   no statistic is recognized, and its result is used directly as a shift
   count here.  A negative shift count is undefined behavior in C; confirm
   and guard against DSC_NONE. */
277 dsc->show_stats |= 1ul << (match_statistic (lexer));
278 lex_match (lexer, T_COMMA);
280 if (dsc->show_stats == 0)
281 dsc->show_stats = DEFAULT_STATS;
283 else if (lex_match_id (lexer, "SORT"))
285 lex_match (lexer, T_EQUALS);
286 if (lex_match_id (lexer, "NAME"))
287 dsc->sort_by_stat = DSC_NAME;
290 dsc->sort_by_stat = match_statistic (lexer);
/* SORT without a recognized statistic falls back to sorting by mean. */
291 if (dsc->sort_by_stat == DSC_NONE )
292 dsc->sort_by_stat = DSC_MEAN;
294 if (lex_match (lexer, T_LPAREN))
296 if (lex_match_id (lexer, "A"))
297 dsc->sort_ascending = 1;
298 else if (lex_match_id (lexer, "D"))
299 dsc->sort_ascending = 0;
301 lex_error (lexer, NULL);
302 lex_force_match (lexer, T_RPAREN);
/* Anything else is treated as the variable list, which is accepted only
   while no variables have been parsed yet. */
305 else if (var_cnt == 0)
307 if (lex_next_token (lexer, 1) == T_EQUALS)
309 lex_match_id (lexer, "VARIABLES");
310 lex_match (lexer, T_EQUALS);
313 while (lex_token (lexer) != T_ENDCMD && lex_token (lexer) != T_SLASH)
317 if (!parse_variables_const (lexer, dict, &vars, &var_cnt,
318 PV_APPEND | PV_NO_DUPLICATE | PV_NUMERIC))
/* Grow dsc->vars to match the parsed list; only the newly appended
   entries (from the old dsc->var_cnt up to var_cnt) are visited. */
321 dsc->vars = xnrealloc ((void *)dsc->vars, var_cnt, sizeof *dsc->vars);
322 for (i = dsc->var_cnt; i < var_cnt; i++)
324 struct dsc_var *dv = &dsc->vars[i];
329 dsc->var_cnt = var_cnt;
331 if (lex_match (lexer, T_LPAREN))
333 if (lex_token (lexer) != T_ID)
335 lex_error (lexer, NULL);
338 if (try_name (dict, dsc, lex_tokcstr (lexer)))
/* A parenthesized name applies to the last variable parsed so far. */
340 struct dsc_var *dsc_var = &dsc->vars[dsc->var_cnt - 1];
341 dsc_var->z_name = xstrdup (lex_tokcstr (lexer));
345 msg (SE, _("Z-score variable name %s would be"
346 " a duplicate variable name."), lex_tokcstr (lexer));
348 if (!lex_force_match (lexer, T_RPAREN))
355 lex_error (lexer, NULL);
359 lex_match (lexer, T_SLASH);
363 msg (SE, _("No variables specified."));
367 /* Construct z-score varnames, show translation table. */
368 if (z_cnt || save_z_scores)
374 for (i = 0; i < dsc->var_cnt; i++)
376 struct dsc_var *dsc_var = &dsc->vars[i];
377 if (dsc_var->z_name == NULL)
379 const char *name = var_get_name (dsc_var->v);
380 dsc_var->z_name = generate_z_varname (dict, dsc, name,
382 if (dsc_var->z_name == NULL)
392 /* Figure out statistics to display. */
/* Showing skewness or kurtosis also shows its standard error. */
393 if (dsc->show_stats & (1ul << DSC_SKEWNESS))
394 dsc->show_stats |= 1ul << DSC_SESKEW;
395 if (dsc->show_stats & (1ul << DSC_KURTOSIS))
396 dsc->show_stats |= 1ul << DSC_SEKURT;
398 /* Figure out which statistics to calculate. */
399 dsc->calc_stats = dsc->show_stats;
401 dsc->calc_stats |= (1ul << DSC_MEAN) | (1ul << DSC_STDDEV);
/* sort_by_stat >= 0 excludes DSC_NAME and DSC_NONE, which are negative. */
402 if (dsc->sort_by_stat >= 0)
403 dsc->calc_stats |= 1ul << dsc->sort_by_stat;
/* calc_descriptives sets a standard error only when the corresponding
   skewness or kurtosis value is available, so calculate those too. */
404 if (dsc->show_stats & (1ul << DSC_SESKEW))
405 dsc->calc_stats |= 1ul << DSC_SKEWNESS;
406 if (dsc->show_stats & (1ul << DSC_SEKURT))
407 dsc->calc_stats |= 1ul << DSC_KURTOSIS;
409 /* Figure out maximum moment needed and allocate moments for
411 dsc->max_moment = MOMENT_NONE;
412 for (i = 0; i < DSC_N_STATS; i++)
413 if (dsc->calc_stats & (1ul << i) && dsc_info[i].moment > dsc->max_moment)
414 dsc->max_moment = dsc_info[i].moment;
415 if (dsc->max_moment != MOMENT_NONE)
416 for (i = 0; i < dsc->var_cnt; i++)
417 dsc->vars[i].moments = moments_create (dsc->max_moment);
420 grouper = casegrouper_create_splits (proc_open (ds), dict);
421 while (casegrouper_get_next_group (grouper, &group))
422 calc_descriptives (dsc, group, ds);
423 ok = casegrouper_destroy (grouper);
424 ok = proc_commit (ds) && ok;
428 setup_z_trns (dsc, ds);
433 return ok ? CMD_SUCCESS : CMD_CASCADING_FAILURE;
441 /* Returns the statistic named by the current token and skips past the token.
442 Returns DSC_NONE if no statistic is given (e.g., subcommand with no
443 specifiers). Emits an error if the current token ID does not name a
445 static enum dsc_statistic
446 match_statistic (struct lexer *lexer)
/* Only an identifier token can name a statistic. */
448 if (lex_token (lexer) == T_ID)
450 enum dsc_statistic stat;
/* Try each statistic keyword from dsc_info[] in DSC_* order. */
452 for (stat = 0; stat < DSC_N_STATS; stat++)
453 if (lex_match_id (lexer, dsc_info[stat].identifier))
/* Reports the unrecognized identifier to the user. */
457 lex_error (lexer, _("expecting statistic name: reverting to default"));
/* Releases the resources owned by DSC.  For every variable this frees the
   z-score name string and destroys the moments object. */
465 free_dsc_proc (struct dsc_proc *dsc)
472 for (i = 0; i < dsc->var_cnt; i++)
474 struct dsc_var *dsc_var = &dsc->vars[i];
/* z_name may be NULL; free (NULL) is a no-op. */
475 free (dsc_var->z_name);
/* NOTE(review): moments may be NULL here (other code checks for that);
   presumably moments_destroy accepts NULL -- confirm. */
476 moments_destroy (dsc_var->moments);
484 /* Returns false if NAME is a duplicate of any existing variable name or
485 of any previously-declared z-var name; otherwise returns true. */
487 try_name (const struct dictionary *dict, struct dsc_proc *dsc,
/* First check: NAME must not already be a variable in DICT. */
492 if (dict_lookup_var (dict, name) != NULL)
/* Second check: NAME must not match a z-score name already assigned to any
   variable of this command.  The comparison ignores case (strcasecmp). */
494 for (i = 0; i < dsc->var_cnt; i++)
496 struct dsc_var *dsc_var = &dsc->vars[i];
497 if (dsc_var->z_name != NULL && !strcasecmp (dsc_var->z_name, name))
503 /* Generates a name for a Z-score variable based on a variable
504 named VAR_NAME, given that *Z_CNT generated variable names are
505 known to already exist. If successful, returns the new name
506 as a dynamically allocated string. On failure, returns NULL. */
508 generate_z_varname (const struct dictionary *dict, struct dsc_proc *dsc,
509 const char *var_name, int *z_cnt)
511 char *z_name, *trunc_name;
513 /* Try a name based on the original variable name. */
514 z_name = xasprintf ("Z%s", var_name);
/* The "Z"-prefixed name is truncated before being tried, taking the
   dictionary's encoding into account. */
515 trunc_name = utf8_encoding_trunc (z_name, dict_get_encoding (dict),
518 if (try_name (dict, dsc, trunc_name))
522 /* Generate a synthetic name. */
/* The value of *Z_CNT selects the name family: "ZSC" with a three-digit
   number, then "STDZ", "ZZZZ" and "ZQZQ" with two-digit numbers, nine names
   each (up to 108, 117 and 126 respectively). */
530 sprintf (name, "ZSC%03d", *z_cnt);
531 else if (*z_cnt <= 108)
532 sprintf (name, "STDZ%02d", *z_cnt - 99);
533 else if (*z_cnt <= 117)
534 sprintf (name, "ZZZZ%02d", *z_cnt - 108);
535 else if (*z_cnt <= 126)
536 sprintf (name, "ZQZQ%02d", *z_cnt - 117);
/* The ZSC range ends at ZSC099: "ZSC%03d" yields three digits, and the
   stated total of 126 names is 99 + 9 + 9 + 9. */
539 msg (SE, _("Ran out of generic names for Z-score variables. "
540 "There are only 126 generic names: ZSC001-ZSC099, "
541 "STDZ01-STDZ09, ZZZZ01-ZZZZ09, ZQZQ01-ZQZQ09."));
/* A synthetic name is accepted only if it collides neither with the
   dictionary nor with an already assigned z-score name; the caller receives
   a freshly allocated copy. */
545 if (try_name (dict, dsc, name))
546 return xstrdup (name);
551 /* Outputs a table describing the mapping between source
552 variables and Z-score variables. */
554 dump_z_table (struct dsc_proc *dsc)
/* First scan: visit the variables that have a z-score name. */
562 for (i = 0; i < dsc->var_cnt; i++)
563 if (dsc->vars[i].z_name != NULL)
/* The table is created with dimensions 2 and cnt + 1: "Source" and "Target"
   headings at position 0, then one entry per variable with a z-score
   name. */
567 t = tab_create (2, cnt + 1);
568 tab_title (t, _("Mapping of variables to corresponding Z-scores."));
569 tab_headers (t, 0, 0, 1, 0);
570 tab_box (t, TAL_1, TAL_1, TAL_0, TAL_1, 0, 0, 1, cnt);
571 tab_hline (t, TAL_2, 0, 1, 1);
572 tab_text (t, 0, 0, TAB_CENTER | TAT_TITLE, _("Source"));
573 tab_text (t, 1, 0, TAB_CENTER | TAT_TITLE, _("Target"));
/* Second scan: y starts at 1, just past the headings, and advances only for
   variables that have a z-score name. */
578 for (i = 0, y = 1; i < dsc->var_cnt; i++)
579 if (dsc->vars[i].z_name != NULL)
581 tab_text (t, 0, y, TAB_LEFT, var_to_string (dsc->vars[i].v));
582 tab_text (t, 1, y++, TAB_LEFT, dsc->vars[i].z_name);
589 /* Transformation function to calculate Z-scores. Will return SYSMIS if any of
590 the following are true: 1) mean or standard deviation is SYSMIS 2) score is
591 SYSMIS 3) score is user missing and they were not included in the original
592 analysis. 4) any of the variables in the original analysis were missing
593 (either system or user-missing values that weren't included).
596 descriptives_trns_proc (void *trns_, struct ccase **c,
597 casenumber case_idx UNUSED)
/* TRNS_ is the struct dsc_trns built by setup_z_trns. */
599 struct dsc_trns *t = trns_;
600 struct dsc_z_score *z;
601 const struct variable **vars;
/* Listwise mode: examine every analysis variable in this case for a value
   that is missing under t->exclude. */
604 if (t->missing_type == DSC_LISTWISE)
607 for (vars = t->vars; vars < t->vars + t->var_cnt; vars++)
609 double score = case_num (*c, *vars);
610 if (var_is_num_missing (*vars, score, t->exclude))
/* The case is about to be written to.  Presumably case_unshare yields a
   copy that is safe to modify -- confirm against the case API. */
618 *c = case_unshare (*c);
619 for (z = t->z_scores; z < t->z_scores + t->z_score_cnt; z++)
621 double input = case_num (*c, z->src_var);
622 double *output = &case_data_rw (*c, z->z_var)->f;
/* NOTE(review): only SYSMIS is tested for std_dev below; a standard
   deviation of zero would make the division produce inf or NaN --
   confirm whether that is intended. */
624 if (z->mean == SYSMIS || z->std_dev == SYSMIS || all_sysmis
625 || var_is_num_missing (z->src_var, input, t->exclude))
/* Standardize: distance from the mean in units of standard deviation. */
628 *output = (input - z->mean) / z->std_dev;
630 return TRNS_CONTINUE;
633 /* Frees a descriptives_trns struct. */
635 descriptives_trns_free (void *trns_)
/* TRNS_ is the struct dsc_trns that was registered by setup_z_trns. */
637 struct dsc_trns *t = trns_;
/* Invariant check: vars is non-NULL exactly when the missing-value
   treatment is DSC_LISTWISE.  The XOR is true only for "not listwise and
   vars == NULL" or "listwise and vars != NULL". */
640 assert((t->missing_type != DSC_LISTWISE) ^ (t->vars != NULL));
645 /* Sets up a transformation to calculate Z scores. */
/* DSC supplies the variables, their z-score names and their calculated
   statistics; DS is the dataset that receives the new variables and the
   transformation. */
647 setup_z_trns (struct dsc_proc *dsc, struct dataset *ds)
/* Visit the variables that have a z-score name, to size the z_scores
   array. */
652 for (cnt = i = 0; i < dsc->var_cnt; i++)
653 if (dsc->vars[i].z_name != NULL)
656 t = xmalloc (sizeof *t);
657 t->z_scores = xnmalloc (cnt, sizeof *t->z_scores);
658 t->z_score_cnt = cnt;
659 t->missing_type = dsc->missing_type;
660 t->exclude = dsc->exclude;
/* Listwise treatment needs the full list of analysis variables at
   transformation time, so copy the variable pointers. */
661 if ( t->missing_type == DSC_LISTWISE )
663 t->var_cnt = dsc->var_cnt;
664 t->vars = xnmalloc (t->var_cnt, sizeof *t->vars);
665 for (i = 0; i < t->var_cnt; i++)
666 t->vars[i] = dsc->vars[i].v;
/* Create one dictionary variable and one dsc_z_score entry for each
   variable that has a z-score name. */
674 for (cnt = i = 0; i < dsc->var_cnt; i++)
676 struct dsc_var *dv = &dsc->vars[i];
677 if (dv->z_name != NULL)
679 struct dsc_z_score *z;
680 struct variable *dst_var;
682 dst_var = dict_create_var_assert (dataset_dict (ds), dv->z_name, 0);
/* NOTE(review): the string returned by xasprintf is handed straight to
   var_set_label and no pointer to it is kept here.  If var_set_label
   copies its argument, this allocation is leaked -- confirm ownership. */
683 var_set_label (dst_var,
684 xasprintf (_("Z-score of %s"),var_to_string (dv->v)),
687 z = &t->z_scores[cnt++];
/* The mean and standard deviation are whatever calc_descriptives last
   stored in dv->stats; cmd_descriptives runs it once per split group. */
690 z->mean = dv->stats[DSC_MEAN];
691 z->std_dev = dv->stats[DSC_STDDEV];
/* Register the transformation with its execution and cleanup callbacks. */
695 add_transformation (ds,
696 descriptives_trns_proc, descriptives_trns_free, t);
699 /* Statistical calculation. */
701 static bool listwise_missing (struct dsc_proc *dsc, const struct ccase *c);
703 /* Calculates and displays descriptive statistics for the cases
/* DSC holds the variables and the accumulated results, GROUP is the reader
   for the cases of one group, and DS is the dataset (used for its dictionary
   and the case weights).

   The first pass over the cases accumulates valid and missing weights and
   feeds moments_pass_one.  A second pass, made only when a moment above
   MOMENT_MEAN is needed, feeds moments_pass_two.  The per-variable stats[]
   values are derived afterwards. */
706 calc_descriptives (struct dsc_proc *dsc, struct casereader *group,
709 struct casereader *pass1, *pass2;
713 c = casereader_peek (group, 0);
716 casereader_destroy (group);
719 output_split_file_values (ds, c);
722 group = casereader_create_filter_weight (group, dataset_dict (ds),
/* A second reader is cloned only when a moment above the mean is needed. */
726 pass2 = dsc->max_moment <= MOMENT_MEAN ? NULL : casereader_clone (pass1);
/* Reset the per-variable accumulators before processing this group. */
728 for (i = 0; i < dsc->var_cnt; i++)
730 struct dsc_var *dv = &dsc->vars[i];
732 dv->valid = dv->missing = 0.0;
733 if (dv->moments != NULL)
734 moments_clear (dv->moments);
738 dsc->missing_listwise = 0.;
741 /* First pass to handle most of the work. */
742 for (; (c = casereader_read (pass1)) != NULL; case_unref (c))
744 double weight = dict_get_case_weight (dataset_dict (ds), c, NULL);
746 /* Check for missing values. */
747 if (listwise_missing (dsc, c))
749 dsc->missing_listwise += weight;
750 if (dsc->missing_type == DSC_LISTWISE)
753 dsc->valid += weight;
755 for (i = 0; i < dsc->var_cnt; i++)
757 struct dsc_var *dv = &dsc->vars[i];
758 double x = case_num (c, dv->v);
/* A value missing under dsc->exclude adds to the variable's missing
   weight. */
760 if (var_is_num_missing (dv->v, x, dsc->exclude))
762 dv->missing += weight;
766 if (dv->moments != NULL)
767 moments_pass_one (dv->moments, x, weight);
775 if (!casereader_destroy (pass1))
777 casereader_destroy (pass2);
781 /* Second pass for higher-order moments. */
782 if (dsc->max_moment > MOMENT_MEAN)
784 for (; (c = casereader_read (pass2)) != NULL; case_unref (c))
786 double weight = dict_get_case_weight (dataset_dict (ds), c, NULL);
788 /* Check for missing values. */
789 if (dsc->missing_type == DSC_LISTWISE && listwise_missing (dsc, c))
792 for (i = 0; i < dsc->var_cnt; i++)
794 struct dsc_var *dv = &dsc->vars[i];
795 double x = case_num (c, dv->v);
797 if (var_is_num_missing (dv->v, x, dsc->exclude))
800 if (dv->moments != NULL)
801 moments_pass_two (dv->moments, x, weight);
804 if (!casereader_destroy (pass2))
808 /* Calculate results. */
809 for (i = 0; i < dsc->var_cnt; i++)
811 struct dsc_var *dv = &dsc->vars[i];
/* Every statistic starts as SYSMIS and is overwritten only if it can be
   calculated. */
815 for (j = 0; j < DSC_N_STATS; j++)
816 dv->stats[j] = SYSMIS;
/* W is the weight of the cases valid for this variable: the total valid
   weight minus this variable's missing weight. */
818 dv->valid = W = dsc->valid - dv->missing;
820 if (dv->moments != NULL)
821 moments_calculate (dv->moments, NULL,
822 &dv->stats[DSC_MEAN], &dv->stats[DSC_VARIANCE],
823 &dv->stats[DSC_SKEWNESS], &dv->stats[DSC_KURTOSIS]);
/* Derived statistics are set only when requested in calc_stats and when
   the value they are based on is not SYSMIS. */
824 if (dsc->calc_stats & (1ul << DSC_SEMEAN)
825 && dv->stats[DSC_VARIANCE] != SYSMIS && W > 0.)
826 dv->stats[DSC_SEMEAN] = sqrt (dv->stats[DSC_VARIANCE]) / sqrt (W);
827 if (dsc->calc_stats & (1ul << DSC_STDDEV)
828 && dv->stats[DSC_VARIANCE] != SYSMIS)
829 dv->stats[DSC_STDDEV] = sqrt (dv->stats[DSC_VARIANCE]);
830 if (dsc->calc_stats & (1ul << DSC_SEKURT))
831 if (dv->stats[DSC_KURTOSIS] != SYSMIS)
832 dv->stats[DSC_SEKURT] = calc_sekurt (W);
833 if (dsc->calc_stats & (1ul << DSC_SESKEW)
834 && dv->stats[DSC_SKEWNESS] != SYSMIS)
835 dv->stats[DSC_SESKEW] = calc_seskew (W);
/* min == DBL_MAX or max == -DBL_MAX is reported as SYSMIS.  Range,
   minimum and maximum are filled in regardless of calc_stats. */
836 dv->stats[DSC_RANGE] = ((dv->min == DBL_MAX || dv->max == -DBL_MAX)
837 ? SYSMIS : dv->max - dv->min);
838 dv->stats[DSC_MIN] = dv->min == DBL_MAX ? SYSMIS : dv->min;
839 dv->stats[DSC_MAX] = dv->max == -DBL_MAX ? SYSMIS : dv->max;
/* NOTE(review): unlike the derived statistics above, the sum does not
   check whether the mean is SYSMIS before multiplying -- confirm that
   W * SYSMIS cannot be reported as a sum. */
840 if (dsc->calc_stats & (1ul << DSC_SUM))
841 dv->stats[DSC_SUM] = W * dv->stats[DSC_MEAN];
844 /* Output results. */
848 /* Returns true if any of the descriptives variables in DSC's
849 variable list have missing values in case C, false otherwise. */
851 listwise_missing (struct dsc_proc *dsc, const struct ccase *c)
/* Test each variable's value in C against the missing-value classes in
   dsc->exclude. */
855 for (i = 0; i < dsc->var_cnt; i++)
857 struct dsc_var *dv = &dsc->vars[i];
858 double x = case_num (c, dv->v);
860 if (var_is_num_missing (dv->v, x, dsc->exclude))
866 /* Statistical display. */
868 static algo_compare_func descriptives_compare_dsc_vars;
870 /* Displays a table of descriptive statistics for DSC. */
872 display (struct dsc_proc *dsc)
/* Column count: one column for the variable, then two count columns (valid
   N and missing N) in serial format or a single N column otherwise.  The
   loop below visits each statistic selected in show_stats. */
878 nc = 1 + (dsc->format == DSC_SERIAL ? 2 : 1);
879 for (i = 0; i < DSC_N_STATS; i++)
880 if (dsc->show_stats & (1ul << i))
/* Sorting reorders dsc->vars itself; DSC is passed to the comparison
   function as auxiliary data. */
883 if (dsc->sort_by_stat != DSC_NONE)
884 sort (dsc->vars, dsc->var_cnt, sizeof *dsc->vars,
885 descriptives_compare_dsc_vars, dsc);
/* The table has nc columns, a heading row, and one row per variable. */
887 t = tab_create (nc, dsc->var_cnt + 1);
888 tab_headers (t, 1, 0, 1, 0);
889 tab_box (t, TAL_1, TAL_1, -1, -1, 0, 0, nc - 1, dsc->var_cnt);
890 tab_box (t, -1, -1, -1, TAL_1, 1, 0, nc - 1, dsc->var_cnt);
891 tab_hline (t, TAL_2, 0, nc - 1, 1);
892 tab_vline (t, TAL_2, 1, 0, dsc->var_cnt);
/* Heading row (row 0).  From here on nc is used as a running column
   index, advanced after each cell. */
895 tab_text (t, nc++, 0, TAB_LEFT | TAT_TITLE, _("Variable"));
896 if (dsc->format == DSC_SERIAL)
898 tab_text (t, nc++, 0, TAB_CENTER | TAT_TITLE, _("Valid N"));
899 tab_text (t, nc++, 0, TAB_CENTER | TAT_TITLE, _("Missing N"));
902 tab_text (t, nc++, 0, TAB_CENTER | TAT_TITLE, "N");
904 for (i = 0; i < DSC_N_STATS; i++)
905 if (dsc->show_stats & (1ul << i))
/* Statistic names are stored untranslated in dsc_info[] and translated
   here. */
907 const char *title = gettext (dsc_info[i].name);
908 tab_text (t, nc++, 0, TAB_CENTER | TAT_TITLE, title);
/* Data rows: variable i goes into row i + 1. */
911 for (i = 0; i < dsc->var_cnt; i++)
913 struct dsc_var *dv = &dsc->vars[i];
917 tab_text (t, nc++, i + 1, TAB_LEFT, var_to_string (dv->v));
918 tab_text_format (t, nc++, i + 1, 0, "%g", dv->valid);
919 if (dsc->format == DSC_SERIAL)
920 tab_text_format (t, nc++, i + 1, 0, "%g", dv->missing);
/* Statistics appear in DSC_* order, matching the heading row. */
922 for (j = 0; j < DSC_N_STATS; j++)
923 if (dsc->show_stats & (1ul << j))
924 tab_double (t, nc++, i + 1, TAB_NONE, dv->stats[j], NULL);
927 tab_title (t, _("Valid cases = %g; cases with missing value(s) = %g."),
928 dsc->valid, dsc->missing_listwise);
933 /* Compares `struct dsc_var's A and B according to the ordering
936 descriptives_compare_dsc_vars (const void *a_, const void *b_, const void *dsc_)
938 const struct dsc_var *a = a_;
939 const struct dsc_var *b = b_;
940 const struct dsc_proc *dsc = dsc_;
944 if (dsc->sort_by_stat == DSC_NAME)
945 result = strcasecmp (var_get_name (a->v), var_get_name (b->v));
948 double as = a->stats[dsc->sort_by_stat];
949 double bs = b->stats[dsc->sort_by_stat];
951 result = as < bs ? -1 : as > bs;
954 if (!dsc->sort_ascending)