1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 1997-9, 2000, 2009, 2010, 2011 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
23 #include <data/casegrouper.h>
24 #include <data/casereader.h>
25 #include <data/dictionary.h>
26 #include <data/procedure.h>
27 #include <data/transformations.h>
28 #include <data/variable.h>
29 #include <language/command.h>
30 #include <language/dictionary/split-file.h>
31 #include <language/lexer/lexer.h>
32 #include <language/lexer/variable-parser.h>
33 #include <libpspp/array.h>
34 #include <libpspp/compiler.h>
35 #include <libpspp/message.h>
36 #include <libpspp/assertion.h>
37 #include <math/moments.h>
38 #include <output/tab.h>
43 #define _(msgid) gettext (msgid)
44 #define N_(msgid) msgid
46 /* DESCRIPTIVES private data. */
50 /* Handling of missing values. */
53 DSC_VARIABLE, /* Handle missing values on a per-variable basis. */
54 DSC_LISTWISE /* Discard entire case if any variable is missing. */
57 /* Describes properties of a distribution for the purpose of
58 calculating a Z-score. */
61 const struct variable *src_var; /* Variable on which z-score is based. */
62 struct variable *z_var; /* New z-score variable. */
63 double mean; /* Distribution mean. */
64 double std_dev; /* Distribution standard deviation. */
67 /* DESCRIPTIVES transformation (for calculating Z-scores). */
70 struct dsc_z_score *z_scores; /* Array of Z-scores. */
71 int z_score_cnt; /* Number of Z-scores. */
72 const struct variable **vars; /* Variables for listwise missing checks. */
73 size_t var_cnt; /* Number of variables. */
74 enum dsc_missing_type missing_type; /* Treatment of missing values. */
75 enum mv_class exclude; /* Classes of missing values to exclude. */
78 /* Statistics. Used as bit indexes, so must be 32 or fewer. */
81 DSC_MEAN = 0, DSC_SEMEAN, DSC_STDDEV, DSC_VARIANCE, DSC_KURTOSIS,
82 DSC_SEKURT, DSC_SKEWNESS, DSC_SESKEW, DSC_RANGE, DSC_MIN,
83 DSC_MAX, DSC_SUM, DSC_N_STATS,
85 /* Only valid as sort criteria. */
86 DSC_NAME = -2, /* Sort by name. */
87 DSC_NONE = -1 /* Unsorted. */
90 /* Describes one statistic. */
91 struct dsc_statistic_info
93 const char *identifier; /* Identifier. */
94 const char *name; /* Full name. */
95 enum moment moment; /* Highest moment needed to calculate. */
98 /* Table of statistics, indexed by DSC_*. */
99 static const struct dsc_statistic_info dsc_info[DSC_N_STATS] =
101 {"MEAN", N_("Mean"), MOMENT_MEAN},
102 {"SEMEAN", N_("S.E. Mean"), MOMENT_VARIANCE},
103 {"STDDEV", N_("Std Dev"), MOMENT_VARIANCE},
104 {"VARIANCE", N_("Variance"), MOMENT_VARIANCE},
105 {"KURTOSIS", N_("Kurtosis"), MOMENT_KURTOSIS},
106 {"SEKURTOSIS", N_("S.E. Kurt"), MOMENT_NONE},
107 {"SKEWNESS", N_("Skewness"), MOMENT_SKEWNESS},
108 {"SESKEWNESS", N_("S.E. Skew"), MOMENT_NONE},
109 {"RANGE", N_("Range"), MOMENT_NONE},
110 {"MINIMUM", N_("Minimum"), MOMENT_NONE},
111 {"MAXIMUM", N_("Maximum"), MOMENT_NONE},
112 {"SUM", N_("Sum"), MOMENT_MEAN},
115 /* Statistics calculated by default if none are explicitly
117 #define DEFAULT_STATS \
118 ((1ul << DSC_MEAN) | (1ul << DSC_STDDEV) | (1ul << DSC_MIN) \
121 /* A variable specified on DESCRIPTIVES. */
124 const struct variable *v; /* Variable to calculate on. */
125 char *z_name; /* Name for z-score variable. */
126 double valid, missing; /* Valid, missing counts. */
127 struct moments *moments; /* Moments. */
128 double min, max; /* Maximum and mimimum values. */
129 double stats[DSC_N_STATS]; /* All the stats' values. */
135 DSC_LINE, /* Abbreviated format. */
136 DSC_SERIAL /* Long format. */
139 /* A DESCRIPTIVES procedure. */
142 /* Per-variable info. */
143 struct dsc_var *vars; /* Variables. */
144 size_t var_cnt; /* Number of variables. */
147 enum dsc_missing_type missing_type; /* Treatment of missing values. */
148 enum mv_class exclude; /* Classes of missing values to exclude. */
149 int show_var_labels; /* Nonzero to show variable labels. */
150 int show_index; /* Nonzero to show variable index. */
151 enum dsc_format format; /* Output format. */
153 /* Accumulated results. */
154 double missing_listwise; /* Sum of weights of cases missing listwise. */
155 double valid; /* Sum of weights of valid cases. */
156 bool bad_warn; /* Warn if bad weight found. */
157 enum dsc_statistic sort_by_stat; /* Statistic to sort by; -1: name. */
158 int sort_ascending; /* !0: ascending order; 0: descending. */
159 unsigned long show_stats; /* Statistics to display. */
160 unsigned long calc_stats; /* Statistics to calculate. */
161 enum moment max_moment; /* Highest moment needed for stats. */
165 static enum dsc_statistic match_statistic (struct lexer *);
166 static void free_dsc_proc (struct dsc_proc *);
168 /* Z-score functions. */
169 static bool try_name (const struct dictionary *dict,
170 struct dsc_proc *dsc, const char *name);
171 static char *generate_z_varname (const struct dictionary *dict,
172 struct dsc_proc *dsc,
173 const char *name, int *z_cnt);
174 static void dump_z_table (struct dsc_proc *);
175 static void setup_z_trns (struct dsc_proc *, struct dataset *);
177 /* Procedure execution functions. */
178 static void calc_descriptives (struct dsc_proc *, struct casereader *,
180 static void display (struct dsc_proc *dsc);
182 /* Parser and outline. */
184 /* Handles DESCRIPTIVES. */
186 cmd_descriptives (struct lexer *lexer, struct dataset *ds)
188 struct dictionary *dict = dataset_dict (ds);
189 struct dsc_proc *dsc;
190 const struct variable **vars = NULL;
192 int save_z_scores = 0;
197 struct casegrouper *grouper;
198 struct casereader *group;
200 /* Create and initialize dsc. */
201 dsc = xmalloc (sizeof *dsc);
204 dsc->missing_type = DSC_VARIABLE;
205 dsc->exclude = MV_ANY;
206 dsc->show_var_labels = 1;
208 dsc->format = DSC_LINE;
209 dsc->missing_listwise = 0.;
212 dsc->sort_by_stat = DSC_NONE;
213 dsc->sort_ascending = 1;
214 dsc->show_stats = dsc->calc_stats = DEFAULT_STATS;
216 /* Parse DESCRIPTIVES. */
217 while (lex_token (lexer) != T_ENDCMD)
219 if (lex_match_id (lexer, "MISSING"))
221 lex_match (lexer, T_EQUALS);
222 while (lex_token (lexer) != T_ENDCMD && lex_token (lexer) != T_SLASH)
224 if (lex_match_id (lexer, "VARIABLE"))
225 dsc->missing_type = DSC_VARIABLE;
226 else if (lex_match_id (lexer, "LISTWISE"))
227 dsc->missing_type = DSC_LISTWISE;
228 else if (lex_match_id (lexer, "INCLUDE"))
229 dsc->exclude = MV_SYSTEM;
232 lex_error (lexer, NULL);
235 lex_match (lexer, T_COMMA);
238 else if (lex_match_id (lexer, "SAVE"))
240 else if (lex_match_id (lexer, "FORMAT"))
242 lex_match (lexer, T_EQUALS);
243 while (lex_token (lexer) != T_ENDCMD && lex_token (lexer) != T_SLASH)
245 if (lex_match_id (lexer, "LABELS"))
246 dsc->show_var_labels = 1;
247 else if (lex_match_id (lexer, "NOLABELS"))
248 dsc->show_var_labels = 0;
249 else if (lex_match_id (lexer, "INDEX"))
251 else if (lex_match_id (lexer, "NOINDEX"))
253 else if (lex_match_id (lexer, "LINE"))
254 dsc->format = DSC_LINE;
255 else if (lex_match_id (lexer, "SERIAL"))
256 dsc->format = DSC_SERIAL;
259 lex_error (lexer, NULL);
262 lex_match (lexer, T_COMMA);
265 else if (lex_match_id (lexer, "STATISTICS"))
267 lex_match (lexer, T_EQUALS);
269 while (lex_token (lexer) != T_ENDCMD && lex_token (lexer) != T_SLASH)
271 if (lex_match (lexer, T_ALL))
272 dsc->show_stats |= (1ul << DSC_N_STATS) - 1;
273 else if (lex_match_id (lexer, "DEFAULT"))
274 dsc->show_stats |= DEFAULT_STATS;
276 dsc->show_stats |= 1ul << (match_statistic (lexer));
277 lex_match (lexer, T_COMMA);
279 if (dsc->show_stats == 0)
280 dsc->show_stats = DEFAULT_STATS;
282 else if (lex_match_id (lexer, "SORT"))
284 lex_match (lexer, T_EQUALS);
285 if (lex_match_id (lexer, "NAME"))
286 dsc->sort_by_stat = DSC_NAME;
289 dsc->sort_by_stat = match_statistic (lexer);
290 if (dsc->sort_by_stat == DSC_NONE )
291 dsc->sort_by_stat = DSC_MEAN;
293 if (lex_match (lexer, T_LPAREN))
295 if (lex_match_id (lexer, "A"))
296 dsc->sort_ascending = 1;
297 else if (lex_match_id (lexer, "D"))
298 dsc->sort_ascending = 0;
300 lex_error (lexer, NULL);
301 lex_force_match (lexer, T_RPAREN);
304 else if (var_cnt == 0)
306 if (lex_look_ahead (lexer) == T_EQUALS)
308 lex_match_id (lexer, "VARIABLES");
309 lex_match (lexer, T_EQUALS);
312 while (lex_token (lexer) != T_ENDCMD && lex_token (lexer) != T_SLASH)
316 if (!parse_variables_const (lexer, dict, &vars, &var_cnt,
317 PV_APPEND | PV_NO_DUPLICATE | PV_NUMERIC))
320 dsc->vars = xnrealloc ((void *)dsc->vars, var_cnt, sizeof *dsc->vars);
321 for (i = dsc->var_cnt; i < var_cnt; i++)
323 struct dsc_var *dv = &dsc->vars[i];
328 dsc->var_cnt = var_cnt;
330 if (lex_match (lexer, T_LPAREN))
332 if (lex_token (lexer) != T_ID)
334 lex_error (lexer, NULL);
337 if (try_name (dict, dsc, lex_tokcstr (lexer)))
339 struct dsc_var *dsc_var = &dsc->vars[dsc->var_cnt - 1];
340 dsc_var->z_name = xstrdup (lex_tokcstr (lexer));
344 msg (SE, _("Z-score variable name %s would be"
345 " a duplicate variable name."), lex_tokcstr (lexer));
347 if (!lex_force_match (lexer, T_RPAREN))
354 lex_error (lexer, NULL);
358 lex_match (lexer, T_SLASH);
362 msg (SE, _("No variables specified."));
366 /* Construct z-score varnames, show translation table. */
367 if (z_cnt || save_z_scores)
373 for (i = 0; i < dsc->var_cnt; i++)
375 struct dsc_var *dsc_var = &dsc->vars[i];
376 if (dsc_var->z_name == NULL)
378 const char *name = var_get_name (dsc_var->v);
379 dsc_var->z_name = generate_z_varname (dict, dsc, name,
381 if (dsc_var->z_name == NULL)
391 /* Figure out statistics to display. */
392 if (dsc->show_stats & (1ul << DSC_SKEWNESS))
393 dsc->show_stats |= 1ul << DSC_SESKEW;
394 if (dsc->show_stats & (1ul << DSC_KURTOSIS))
395 dsc->show_stats |= 1ul << DSC_SEKURT;
397 /* Figure out which statistics to calculate. */
398 dsc->calc_stats = dsc->show_stats;
400 dsc->calc_stats |= (1ul << DSC_MEAN) | (1ul << DSC_STDDEV);
401 if (dsc->sort_by_stat >= 0)
402 dsc->calc_stats |= 1ul << dsc->sort_by_stat;
403 if (dsc->show_stats & (1ul << DSC_SESKEW))
404 dsc->calc_stats |= 1ul << DSC_SKEWNESS;
405 if (dsc->show_stats & (1ul << DSC_SEKURT))
406 dsc->calc_stats |= 1ul << DSC_KURTOSIS;
408 /* Figure out maximum moment needed and allocate moments for
410 dsc->max_moment = MOMENT_NONE;
411 for (i = 0; i < DSC_N_STATS; i++)
412 if (dsc->calc_stats & (1ul << i) && dsc_info[i].moment > dsc->max_moment)
413 dsc->max_moment = dsc_info[i].moment;
414 if (dsc->max_moment != MOMENT_NONE)
415 for (i = 0; i < dsc->var_cnt; i++)
416 dsc->vars[i].moments = moments_create (dsc->max_moment);
419 grouper = casegrouper_create_splits (proc_open (ds), dict);
420 while (casegrouper_get_next_group (grouper, &group))
421 calc_descriptives (dsc, group, ds);
422 ok = casegrouper_destroy (grouper);
423 ok = proc_commit (ds) && ok;
427 setup_z_trns (dsc, ds);
432 return ok ? CMD_SUCCESS : CMD_CASCADING_FAILURE;
440 /* Returns the statistic named by the current token and skips past the token.
441 Returns DSC_NONE if no statistic is given (e.g., subcommand with no
442 specifiers). Emits an error if the current token ID does not name a
444 static enum dsc_statistic
445 match_statistic (struct lexer *lexer)
447 if (lex_token (lexer) == T_ID)
449 enum dsc_statistic stat;
451 for (stat = 0; stat < DSC_N_STATS; stat++)
452 if (lex_match_id (lexer, dsc_info[stat].identifier))
456 lex_error (lexer, _("expecting statistic name: reverting to default"));
464 free_dsc_proc (struct dsc_proc *dsc)
471 for (i = 0; i < dsc->var_cnt; i++)
473 struct dsc_var *dsc_var = &dsc->vars[i];
474 free (dsc_var->z_name);
475 moments_destroy (dsc_var->moments);
483 /* Returns false if NAME is a duplicate of any existing variable name or
484 of any previously-declared z-var name; otherwise returns true. */
486 try_name (const struct dictionary *dict, struct dsc_proc *dsc,
491 if (dict_lookup_var (dict, name) != NULL)
493 for (i = 0; i < dsc->var_cnt; i++)
495 struct dsc_var *dsc_var = &dsc->vars[i];
496 if (dsc_var->z_name != NULL && !strcasecmp (dsc_var->z_name, name))
502 /* Generates a name for a Z-score variable based on a variable
503 named VAR_NAME, given that *Z_CNT generated variable names are
504 known to already exist. If successful, returns the new name
505 as a dynamically allocated string. On failure, returns NULL. */
507 generate_z_varname (const struct dictionary *dict, struct dsc_proc *dsc,
508 const char *var_name, int *z_cnt)
510 char name[VAR_NAME_LEN + 1];
512 /* Try a name based on the original variable name. */
514 str_copy_trunc (name + 1, sizeof name - 1, var_name);
515 if (try_name (dict, dsc, name))
516 return xstrdup (name);
518 /* Generate a synthetic name. */
524 sprintf (name, "ZSC%03d", *z_cnt);
525 else if (*z_cnt <= 108)
526 sprintf (name, "STDZ%02d", *z_cnt - 99);
527 else if (*z_cnt <= 117)
528 sprintf (name, "ZZZZ%02d", *z_cnt - 108);
529 else if (*z_cnt <= 126)
530 sprintf (name, "ZQZQ%02d", *z_cnt - 117);
533 msg (SE, _("Ran out of generic names for Z-score variables. "
534 "There are only 126 generic names: ZSC001-ZSC0999, "
535 "STDZ01-STDZ09, ZZZZ01-ZZZZ09, ZQZQ01-ZQZQ09."));
539 if (try_name (dict, dsc, name))
540 return xstrdup (name);
545 /* Outputs a table describing the mapping between source
546 variables and Z-score variables. */
548 dump_z_table (struct dsc_proc *dsc)
556 for (i = 0; i < dsc->var_cnt; i++)
557 if (dsc->vars[i].z_name != NULL)
561 t = tab_create (2, cnt + 1);
562 tab_title (t, _("Mapping of variables to corresponding Z-scores."));
563 tab_headers (t, 0, 0, 1, 0);
564 tab_box (t, TAL_1, TAL_1, TAL_0, TAL_1, 0, 0, 1, cnt);
565 tab_hline (t, TAL_2, 0, 1, 1);
566 tab_text (t, 0, 0, TAB_CENTER | TAT_TITLE, _("Source"));
567 tab_text (t, 1, 0, TAB_CENTER | TAT_TITLE, _("Target"));
572 for (i = 0, y = 1; i < dsc->var_cnt; i++)
573 if (dsc->vars[i].z_name != NULL)
575 tab_text (t, 0, y, TAB_LEFT, var_get_name (dsc->vars[i].v));
576 tab_text (t, 1, y++, TAB_LEFT, dsc->vars[i].z_name);
583 /* Transformation function to calculate Z-scores. Will return SYSMIS if any of
584 the following are true: 1) mean or standard deviation is SYSMIS 2) score is
585 SYSMIS 3) score is user missing and they were not included in the original
586 analyis. 4) any of the variables in the original analysis were missing
587 (either system or user-missing values that weren't included).
590 descriptives_trns_proc (void *trns_, struct ccase **c,
591 casenumber case_idx UNUSED)
593 struct dsc_trns *t = trns_;
594 struct dsc_z_score *z;
595 const struct variable **vars;
598 if (t->missing_type == DSC_LISTWISE)
601 for (vars = t->vars; vars < t->vars + t->var_cnt; vars++)
603 double score = case_num (*c, *vars);
604 if (var_is_num_missing (*vars, score, t->exclude))
612 *c = case_unshare (*c);
613 for (z = t->z_scores; z < t->z_scores + t->z_score_cnt; z++)
615 double input = case_num (*c, z->src_var);
616 double *output = &case_data_rw (*c, z->z_var)->f;
618 if (z->mean == SYSMIS || z->std_dev == SYSMIS || all_sysmis
619 || var_is_num_missing (z->src_var, input, t->exclude))
622 *output = (input - z->mean) / z->std_dev;
624 return TRNS_CONTINUE;
627 /* Frees a descriptives_trns struct. */
629 descriptives_trns_free (void *trns_)
631 struct dsc_trns *t = trns_;
634 assert((t->missing_type != DSC_LISTWISE) ^ (t->vars != NULL));
639 /* Sets up a transformation to calculate Z scores. */
641 setup_z_trns (struct dsc_proc *dsc, struct dataset *ds)
646 for (cnt = i = 0; i < dsc->var_cnt; i++)
647 if (dsc->vars[i].z_name != NULL)
650 t = xmalloc (sizeof *t);
651 t->z_scores = xnmalloc (cnt, sizeof *t->z_scores);
652 t->z_score_cnt = cnt;
653 t->missing_type = dsc->missing_type;
654 t->exclude = dsc->exclude;
655 if ( t->missing_type == DSC_LISTWISE )
657 t->var_cnt = dsc->var_cnt;
658 t->vars = xnmalloc (t->var_cnt, sizeof *t->vars);
659 for (i = 0; i < t->var_cnt; i++)
660 t->vars[i] = dsc->vars[i].v;
668 for (cnt = i = 0; i < dsc->var_cnt; i++)
670 struct dsc_var *dv = &dsc->vars[i];
671 if (dv->z_name != NULL)
673 struct dsc_z_score *z;
674 struct variable *dst_var;
676 dst_var = dict_create_var_assert (dataset_dict (ds), dv->z_name, 0);
677 var_set_label (dst_var, xasprintf (_("Z-score of %s"),
678 var_to_string (dv->v)));
680 z = &t->z_scores[cnt++];
683 z->mean = dv->stats[DSC_MEAN];
684 z->std_dev = dv->stats[DSC_STDDEV];
688 add_transformation (ds,
689 descriptives_trns_proc, descriptives_trns_free, t);
692 /* Statistical calculation. */
694 static bool listwise_missing (struct dsc_proc *dsc, const struct ccase *c);
696 /* Calculates and displays descriptive statistics for the cases
699 calc_descriptives (struct dsc_proc *dsc, struct casereader *group,
702 struct casereader *pass1, *pass2;
706 c = casereader_peek (group, 0);
709 casereader_destroy (group);
712 output_split_file_values (ds, c);
715 group = casereader_create_filter_weight (group, dataset_dict (ds),
719 pass2 = dsc->max_moment <= MOMENT_MEAN ? NULL : casereader_clone (pass1);
721 for (i = 0; i < dsc->var_cnt; i++)
723 struct dsc_var *dv = &dsc->vars[i];
725 dv->valid = dv->missing = 0.0;
726 if (dv->moments != NULL)
727 moments_clear (dv->moments);
731 dsc->missing_listwise = 0.;
734 /* First pass to handle most of the work. */
735 for (; (c = casereader_read (pass1)) != NULL; case_unref (c))
737 double weight = dict_get_case_weight (dataset_dict (ds), c, NULL);
739 /* Check for missing values. */
740 if (listwise_missing (dsc, c))
742 dsc->missing_listwise += weight;
743 if (dsc->missing_type == DSC_LISTWISE)
746 dsc->valid += weight;
748 for (i = 0; i < dsc->var_cnt; i++)
750 struct dsc_var *dv = &dsc->vars[i];
751 double x = case_num (c, dv->v);
753 if (var_is_num_missing (dv->v, x, dsc->exclude))
755 dv->missing += weight;
759 if (dv->moments != NULL)
760 moments_pass_one (dv->moments, x, weight);
768 if (!casereader_destroy (pass1))
770 casereader_destroy (pass2);
774 /* Second pass for higher-order moments. */
775 if (dsc->max_moment > MOMENT_MEAN)
777 for (; (c = casereader_read (pass2)) != NULL; case_unref (c))
779 double weight = dict_get_case_weight (dataset_dict (ds), c, NULL);
781 /* Check for missing values. */
782 if (dsc->missing_type == DSC_LISTWISE && listwise_missing (dsc, c))
785 for (i = 0; i < dsc->var_cnt; i++)
787 struct dsc_var *dv = &dsc->vars[i];
788 double x = case_num (c, dv->v);
790 if (var_is_num_missing (dv->v, x, dsc->exclude))
793 if (dv->moments != NULL)
794 moments_pass_two (dv->moments, x, weight);
797 if (!casereader_destroy (pass2))
801 /* Calculate results. */
802 for (i = 0; i < dsc->var_cnt; i++)
804 struct dsc_var *dv = &dsc->vars[i];
808 for (j = 0; j < DSC_N_STATS; j++)
809 dv->stats[j] = SYSMIS;
811 dv->valid = W = dsc->valid - dv->missing;
813 if (dv->moments != NULL)
814 moments_calculate (dv->moments, NULL,
815 &dv->stats[DSC_MEAN], &dv->stats[DSC_VARIANCE],
816 &dv->stats[DSC_SKEWNESS], &dv->stats[DSC_KURTOSIS]);
817 if (dsc->calc_stats & (1ul << DSC_SEMEAN)
818 && dv->stats[DSC_VARIANCE] != SYSMIS && W > 0.)
819 dv->stats[DSC_SEMEAN] = sqrt (dv->stats[DSC_VARIANCE]) / sqrt (W);
820 if (dsc->calc_stats & (1ul << DSC_STDDEV)
821 && dv->stats[DSC_VARIANCE] != SYSMIS)
822 dv->stats[DSC_STDDEV] = sqrt (dv->stats[DSC_VARIANCE]);
823 if (dsc->calc_stats & (1ul << DSC_SEKURT))
824 if (dv->stats[DSC_KURTOSIS] != SYSMIS)
825 dv->stats[DSC_SEKURT] = calc_sekurt (W);
826 if (dsc->calc_stats & (1ul << DSC_SESKEW)
827 && dv->stats[DSC_SKEWNESS] != SYSMIS)
828 dv->stats[DSC_SESKEW] = calc_seskew (W);
829 dv->stats[DSC_RANGE] = ((dv->min == DBL_MAX || dv->max == -DBL_MAX)
830 ? SYSMIS : dv->max - dv->min);
831 dv->stats[DSC_MIN] = dv->min == DBL_MAX ? SYSMIS : dv->min;
832 dv->stats[DSC_MAX] = dv->max == -DBL_MAX ? SYSMIS : dv->max;
833 if (dsc->calc_stats & (1ul << DSC_SUM))
834 dv->stats[DSC_SUM] = W * dv->stats[DSC_MEAN];
837 /* Output results. */
841 /* Returns true if any of the descriptives variables in DSC's
842 variable list have missing values in case C, false otherwise. */
844 listwise_missing (struct dsc_proc *dsc, const struct ccase *c)
848 for (i = 0; i < dsc->var_cnt; i++)
850 struct dsc_var *dv = &dsc->vars[i];
851 double x = case_num (c, dv->v);
853 if (var_is_num_missing (dv->v, x, dsc->exclude))
859 /* Statistical display. */
861 static algo_compare_func descriptives_compare_dsc_vars;
863 /* Displays a table of descriptive statistics for DSC. */
865 display (struct dsc_proc *dsc)
871 nc = 1 + (dsc->format == DSC_SERIAL ? 2 : 1);
872 for (i = 0; i < DSC_N_STATS; i++)
873 if (dsc->show_stats & (1ul << i))
876 if (dsc->sort_by_stat != DSC_NONE)
877 sort (dsc->vars, dsc->var_cnt, sizeof *dsc->vars,
878 descriptives_compare_dsc_vars, dsc);
880 t = tab_create (nc, dsc->var_cnt + 1);
881 tab_headers (t, 1, 0, 1, 0);
882 tab_box (t, TAL_1, TAL_1, -1, -1, 0, 0, nc - 1, dsc->var_cnt);
883 tab_box (t, -1, -1, -1, TAL_1, 1, 0, nc - 1, dsc->var_cnt);
884 tab_hline (t, TAL_2, 0, nc - 1, 1);
885 tab_vline (t, TAL_2, 1, 0, dsc->var_cnt);
888 tab_text (t, nc++, 0, TAB_LEFT | TAT_TITLE, _("Variable"));
889 if (dsc->format == DSC_SERIAL)
891 tab_text (t, nc++, 0, TAB_CENTER | TAT_TITLE, _("Valid N"));
892 tab_text (t, nc++, 0, TAB_CENTER | TAT_TITLE, _("Missing N"));
895 tab_text (t, nc++, 0, TAB_CENTER | TAT_TITLE, "N");
897 for (i = 0; i < DSC_N_STATS; i++)
898 if (dsc->show_stats & (1ul << i))
900 const char *title = gettext (dsc_info[i].name);
901 tab_text (t, nc++, 0, TAB_CENTER | TAT_TITLE, title);
904 for (i = 0; i < dsc->var_cnt; i++)
906 struct dsc_var *dv = &dsc->vars[i];
910 tab_text (t, nc++, i + 1, TAB_LEFT, var_get_name (dv->v));
911 tab_text_format (t, nc++, i + 1, 0, "%g", dv->valid);
912 if (dsc->format == DSC_SERIAL)
913 tab_text_format (t, nc++, i + 1, 0, "%g", dv->missing);
915 for (j = 0; j < DSC_N_STATS; j++)
916 if (dsc->show_stats & (1ul << j))
917 tab_double (t, nc++, i + 1, TAB_NONE, dv->stats[j], NULL);
920 tab_title (t, _("Valid cases = %g; cases with missing value(s) = %g."),
921 dsc->valid, dsc->missing_listwise);
926 /* Compares `struct dsc_var's A and B according to the ordering
929 descriptives_compare_dsc_vars (const void *a_, const void *b_, const void *dsc_)
931 const struct dsc_var *a = a_;
932 const struct dsc_var *b = b_;
933 const struct dsc_proc *dsc = dsc_;
937 if (dsc->sort_by_stat == DSC_NAME)
938 result = strcasecmp (var_get_name (a->v), var_get_name (b->v));
941 double as = a->stats[dsc->sort_by_stat];
942 double bs = b->stats[dsc->sort_by_stat];
944 result = as < bs ? -1 : as > bs;
947 if (!dsc->sort_ascending)