1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
4 This program is free software; you can redistribute it and/or
5 modify it under the terms of the GNU General Public License as
6 published by the Free Software Foundation; either version 2 of the
7 License, or (at your option) any later version.
9 This program is distributed in the hope that it will be useful, but
10 WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19 /* FIXME: Many possible optimizations. */
27 #include <data/case.h>
28 #include <data/casefile.h>
29 #include <data/dictionary.h>
30 #include <data/procedure.h>
31 #include <data/transformations.h>
32 #include <data/variable.h>
33 #include <language/command.h>
34 #include <language/dictionary/split-file.h>
35 #include <language/lexer/lexer.h>
36 #include <language/lexer/variable-parser.h>
37 #include <libpspp/alloc.h>
38 #include <libpspp/array.h>
39 #include <libpspp/compiler.h>
40 #include <libpspp/magic.h>
41 #include <libpspp/message.h>
42 #include <libpspp/assertion.h>
43 #include <math/moments.h>
44 #include <output/manager.h>
45 #include <output/table.h>
48 #define _(msgid) gettext (msgid)
49 #define N_(msgid) msgid
51 /* DESCRIPTIVES private data. */
55 /* Handling of missing values. */
58 DSC_VARIABLE, /* Handle missing values on a per-variable basis. */
59 DSC_LISTWISE /* Discard entire case if any variable is missing. */
62 /* Describes properties of a distribution for the purpose of
63 calculating a Z-score. */
66 const struct variable *src_var; /* Variable on which z-score is based. */
67 struct variable *z_var; /* New z-score variable. */
68 double mean; /* Distribution mean. */
69 double std_dev; /* Distribution standard deviation. */
72 /* DESCRIPTIVES transformation (for calculating Z-scores). */
75 struct dsc_z_score *z_scores; /* Array of Z-scores. */
76 int z_score_cnt; /* Number of Z-scores. */
77 const struct variable **vars; /* Variables for listwise missing checks. */
78 size_t var_cnt; /* Number of variables. */
79 enum dsc_missing_type missing_type; /* Treatment of missing values. */
80 enum mv_class exclude; /* Classes of missing values to exclude. */
83 /* Statistics. Used as bit indexes, so must be 32 or fewer. */
86 DSC_MEAN = 0, DSC_SEMEAN, DSC_STDDEV, DSC_VARIANCE, DSC_KURTOSIS,
87 DSC_SEKURT, DSC_SKEWNESS, DSC_SESKEW, DSC_RANGE, DSC_MIN,
88 DSC_MAX, DSC_SUM, DSC_N_STATS,
90 /* Only valid as sort criteria. */
91 DSC_NAME = -2, /* Sort by name. */
92 DSC_NONE = -1 /* Unsorted. */
95 /* Describes one statistic. */
96 struct dsc_statistic_info
98 const char *identifier; /* Identifier. */
99 const char *name; /* Full name. */
100 enum moment moment; /* Highest moment needed to calculate. */
103 /* Table of statistics, indexed by DSC_*. */
104 static const struct dsc_statistic_info dsc_info[DSC_N_STATS] =
106 {"MEAN", N_("Mean"), MOMENT_MEAN},
107 {"SEMEAN", N_("S E Mean"), MOMENT_VARIANCE},
108 {"STDDEV", N_("Std Dev"), MOMENT_VARIANCE},
109 {"VARIANCE", N_("Variance"), MOMENT_VARIANCE},
110 {"KURTOSIS", N_("Kurtosis"), MOMENT_KURTOSIS},
111 {"SEKURTOSIS", N_("S E Kurt"), MOMENT_NONE},
112 {"SKEWNESS", N_("Skewness"), MOMENT_SKEWNESS},
113 {"SESKEWNESS", N_("S E Skew"), MOMENT_NONE},
114 {"RANGE", N_("Range"), MOMENT_NONE},
115 {"MINIMUM", N_("Minimum"), MOMENT_NONE},
116 {"MAXIMUM", N_("Maximum"), MOMENT_NONE},
117 {"SUM", N_("Sum"), MOMENT_MEAN},
120 /* Statistics calculated by default if none are explicitly
122 #define DEFAULT_STATS \
123 ((1ul << DSC_MEAN) | (1ul << DSC_STDDEV) | (1ul << DSC_MIN) \
126 /* A variable specified on DESCRIPTIVES. */
129 const struct variable *v; /* Variable to calculate on. */
130 char z_name[LONG_NAME_LEN + 1]; /* Name for z-score variable. */
131 double valid, missing; /* Valid, missing counts. */
132 struct moments *moments; /* Moments. */
133 double min, max; /* Maximum and mimimum values. */
134 double stats[DSC_N_STATS]; /* All the stats' values. */
140 DSC_LINE, /* Abbreviated format. */
141 DSC_SERIAL /* Long format. */
144 /* A DESCRIPTIVES procedure. */
147 /* Per-variable info. */
148 struct dsc_var *vars; /* Variables. */
149 size_t var_cnt; /* Number of variables. */
152 enum dsc_missing_type missing_type; /* Treatment of missing values. */
153 enum mv_class exclude; /* Classes of missing values to exclude. */
154 int show_var_labels; /* Nonzero to show variable labels. */
155 int show_index; /* Nonzero to show variable index. */
156 enum dsc_format format; /* Output format. */
158 /* Accumulated results. */
159 double missing_listwise; /* Sum of weights of cases missing listwise. */
160 double valid; /* Sum of weights of valid cases. */
161 bool bad_warn; /* Warn if bad weight found. */
162 enum dsc_statistic sort_by_stat; /* Statistic to sort by; -1: name. */
163 int sort_ascending; /* !0: ascending order; 0: descending. */
164 unsigned long show_stats; /* Statistics to display. */
165 unsigned long calc_stats; /* Statistics to calculate. */
166 enum moment max_moment; /* Highest moment needed for stats. */
170 static enum dsc_statistic match_statistic (struct lexer *);
171 static void free_dsc_proc (struct dsc_proc *);
173 /* Z-score functions. */
174 static bool try_name (const struct dictionary *dict,
175 struct dsc_proc *dsc, const char *name);
176 static bool generate_z_varname (const struct dictionary *dict,
177 struct dsc_proc *dsc, char *z_name,
178 const char *name, int *z_cnt);
179 static void dump_z_table (struct dsc_proc *);
180 static void setup_z_trns (struct dsc_proc *, struct dataset *);
182 /* Procedure execution functions. */
183 static bool calc_descriptives (const struct ccase *first,
184 const struct casefile *, void *dsc_,
185 const struct dataset *);
186 static void display (struct dsc_proc *dsc);
188 /* Parser and outline. */
190 /* Handles DESCRIPTIVES. */
192 cmd_descriptives (struct lexer *lexer, struct dataset *ds)
194 struct dictionary *dict = dataset_dict (ds);
195 struct dsc_proc *dsc;
196 const struct variable **vars = NULL;
198 int save_z_scores = 0;
203 /* Create and initialize dsc. */
204 dsc = xmalloc (sizeof *dsc);
207 dsc->missing_type = DSC_VARIABLE;
208 dsc->exclude = MV_ANY;
209 dsc->show_var_labels = 1;
211 dsc->format = DSC_LINE;
212 dsc->missing_listwise = 0.;
215 dsc->sort_by_stat = DSC_NONE;
216 dsc->sort_ascending = 1;
217 dsc->show_stats = dsc->calc_stats = DEFAULT_STATS;
219 /* Parse DESCRIPTIVES. */
220 while (lex_token (lexer) != '.')
222 if (lex_match_id (lexer, "MISSING"))
224 lex_match (lexer, '=');
225 while (lex_token (lexer) != '.' && lex_token (lexer) != '/')
227 if (lex_match_id (lexer, "VARIABLE"))
228 dsc->missing_type = DSC_VARIABLE;
229 else if (lex_match_id (lexer, "LISTWISE"))
230 dsc->missing_type = DSC_LISTWISE;
231 else if (lex_match_id (lexer, "INCLUDE"))
232 dsc->exclude = MV_SYSTEM;
235 lex_error (lexer, NULL);
238 lex_match (lexer, ',');
241 else if (lex_match_id (lexer, "SAVE"))
243 else if (lex_match_id (lexer, "FORMAT"))
245 lex_match (lexer, '=');
246 while (lex_token (lexer) != '.' && lex_token (lexer) != '/')
248 if (lex_match_id (lexer, "LABELS"))
249 dsc->show_var_labels = 1;
250 else if (lex_match_id (lexer, "NOLABELS"))
251 dsc->show_var_labels = 0;
252 else if (lex_match_id (lexer, "INDEX"))
254 else if (lex_match_id (lexer, "NOINDEX"))
256 else if (lex_match_id (lexer, "LINE"))
257 dsc->format = DSC_LINE;
258 else if (lex_match_id (lexer, "SERIAL"))
259 dsc->format = DSC_SERIAL;
262 lex_error (lexer, NULL);
265 lex_match (lexer, ',');
268 else if (lex_match_id (lexer, "STATISTICS"))
270 lex_match (lexer, '=');
272 while (lex_token (lexer) != '.' && lex_token (lexer) != '/')
274 if (lex_match (lexer, T_ALL))
275 dsc->show_stats |= (1ul << DSC_N_STATS) - 1;
276 else if (lex_match_id (lexer, "DEFAULT"))
277 dsc->show_stats |= DEFAULT_STATS;
279 dsc->show_stats |= 1ul << (match_statistic (lexer));
280 lex_match (lexer, ',');
282 if (dsc->show_stats == 0)
283 dsc->show_stats = DEFAULT_STATS;
285 else if (lex_match_id (lexer, "SORT"))
287 lex_match (lexer, '=');
288 if (lex_match_id (lexer, "NAME"))
289 dsc->sort_by_stat = DSC_NAME;
292 dsc->sort_by_stat = match_statistic (lexer);
293 if (dsc->sort_by_stat == DSC_NONE )
294 dsc->sort_by_stat = DSC_MEAN;
296 if (lex_match (lexer, '('))
298 if (lex_match_id (lexer, "A"))
299 dsc->sort_ascending = 1;
300 else if (lex_match_id (lexer, "D"))
301 dsc->sort_ascending = 0;
303 lex_error (lexer, NULL);
304 lex_force_match (lexer, ')');
307 else if (var_cnt == 0)
309 if (lex_look_ahead (lexer) == '=')
311 lex_match_id (lexer, "VARIABLES");
312 lex_match (lexer, '=');
315 while (lex_token (lexer) != '.' && lex_token (lexer) != '/')
319 if (!parse_variables_const (lexer, dataset_dict (ds),
321 PV_APPEND | PV_NO_DUPLICATE | PV_NUMERIC))
324 dsc->vars = xnrealloc ((void *)dsc->vars, var_cnt, sizeof *dsc->vars);
325 for (i = dsc->var_cnt; i < var_cnt; i++)
327 struct dsc_var *dv = &dsc->vars[i];
329 dv->z_name[0] = '\0';
332 dsc->var_cnt = var_cnt;
334 if (lex_match (lexer, '('))
336 if (lex_token (lexer) != T_ID)
338 lex_error (lexer, NULL);
341 if (try_name (dict, dsc, lex_tokid (lexer)))
343 strcpy (dsc->vars[dsc->var_cnt - 1].z_name, lex_tokid (lexer));
347 msg (SE, _("Z-score variable name %s would be"
348 " a duplicate variable name."), lex_tokid (lexer));
350 if (!lex_force_match (lexer, ')'))
357 lex_error (lexer, NULL);
361 lex_match (lexer, '/');
365 msg (SE, _("No variables specified."));
369 /* Construct z-score varnames, show translation table. */
370 if (z_cnt || save_z_scores)
376 for (i = 0; i < dsc->var_cnt; i++)
377 if (dsc->vars[i].z_name[0] == 0)
379 if (!generate_z_varname (dict, dsc, dsc->vars[i].z_name,
380 var_get_name (dsc->vars[i].v),
389 /* Figure out statistics to display. */
390 if (dsc->show_stats & (1ul << DSC_SKEWNESS))
391 dsc->show_stats |= 1ul << DSC_SESKEW;
392 if (dsc->show_stats & (1ul << DSC_KURTOSIS))
393 dsc->show_stats |= 1ul << DSC_SEKURT;
395 /* Figure out which statistics to calculate. */
396 dsc->calc_stats = dsc->show_stats;
398 dsc->calc_stats |= (1ul << DSC_MEAN) | (1ul << DSC_STDDEV);
399 if (dsc->sort_by_stat >= 0)
400 dsc->calc_stats |= 1ul << dsc->sort_by_stat;
401 if (dsc->show_stats & (1ul << DSC_SESKEW))
402 dsc->calc_stats |= 1ul << DSC_SKEWNESS;
403 if (dsc->show_stats & (1ul << DSC_SEKURT))
404 dsc->calc_stats |= 1ul << DSC_KURTOSIS;
406 /* Figure out maximum moment needed and allocate moments for
408 dsc->max_moment = MOMENT_NONE;
409 for (i = 0; i < DSC_N_STATS; i++)
410 if (dsc->calc_stats & (1ul << i) && dsc_info[i].moment > dsc->max_moment)
411 dsc->max_moment = dsc_info[i].moment;
412 if (dsc->max_moment != MOMENT_NONE)
413 for (i = 0; i < dsc->var_cnt; i++)
414 dsc->vars[i].moments = moments_create (dsc->max_moment);
417 ok = multipass_procedure_with_splits (ds, calc_descriptives, dsc);
421 setup_z_trns (dsc, ds);
426 return ok ? CMD_SUCCESS : CMD_CASCADING_FAILURE;
434 /* Returns the statistic named by the current token and skips past the token.
435 Returns DSC_NONE if no statistic is given (e.g., subcommand with no
436 specifiers). Emits an error if the current token ID does not name a
438 static enum dsc_statistic
439 match_statistic (struct lexer *lexer)
441 if (lex_token (lexer) == T_ID)
443 enum dsc_statistic stat;
445 for (stat = 0; stat < DSC_N_STATS; stat++)
446 if (lex_match_id (lexer, dsc_info[stat].identifier))
450 lex_error (lexer, _("expecting statistic name: reverting to default"));
458 free_dsc_proc (struct dsc_proc *dsc)
465 for (i = 0; i < dsc->var_cnt; i++)
466 moments_destroy (dsc->vars[i].moments);
473 /* Returns false if NAME is a duplicate of any existing variable name or
474 of any previously-declared z-var name; otherwise returns true. */
476 try_name (const struct dictionary *dict, struct dsc_proc *dsc,
481 if (dict_lookup_var (dict, name) != NULL)
483 for (i = 0; i < dsc->var_cnt; i++)
484 if (!strcasecmp (dsc->vars[i].z_name, name))
489 /* Generates a name for a Z-score variable based on a variable
490 named VAR_NAME, given that *Z_CNT generated variable names are
491 known to already exist. If successful, returns true and
492 copies the new name into Z_NAME. On failure, returns false. */
494 generate_z_varname (const struct dictionary *dict, struct dsc_proc *dsc, char *z_name,
495 const char *var_name, int *z_cnt)
497 char name[LONG_NAME_LEN + 1];
499 /* Try a name based on the original variable name. */
501 str_copy_trunc (name + 1, sizeof name - 1, var_name);
502 if (try_name (dict, dsc, name))
504 strcpy (z_name, name);
508 /* Generate a synthetic name. */
514 sprintf (name, "ZSC%03d", *z_cnt);
515 else if (*z_cnt <= 108)
516 sprintf (name, "STDZ%02d", *z_cnt - 99);
517 else if (*z_cnt <= 117)
518 sprintf (name, "ZZZZ%02d", *z_cnt - 108);
519 else if (*z_cnt <= 126)
520 sprintf (name, "ZQZQ%02d", *z_cnt - 117);
523 msg (SE, _("Ran out of generic names for Z-score variables. "
524 "There are only 126 generic names: ZSC001-ZSC0999, "
525 "STDZ01-STDZ09, ZZZZ01-ZZZZ09, ZQZQ01-ZQZQ09."));
529 if (try_name (dict, dsc, name))
531 strcpy (z_name, name);
538 /* Outputs a table describing the mapping between source
539 variables and Z-score variables. */
541 dump_z_table (struct dsc_proc *dsc)
549 for (i = 0; i < dsc->var_cnt; i++)
550 if (dsc->vars[i].z_name[0] != '\0')
554 t = tab_create (2, cnt + 1, 0);
555 tab_title (t, _("Mapping of variables to corresponding Z-scores."));
556 tab_columns (t, SOM_COL_DOWN, 1);
557 tab_headers (t, 0, 0, 1, 0);
558 tab_box (t, TAL_1, TAL_1, TAL_0, TAL_1, 0, 0, 1, cnt);
559 tab_hline (t, TAL_2, 0, 1, 1);
560 tab_text (t, 0, 0, TAB_CENTER | TAT_TITLE, _("Source"));
561 tab_text (t, 1, 0, TAB_CENTER | TAT_TITLE, _("Target"));
562 tab_dim (t, tab_natural_dimensions);
567 for (i = 0, y = 1; i < dsc->var_cnt; i++)
568 if (dsc->vars[i].z_name[0] != '\0')
570 tab_text (t, 0, y, TAB_LEFT, var_get_name (dsc->vars[i].v));
571 tab_text (t, 1, y++, TAB_LEFT, dsc->vars[i].z_name);
578 /* Transformation function to calculate Z-scores. Will return SYSMIS if any of
579 the following are true: 1) mean or standard deviation is SYSMIS 2) score is
580 SYSMIS 3) score is user missing and they were not included in the original
581 analyis. 4) any of the variables in the original analysis were missing
582 (either system or user-missing values that weren't included).
585 descriptives_trns_proc (void *trns_, struct ccase * c,
586 casenumber case_idx UNUSED)
588 struct dsc_trns *t = trns_;
589 struct dsc_z_score *z;
590 const struct variable **vars;
593 if (t->missing_type == DSC_LISTWISE)
596 for (vars = t->vars; vars < t->vars + t->var_cnt; vars++)
598 double score = case_num (c, *vars);
599 if (var_is_num_missing (*vars, score, t->exclude))
607 for (z = t->z_scores; z < t->z_scores + t->z_score_cnt; z++)
609 double input = case_num (c, z->src_var);
610 double *output = &case_data_rw (c, z->z_var)->f;
612 if (z->mean == SYSMIS || z->std_dev == SYSMIS || all_sysmis
613 || var_is_num_missing (z->src_var, input, t->exclude))
616 *output = (input - z->mean) / z->std_dev;
618 return TRNS_CONTINUE;
621 /* Frees a descriptives_trns struct. */
623 descriptives_trns_free (void *trns_)
625 struct dsc_trns *t = trns_;
628 assert((t->missing_type != DSC_LISTWISE) ^ (t->vars != NULL));
633 /* Sets up a transformation to calculate Z scores. */
635 setup_z_trns (struct dsc_proc *dsc, struct dataset *ds)
640 for (cnt = i = 0; i < dsc->var_cnt; i++)
641 if (dsc->vars[i].z_name[0] != '\0')
644 t = xmalloc (sizeof *t);
645 t->z_scores = xnmalloc (cnt, sizeof *t->z_scores);
646 t->z_score_cnt = cnt;
647 t->missing_type = dsc->missing_type;
648 t->exclude = dsc->exclude;
649 if ( t->missing_type == DSC_LISTWISE )
651 t->var_cnt = dsc->var_cnt;
652 t->vars = xnmalloc (t->var_cnt, sizeof *t->vars);
653 for (i = 0; i < t->var_cnt; i++)
654 t->vars[i] = dsc->vars[i].v;
662 for (cnt = i = 0; i < dsc->var_cnt; i++)
664 struct dsc_var *dv = &dsc->vars[i];
665 if (dv->z_name[0] != '\0')
667 struct dsc_z_score *z;
668 struct variable *dst_var;
670 dst_var = dict_create_var_assert (dataset_dict (ds), dv->z_name, 0);
671 var_set_label (dst_var, xasprintf (_("Z-score of %s"),
672 var_to_string (dv->v)));
674 z = &t->z_scores[cnt++];
677 z->mean = dv->stats[DSC_MEAN];
678 z->std_dev = dv->stats[DSC_STDDEV];
682 add_transformation (ds,
683 descriptives_trns_proc, descriptives_trns_free, t);
686 /* Statistical calculation. */
688 static bool listwise_missing (struct dsc_proc *dsc, const struct ccase *c);
690 /* Calculates and displays descriptive statistics for the cases
693 calc_descriptives (const struct ccase *first,
694 const struct casefile *cf, void *dsc_,
695 const struct dataset *ds)
697 struct dsc_proc *dsc = dsc_;
698 struct casereader *reader;
702 output_split_file_values (ds, first);
704 for (i = 0; i < dsc->var_cnt; i++)
706 struct dsc_var *dv = &dsc->vars[i];
708 dv->valid = dv->missing = 0.0;
709 if (dv->moments != NULL)
710 moments_clear (dv->moments);
714 dsc->missing_listwise = 0.;
717 /* First pass to handle most of the work. */
718 for (reader = casefile_get_reader (cf, NULL);
719 casereader_read (reader, &c);
722 double weight = dict_get_case_weight (dataset_dict (ds), &c, &dsc->bad_warn);
726 /* Check for missing values. */
727 if (listwise_missing (dsc, &c))
729 dsc->missing_listwise += weight;
730 if (dsc->missing_type == DSC_LISTWISE)
733 dsc->valid += weight;
735 for (i = 0; i < dsc->var_cnt; i++)
737 struct dsc_var *dv = &dsc->vars[i];
738 double x = case_num (&c, dv->v);
740 if (dsc->missing_type != DSC_LISTWISE
741 && var_is_num_missing (dv->v, x, dsc->exclude))
743 dv->missing += weight;
747 if (dv->moments != NULL)
748 moments_pass_one (dv->moments, x, weight);
756 casereader_destroy (reader);
758 /* Second pass for higher-order moments. */
759 if (dsc->max_moment > MOMENT_MEAN)
761 for (reader = casefile_get_reader (cf, NULL);
762 casereader_read (reader, &c);
765 double weight = dict_get_case_weight (dataset_dict (ds), &c,
770 /* Check for missing values. */
771 if (dsc->missing_type == DSC_LISTWISE && listwise_missing (dsc, &c))
774 for (i = 0; i < dsc->var_cnt; i++)
776 struct dsc_var *dv = &dsc->vars[i];
777 double x = case_num (&c, dv->v);
779 if (dsc->missing_type != DSC_LISTWISE
780 && var_is_num_missing (dv->v, x, dsc->exclude))
783 if (dv->moments != NULL)
784 moments_pass_two (dv->moments, x, weight);
787 casereader_destroy (reader);
790 /* Calculate results. */
791 for (i = 0; i < dsc->var_cnt; i++)
793 struct dsc_var *dv = &dsc->vars[i];
797 for (j = 0; j < DSC_N_STATS; j++)
798 dv->stats[j] = SYSMIS;
800 dv->valid = W = dsc->valid - dv->missing;
802 if (dv->moments != NULL)
803 moments_calculate (dv->moments, NULL,
804 &dv->stats[DSC_MEAN], &dv->stats[DSC_VARIANCE],
805 &dv->stats[DSC_SKEWNESS], &dv->stats[DSC_KURTOSIS]);
806 if (dsc->calc_stats & (1ul << DSC_SEMEAN)
807 && dv->stats[DSC_VARIANCE] != SYSMIS && W > 0.)
808 dv->stats[DSC_SEMEAN] = sqrt (dv->stats[DSC_VARIANCE]) / sqrt (W);
809 if (dsc->calc_stats & (1ul << DSC_STDDEV)
810 && dv->stats[DSC_VARIANCE] != SYSMIS)
811 dv->stats[DSC_STDDEV] = sqrt (dv->stats[DSC_VARIANCE]);
812 if (dsc->calc_stats & (1ul << DSC_SEKURT))
813 if (dv->stats[DSC_KURTOSIS] != SYSMIS)
814 dv->stats[DSC_SEKURT] = calc_sekurt (W);
815 if (dsc->calc_stats & (1ul << DSC_SESKEW)
816 && dv->stats[DSC_SKEWNESS] != SYSMIS)
817 dv->stats[DSC_SESKEW] = calc_seskew (W);
818 dv->stats[DSC_RANGE] = ((dv->min == DBL_MAX || dv->max == -DBL_MAX)
819 ? SYSMIS : dv->max - dv->min);
820 dv->stats[DSC_MIN] = dv->min == DBL_MAX ? SYSMIS : dv->min;
821 dv->stats[DSC_MAX] = dv->max == -DBL_MAX ? SYSMIS : dv->max;
822 if (dsc->calc_stats & (1ul << DSC_SUM))
823 dv->stats[DSC_SUM] = W * dv->stats[DSC_MEAN];
826 /* Output results. */
832 /* Returns true if any of the descriptives variables in DSC's
833 variable list have missing values in case C, false otherwise. */
835 listwise_missing (struct dsc_proc *dsc, const struct ccase *c)
839 for (i = 0; i < dsc->var_cnt; i++)
841 struct dsc_var *dv = &dsc->vars[i];
842 double x = case_num (c, dv->v);
844 if (var_is_num_missing (dv->v, x, dsc->exclude))
850 /* Statistical display. */
852 static algo_compare_func descriptives_compare_dsc_vars;
854 /* Displays a table of descriptive statistics for DSC. */
856 display (struct dsc_proc *dsc)
862 nc = 1 + (dsc->format == DSC_SERIAL ? 2 : 1);
863 for (i = 0; i < DSC_N_STATS; i++)
864 if (dsc->show_stats & (1ul << i))
867 if (dsc->sort_by_stat != DSC_NONE)
868 sort (dsc->vars, dsc->var_cnt, sizeof *dsc->vars,
869 descriptives_compare_dsc_vars, dsc);
871 t = tab_create (nc, dsc->var_cnt + 1, 0);
872 tab_headers (t, 1, 0, 1, 0);
873 tab_box (t, TAL_1, TAL_1, -1, -1, 0, 0, nc - 1, dsc->var_cnt);
874 tab_box (t, -1, -1, -1, TAL_1, 1, 0, nc - 1, dsc->var_cnt);
875 tab_hline (t, TAL_2, 0, nc - 1, 1);
876 tab_vline (t, TAL_2, 1, 0, dsc->var_cnt);
877 tab_dim (t, tab_natural_dimensions);
880 tab_text (t, nc++, 0, TAB_LEFT | TAT_TITLE, _("Variable"));
881 if (dsc->format == DSC_SERIAL)
883 tab_text (t, nc++, 0, TAB_CENTER | TAT_TITLE, _("Valid N"));
884 tab_text (t, nc++, 0, TAB_CENTER | TAT_TITLE, _("Missing N"));
887 tab_text (t, nc++, 0, TAB_CENTER | TAT_TITLE, "N");
889 for (i = 0; i < DSC_N_STATS; i++)
890 if (dsc->show_stats & (1ul << i))
892 const char *title = gettext (dsc_info[i].name);
893 tab_text (t, nc++, 0, TAB_CENTER | TAT_TITLE, title);
896 for (i = 0; i < dsc->var_cnt; i++)
898 struct dsc_var *dv = &dsc->vars[i];
902 tab_text (t, nc++, i + 1, TAB_LEFT, var_get_name (dv->v));
903 tab_text (t, nc++, i + 1, TAT_PRINTF, "%g", dv->valid);
904 if (dsc->format == DSC_SERIAL)
905 tab_text (t, nc++, i + 1, TAT_PRINTF, "%g", dv->missing);
906 for (j = 0; j < DSC_N_STATS; j++)
907 if (dsc->show_stats & (1ul << j))
908 tab_float (t, nc++, i + 1, TAB_NONE, dv->stats[j], 10, 3);
911 tab_title (t, _("Valid cases = %g; cases with missing value(s) = %g."),
912 dsc->valid, dsc->missing_listwise);
917 /* Compares `struct dsc_var's A and B according to the ordering
920 descriptives_compare_dsc_vars (const void *a_, const void *b_, const void *dsc_)
922 const struct dsc_var *a = a_;
923 const struct dsc_var *b = b_;
924 const struct dsc_proc *dsc = dsc_;
928 if (dsc->sort_by_stat == DSC_NAME)
929 result = strcasecmp (var_get_name (a->v), var_get_name (b->v));
932 double as = a->stats[dsc->sort_by_stat];
933 double bs = b->stats[dsc->sort_by_stat];
935 result = as < bs ? -1 : as > bs;
938 if (!dsc->sort_ascending)