1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
4 This program is free software; you can redistribute it and/or
5 modify it under the terms of the GNU General Public License as
6 published by the Free Software Foundation; either version 2 of the
7 License, or (at your option) any later version.
9 This program is distributed in the hope that it will be useful, but
10 WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19 /* FIXME: Many possible optimizations. */
27 #include <data/case.h>
28 #include <data/casefile.h>
29 #include <data/dictionary.h>
30 #include <data/procedure.h>
31 #include <data/transformations.h>
32 #include <data/variable.h>
33 #include <language/command.h>
34 #include <language/dictionary/split-file.h>
35 #include <language/lexer/lexer.h>
36 #include <language/lexer/variable-parser.h>
37 #include <libpspp/alloc.h>
38 #include <libpspp/array.h>
39 #include <libpspp/compiler.h>
40 #include <libpspp/magic.h>
41 #include <libpspp/message.h>
42 #include <libpspp/assertion.h>
43 #include <math/moments.h>
44 #include <output/manager.h>
45 #include <output/table.h>
48 #define _(msgid) gettext (msgid)
49 #define N_(msgid) msgid
51 /* DESCRIPTIVES private data. */
55 /* Handling of missing values. */
58 DSC_VARIABLE, /* Handle missing values on a per-variable basis. */
59 DSC_LISTWISE /* Discard entire case if any variable is missing. */
62 /* Describes properties of a distribution for the purpose of
63 calculating a Z-score. */
66 struct variable *src_var; /* Variable on which z-score is based. */
67 struct variable *z_var; /* New z-score variable. */
68 double mean; /* Distribution mean. */
69 double std_dev; /* Distribution standard deviation. */
72 /* DESCRIPTIVES transformation (for calculating Z-scores). */
75 struct dsc_z_score *z_scores; /* Array of Z-scores. */
76 int z_score_cnt; /* Number of Z-scores. */
77 struct variable **vars; /* Variables for listwise missing checks. */
78 size_t var_cnt; /* Number of variables. */
79 enum dsc_missing_type missing_type; /* Treatment of missing values. */
80 int include_user_missing; /* Nonzero to include user-missing values. */
83 /* Statistics. Used as bit indexes, so must be 32 or fewer. */
86 DSC_MEAN = 0, DSC_SEMEAN, DSC_STDDEV, DSC_VARIANCE, DSC_KURTOSIS,
87 DSC_SEKURT, DSC_SKEWNESS, DSC_SESKEW, DSC_RANGE, DSC_MIN,
88 DSC_MAX, DSC_SUM, DSC_N_STATS,
90 /* Only valid as sort criteria. */
91 DSC_NAME = -2, /* Sort by name. */
92 DSC_NONE = -1 /* Unsorted. */
95 /* Describes one statistic. */
96 struct dsc_statistic_info
98 const char *identifier; /* Identifier. */
99 const char *name; /* Full name. */
100 enum moment moment; /* Highest moment needed to calculate. */
103 /* Table of statistics, indexed by DSC_*. */
104 static const struct dsc_statistic_info dsc_info[DSC_N_STATS] =
106 {"MEAN", N_("Mean"), MOMENT_MEAN},
107 {"SEMEAN", N_("S E Mean"), MOMENT_VARIANCE},
108 {"STDDEV", N_("Std Dev"), MOMENT_VARIANCE},
109 {"VARIANCE", N_("Variance"), MOMENT_VARIANCE},
110 {"KURTOSIS", N_("Kurtosis"), MOMENT_KURTOSIS},
111 {"SEKURTOSIS", N_("S E Kurt"), MOMENT_NONE},
112 {"SKEWNESS", N_("Skewness"), MOMENT_SKEWNESS},
113 {"SESKEWNESS", N_("S E Skew"), MOMENT_NONE},
114 {"RANGE", N_("Range"), MOMENT_NONE},
115 {"MINIMUM", N_("Minimum"), MOMENT_NONE},
116 {"MAXIMUM", N_("Maximum"), MOMENT_NONE},
117 {"SUM", N_("Sum"), MOMENT_MEAN},
120 /* Statistics calculated by default if none are explicitly
122 #define DEFAULT_STATS \
123 ((1ul << DSC_MEAN) | (1ul << DSC_STDDEV) | (1ul << DSC_MIN) \
126 /* A variable specified on DESCRIPTIVES. */
129 struct variable *v; /* Variable to calculate on. */
130 char z_name[LONG_NAME_LEN + 1]; /* Name for z-score variable. */
131 double valid, missing; /* Valid, missing counts. */
132 struct moments *moments; /* Moments. */
133 double min, max; /* Maximum and mimimum values. */
134 double stats[DSC_N_STATS]; /* All the stats' values. */
140 DSC_LINE, /* Abbreviated format. */
141 DSC_SERIAL /* Long format. */
144 /* A DESCRIPTIVES procedure. */
147 /* Per-variable info. */
148 struct dsc_var *vars; /* Variables. */
149 size_t var_cnt; /* Number of variables. */
152 enum dsc_missing_type missing_type; /* Treatment of missing values. */
153 int include_user_missing; /* Nonzero to include user-missing values. */
154 int show_var_labels; /* Nonzero to show variable labels. */
155 int show_index; /* Nonzero to show variable index. */
156 enum dsc_format format; /* Output format. */
158 /* Accumulated results. */
159 double missing_listwise; /* Sum of weights of cases missing listwise. */
160 double valid; /* Sum of weights of valid cases. */
161 bool bad_warn; /* Warn if bad weight found. */
162 enum dsc_statistic sort_by_stat; /* Statistic to sort by; -1: name. */
163 int sort_ascending; /* !0: ascending order; 0: descending. */
164 unsigned long show_stats; /* Statistics to display. */
165 unsigned long calc_stats; /* Statistics to calculate. */
166 enum moment max_moment; /* Highest moment needed for stats. */
170 static enum dsc_statistic match_statistic (struct lexer *);
171 static void free_dsc_proc (struct dsc_proc *);
173 /* Z-score functions. */
174 static bool try_name (const struct dictionary *dict,
175 struct dsc_proc *dsc, const char *name);
176 static bool generate_z_varname (const struct dictionary *dict,
177 struct dsc_proc *dsc, char *z_name,
178 const char *name, size_t *z_cnt);
179 static void dump_z_table (struct dsc_proc *);
180 static void setup_z_trns (struct dsc_proc *, struct dataset *);
182 /* Procedure execution functions. */
183 static bool calc_descriptives (const struct ccase *first,
184 const struct casefile *, void *dsc_,
185 const struct dataset *);
186 static void display (struct dsc_proc *dsc);
188 /* Parser and outline. */
190 /* Handles DESCRIPTIVES. */
192 cmd_descriptives (struct lexer *lexer, struct dataset *ds)
194 struct dictionary *dict = dataset_dict (ds);
195 struct dsc_proc *dsc;
196 struct variable **vars = NULL;
198 int save_z_scores = 0;
203 /* Create and initialize dsc. */
204 dsc = xmalloc (sizeof *dsc);
207 dsc->missing_type = DSC_VARIABLE;
208 dsc->include_user_missing = 0;
209 dsc->show_var_labels = 1;
211 dsc->format = DSC_LINE;
212 dsc->missing_listwise = 0.;
215 dsc->sort_by_stat = DSC_NONE;
216 dsc->sort_ascending = 1;
217 dsc->show_stats = dsc->calc_stats = DEFAULT_STATS;
219 /* Parse DESCRIPTIVES. */
220 while (lex_token (lexer) != '.')
222 if (lex_match_id (lexer, "MISSING"))
224 lex_match (lexer, '=');
225 while (lex_token (lexer) != '.' && lex_token (lexer) != '/')
227 if (lex_match_id (lexer, "VARIABLE"))
228 dsc->missing_type = DSC_VARIABLE;
229 else if (lex_match_id (lexer, "LISTWISE"))
230 dsc->missing_type = DSC_LISTWISE;
231 else if (lex_match_id (lexer, "INCLUDE"))
232 dsc->include_user_missing = 1;
235 lex_error (lexer, NULL);
238 lex_match (lexer, ',');
241 else if (lex_match_id (lexer, "SAVE"))
243 else if (lex_match_id (lexer, "FORMAT"))
245 lex_match (lexer, '=');
246 while (lex_token (lexer) != '.' && lex_token (lexer) != '/')
248 if (lex_match_id (lexer, "LABELS"))
249 dsc->show_var_labels = 1;
250 else if (lex_match_id (lexer, "NOLABELS"))
251 dsc->show_var_labels = 0;
252 else if (lex_match_id (lexer, "INDEX"))
254 else if (lex_match_id (lexer, "NOINDEX"))
256 else if (lex_match_id (lexer, "LINE"))
257 dsc->format = DSC_LINE;
258 else if (lex_match_id (lexer, "SERIAL"))
259 dsc->format = DSC_SERIAL;
262 lex_error (lexer, NULL);
265 lex_match (lexer, ',');
268 else if (lex_match_id (lexer, "STATISTICS"))
270 lex_match (lexer, '=');
272 while (lex_token (lexer) != '.' && lex_token (lexer) != '/')
274 if (lex_match (lexer, T_ALL))
275 dsc->show_stats |= (1ul << DSC_N_STATS) - 1;
276 else if (lex_match_id (lexer, "DEFAULT"))
277 dsc->show_stats |= DEFAULT_STATS;
279 dsc->show_stats |= 1ul << (match_statistic (lexer));
280 lex_match (lexer, ',');
282 if (dsc->show_stats == 0)
283 dsc->show_stats = DEFAULT_STATS;
285 else if (lex_match_id (lexer, "SORT"))
287 lex_match (lexer, '=');
288 if (lex_match_id (lexer, "NAME"))
289 dsc->sort_by_stat = DSC_NAME;
292 dsc->sort_by_stat = match_statistic (lexer);
293 if (dsc->sort_by_stat == DSC_NONE )
294 dsc->sort_by_stat = DSC_MEAN;
296 if (lex_match (lexer, '('))
298 if (lex_match_id (lexer, "A"))
299 dsc->sort_ascending = 1;
300 else if (lex_match_id (lexer, "D"))
301 dsc->sort_ascending = 0;
303 lex_error (lexer, NULL);
304 lex_force_match (lexer, ')');
307 else if (var_cnt == 0)
309 if (lex_look_ahead (lexer) == '=')
311 lex_match_id (lexer, "VARIABLES");
312 lex_match (lexer, '=');
315 while (lex_token (lexer) != '.' && lex_token (lexer) != '/')
319 if (!parse_variables (lexer, dataset_dict (ds), &vars, &var_cnt,
320 PV_APPEND | PV_NO_DUPLICATE | PV_NUMERIC))
323 dsc->vars = xnrealloc (dsc->vars, var_cnt, sizeof *dsc->vars);
324 for (i = dsc->var_cnt; i < var_cnt; i++)
326 struct dsc_var *dv = &dsc->vars[i];
328 dv->z_name[0] = '\0';
331 dsc->var_cnt = var_cnt;
333 if (lex_match (lexer, '('))
335 if (lex_token (lexer) != T_ID)
337 lex_error (lexer, NULL);
340 if (try_name (dict, dsc, lex_tokid (lexer)))
342 strcpy (dsc->vars[dsc->var_cnt - 1].z_name, lex_tokid (lexer));
346 msg (SE, _("Z-score variable name %s would be"
347 " a duplicate variable name."), lex_tokid (lexer));
349 if (!lex_force_match (lexer, ')'))
356 lex_error (lexer, NULL);
360 lex_match (lexer, '/');
364 msg (SE, _("No variables specified."));
368 /* Construct z-score varnames, show translation table. */
369 if (z_cnt || save_z_scores)
375 for (i = 0; i < dsc->var_cnt; i++)
376 if (dsc->vars[i].z_name[0] == 0)
378 if (!generate_z_varname (dict, dsc, dsc->vars[i].z_name,
379 var_get_name (dsc->vars[i].v),
388 /* Figure out statistics to display. */
389 if (dsc->show_stats & (1ul << DSC_SKEWNESS))
390 dsc->show_stats |= 1ul << DSC_SESKEW;
391 if (dsc->show_stats & (1ul << DSC_KURTOSIS))
392 dsc->show_stats |= 1ul << DSC_SEKURT;
394 /* Figure out which statistics to calculate. */
395 dsc->calc_stats = dsc->show_stats;
397 dsc->calc_stats |= (1ul << DSC_MEAN) | (1ul << DSC_STDDEV);
398 if (dsc->sort_by_stat >= 0)
399 dsc->calc_stats |= 1ul << dsc->sort_by_stat;
400 if (dsc->show_stats & (1ul << DSC_SESKEW))
401 dsc->calc_stats |= 1ul << DSC_SKEWNESS;
402 if (dsc->show_stats & (1ul << DSC_SEKURT))
403 dsc->calc_stats |= 1ul << DSC_KURTOSIS;
405 /* Figure out maximum moment needed and allocate moments for
407 dsc->max_moment = MOMENT_NONE;
408 for (i = 0; i < DSC_N_STATS; i++)
409 if (dsc->calc_stats & (1ul << i) && dsc_info[i].moment > dsc->max_moment)
410 dsc->max_moment = dsc_info[i].moment;
411 if (dsc->max_moment != MOMENT_NONE)
412 for (i = 0; i < dsc->var_cnt; i++)
413 dsc->vars[i].moments = moments_create (dsc->max_moment);
416 ok = multipass_procedure_with_splits (ds, calc_descriptives, dsc);
420 setup_z_trns (dsc, ds);
425 return ok ? CMD_SUCCESS : CMD_CASCADING_FAILURE;
433 /* Returns the statistic named by the current token and skips past the token.
434 Returns DSC_NONE if no statistic is given (e.g., subcommand with no
435 specifiers). Emits an error if the current token ID does not name a
437 static enum dsc_statistic
438 match_statistic (struct lexer *lexer)
440 if (lex_token (lexer) == T_ID)
442 enum dsc_statistic stat;
444 for (stat = 0; stat < DSC_N_STATS; stat++)
445 if (lex_match_id (lexer, dsc_info[stat].identifier))
449 lex_error (lexer, _("expecting statistic name: reverting to default"));
457 free_dsc_proc (struct dsc_proc *dsc)
464 for (i = 0; i < dsc->var_cnt; i++)
465 moments_destroy (dsc->vars[i].moments);
472 /* Returns false if NAME is a duplicate of any existing variable name or
473 of any previously-declared z-var name; otherwise returns true. */
475 try_name (const struct dictionary *dict, struct dsc_proc *dsc,
480 if (dict_lookup_var (dict, name) != NULL)
482 for (i = 0; i < dsc->var_cnt; i++)
483 if (!strcasecmp (dsc->vars[i].z_name, name))
488 /* Generates a name for a Z-score variable based on a variable
489 named VAR_NAME, given that *Z_CNT generated variable names are
490 known to already exist. If successful, returns true and
491 copies the new name into Z_NAME. On failure, returns false. */
493 generate_z_varname (const struct dictionary *dict, struct dsc_proc *dsc, char *z_name,
494 const char *var_name, size_t *z_cnt)
496 char name[LONG_NAME_LEN + 1];
498 /* Try a name based on the original variable name. */
500 str_copy_trunc (name + 1, sizeof name - 1, var_name);
501 if (try_name (dict, dsc, name))
503 strcpy (z_name, name);
507 /* Generate a synthetic name. */
513 sprintf (name, "ZSC%03d", *z_cnt);
514 else if (*z_cnt <= 108)
515 sprintf (name, "STDZ%02d", *z_cnt - 99);
516 else if (*z_cnt <= 117)
517 sprintf (name, "ZZZZ%02d", *z_cnt - 108);
518 else if (*z_cnt <= 126)
519 sprintf (name, "ZQZQ%02d", *z_cnt - 117);
522 msg (SE, _("Ran out of generic names for Z-score variables. "
523 "There are only 126 generic names: ZSC001-ZSC0999, "
524 "STDZ01-STDZ09, ZZZZ01-ZZZZ09, ZQZQ01-ZQZQ09."));
528 if (try_name (dict, dsc, name))
530 strcpy (z_name, name);
537 /* Outputs a table describing the mapping between source
538 variables and Z-score variables. */
540 dump_z_table (struct dsc_proc *dsc)
548 for (i = 0; i < dsc->var_cnt; i++)
549 if (dsc->vars[i].z_name[0] != '\0')
553 t = tab_create (2, cnt + 1, 0);
554 tab_title (t, _("Mapping of variables to corresponding Z-scores."));
555 tab_columns (t, SOM_COL_DOWN, 1);
556 tab_headers (t, 0, 0, 1, 0);
557 tab_box (t, TAL_1, TAL_1, TAL_0, TAL_1, 0, 0, 1, cnt);
558 tab_hline (t, TAL_2, 0, 1, 1);
559 tab_text (t, 0, 0, TAB_CENTER | TAT_TITLE, _("Source"));
560 tab_text (t, 1, 0, TAB_CENTER | TAT_TITLE, _("Target"));
561 tab_dim (t, tab_natural_dimensions);
566 for (i = 0, y = 1; i < dsc->var_cnt; i++)
567 if (dsc->vars[i].z_name[0] != '\0')
569 tab_text (t, 0, y, TAB_LEFT, var_get_name (dsc->vars[i].v));
570 tab_text (t, 1, y++, TAB_LEFT, dsc->vars[i].z_name);
577 /* Transformation function to calculate Z-scores. Will return SYSMIS if any of
578 the following are true: 1) mean or standard deviation is SYSMIS 2) score is
579 SYSMIS 3) score is user missing and they were not included in the original
580 analyis. 4) any of the variables in the original analysis were missing
581 (either system or user-missing values that weren't included).
584 descriptives_trns_proc (void *trns_, struct ccase * c,
585 casenumber case_idx UNUSED)
587 struct dsc_trns *t = trns_;
588 struct dsc_z_score *z;
589 struct variable **vars;
592 if (t->missing_type == DSC_LISTWISE)
595 for (vars = t->vars; vars < t->vars + t->var_cnt; vars++)
597 double score = case_num (c, *vars);
599 || (!t->include_user_missing
600 && var_is_num_user_missing (*vars, score)))
608 for (z = t->z_scores; z < t->z_scores + t->z_score_cnt; z++)
610 double input = case_num (c, z->src_var);
611 double *output = &case_data_rw (c, z->z_var)->f;
613 if (z->mean == SYSMIS || z->std_dev == SYSMIS
614 || all_sysmis || input == SYSMIS
615 || (!t->include_user_missing
616 && var_is_num_user_missing (z->src_var, input)))
619 *output = (input - z->mean) / z->std_dev;
621 return TRNS_CONTINUE;
624 /* Frees a descriptives_trns struct. */
626 descriptives_trns_free (void *trns_)
628 struct dsc_trns *t = trns_;
631 assert((t->missing_type != DSC_LISTWISE) ^ (t->vars != NULL));
636 /* Sets up a transformation to calculate Z scores. */
638 setup_z_trns (struct dsc_proc *dsc, struct dataset *ds)
643 for (cnt = i = 0; i < dsc->var_cnt; i++)
644 if (dsc->vars[i].z_name[0] != '\0')
647 t = xmalloc (sizeof *t);
648 t->z_scores = xnmalloc (cnt, sizeof *t->z_scores);
649 t->z_score_cnt = cnt;
650 t->missing_type = dsc->missing_type;
651 t->include_user_missing = dsc->include_user_missing;
652 if ( t->missing_type == DSC_LISTWISE )
654 t->var_cnt = dsc->var_cnt;
655 t->vars = xnmalloc (t->var_cnt, sizeof *t->vars);
656 for (i = 0; i < t->var_cnt; i++)
657 t->vars[i] = dsc->vars[i].v;
665 for (cnt = i = 0; i < dsc->var_cnt; i++)
667 struct dsc_var *dv = &dsc->vars[i];
668 if (dv->z_name[0] != '\0')
670 struct dsc_z_score *z;
671 struct variable *dst_var;
673 dst_var = dict_create_var_assert (dataset_dict (ds), dv->z_name, 0);
674 var_set_label (dst_var, xasprintf (_("Z-score of %s"),
675 var_to_string (dv->v)));
677 z = &t->z_scores[cnt++];
680 z->mean = dv->stats[DSC_MEAN];
681 z->std_dev = dv->stats[DSC_STDDEV];
685 add_transformation (ds,
686 descriptives_trns_proc, descriptives_trns_free, t);
689 /* Statistical calculation. */
691 static bool listwise_missing (struct dsc_proc *dsc, const struct ccase *c);
693 /* Calculates and displays descriptive statistics for the cases
696 calc_descriptives (const struct ccase *first,
697 const struct casefile *cf, void *dsc_,
698 const struct dataset *ds)
700 struct dsc_proc *dsc = dsc_;
701 struct casereader *reader;
705 output_split_file_values (ds, first);
707 for (i = 0; i < dsc->var_cnt; i++)
709 struct dsc_var *dv = &dsc->vars[i];
711 dv->valid = dv->missing = 0.0;
712 if (dv->moments != NULL)
713 moments_clear (dv->moments);
717 dsc->missing_listwise = 0.;
720 /* First pass to handle most of the work. */
721 for (reader = casefile_get_reader (cf, NULL);
722 casereader_read (reader, &c);
725 double weight = dict_get_case_weight (dataset_dict (ds), &c, &dsc->bad_warn);
729 /* Check for missing values. */
730 if (listwise_missing (dsc, &c))
732 dsc->missing_listwise += weight;
733 if (dsc->missing_type == DSC_LISTWISE)
736 dsc->valid += weight;
738 for (i = 0; i < dsc->var_cnt; i++)
740 struct dsc_var *dv = &dsc->vars[i];
741 double x = case_num (&c, dv->v);
743 if (dsc->missing_type != DSC_LISTWISE
745 || (!dsc->include_user_missing
746 && var_is_num_user_missing (dv->v, x))))
748 dv->missing += weight;
752 if (dv->moments != NULL)
753 moments_pass_one (dv->moments, x, weight);
761 casereader_destroy (reader);
763 /* Second pass for higher-order moments. */
764 if (dsc->max_moment > MOMENT_MEAN)
766 for (reader = casefile_get_reader (cf, NULL);
767 casereader_read (reader, &c);
770 double weight = dict_get_case_weight (dataset_dict (ds), &c,
775 /* Check for missing values. */
776 if (listwise_missing (dsc, &c)
777 && dsc->missing_type == DSC_LISTWISE)
780 for (i = 0; i < dsc->var_cnt; i++)
782 struct dsc_var *dv = &dsc->vars[i];
783 double x = case_num (&c, dv->v);
785 if (dsc->missing_type != DSC_LISTWISE
787 || (!dsc->include_user_missing
788 && var_is_num_user_missing (dv->v, x))))
791 if (dv->moments != NULL)
792 moments_pass_two (dv->moments, x, weight);
795 casereader_destroy (reader);
798 /* Calculate results. */
799 for (i = 0; i < dsc->var_cnt; i++)
801 struct dsc_var *dv = &dsc->vars[i];
805 for (j = 0; j < DSC_N_STATS; j++)
806 dv->stats[j] = SYSMIS;
808 dv->valid = W = dsc->valid - dv->missing;
810 if (dv->moments != NULL)
811 moments_calculate (dv->moments, NULL,
812 &dv->stats[DSC_MEAN], &dv->stats[DSC_VARIANCE],
813 &dv->stats[DSC_SKEWNESS], &dv->stats[DSC_KURTOSIS]);
814 if (dsc->calc_stats & (1ul << DSC_SEMEAN)
815 && dv->stats[DSC_VARIANCE] != SYSMIS && W > 0.)
816 dv->stats[DSC_SEMEAN] = sqrt (dv->stats[DSC_VARIANCE]) / sqrt (W);
817 if (dsc->calc_stats & (1ul << DSC_STDDEV)
818 && dv->stats[DSC_VARIANCE] != SYSMIS)
819 dv->stats[DSC_STDDEV] = sqrt (dv->stats[DSC_VARIANCE]);
820 if (dsc->calc_stats & (1ul << DSC_SEKURT))
821 if (dv->stats[DSC_KURTOSIS] != SYSMIS)
822 dv->stats[DSC_SEKURT] = calc_sekurt (W);
823 if (dsc->calc_stats & (1ul << DSC_SESKEW)
824 && dv->stats[DSC_SKEWNESS] != SYSMIS)
825 dv->stats[DSC_SESKEW] = calc_seskew (W);
826 dv->stats[DSC_RANGE] = ((dv->min == DBL_MAX || dv->max == -DBL_MAX)
827 ? SYSMIS : dv->max - dv->min);
828 dv->stats[DSC_MIN] = dv->min == DBL_MAX ? SYSMIS : dv->min;
829 dv->stats[DSC_MAX] = dv->max == -DBL_MAX ? SYSMIS : dv->max;
830 if (dsc->calc_stats & (1ul << DSC_SUM))
831 dv->stats[DSC_SUM] = W * dv->stats[DSC_MEAN];
834 /* Output results. */
840 /* Returns true if any of the descriptives variables in DSC's
841 variable list have missing values in case C, false otherwise. */
843 listwise_missing (struct dsc_proc *dsc, const struct ccase *c)
847 for (i = 0; i < dsc->var_cnt; i++)
849 struct dsc_var *dv = &dsc->vars[i];
850 double x = case_num (c, dv->v);
853 || (!dsc->include_user_missing
854 && var_is_num_user_missing (dv->v, x)))
860 /* Statistical display. */
862 static algo_compare_func descriptives_compare_dsc_vars;
864 /* Displays a table of descriptive statistics for DSC. */
866 display (struct dsc_proc *dsc)
872 nc = 1 + (dsc->format == DSC_SERIAL ? 2 : 1);
873 for (i = 0; i < DSC_N_STATS; i++)
874 if (dsc->show_stats & (1ul << i))
877 if (dsc->sort_by_stat != DSC_NONE)
878 sort (dsc->vars, dsc->var_cnt, sizeof *dsc->vars,
879 descriptives_compare_dsc_vars, dsc);
881 t = tab_create (nc, dsc->var_cnt + 1, 0);
882 tab_headers (t, 1, 0, 1, 0);
883 tab_box (t, TAL_1, TAL_1, -1, -1, 0, 0, nc - 1, dsc->var_cnt);
884 tab_box (t, -1, -1, -1, TAL_1, 1, 0, nc - 1, dsc->var_cnt);
885 tab_hline (t, TAL_2, 0, nc - 1, 1);
886 tab_vline (t, TAL_2, 1, 0, dsc->var_cnt);
887 tab_dim (t, tab_natural_dimensions);
890 tab_text (t, nc++, 0, TAB_LEFT | TAT_TITLE, _("Variable"));
891 if (dsc->format == DSC_SERIAL)
893 tab_text (t, nc++, 0, TAB_CENTER | TAT_TITLE, _("Valid N"));
894 tab_text (t, nc++, 0, TAB_CENTER | TAT_TITLE, _("Missing N"));
897 tab_text (t, nc++, 0, TAB_CENTER | TAT_TITLE, "N");
899 for (i = 0; i < DSC_N_STATS; i++)
900 if (dsc->show_stats & (1ul << i))
902 const char *title = gettext (dsc_info[i].name);
903 tab_text (t, nc++, 0, TAB_CENTER | TAT_TITLE, title);
906 for (i = 0; i < dsc->var_cnt; i++)
908 struct dsc_var *dv = &dsc->vars[i];
912 tab_text (t, nc++, i + 1, TAB_LEFT, var_get_name (dv->v));
913 tab_text (t, nc++, i + 1, TAT_PRINTF, "%g", dv->valid);
914 if (dsc->format == DSC_SERIAL)
915 tab_text (t, nc++, i + 1, TAT_PRINTF, "%g", dv->missing);
916 for (j = 0; j < DSC_N_STATS; j++)
917 if (dsc->show_stats & (1ul << j))
918 tab_float (t, nc++, i + 1, TAB_NONE, dv->stats[j], 10, 3);
921 tab_title (t, _("Valid cases = %g; cases with missing value(s) = %g."),
922 dsc->valid, dsc->missing_listwise);
927 /* Compares `struct dsc_var's A and B according to the ordering
930 descriptives_compare_dsc_vars (const void *a_, const void *b_, const void *dsc_)
932 const struct dsc_var *a = a_;
933 const struct dsc_var *b = b_;
934 const struct dsc_proc *dsc = dsc_;
938 if (dsc->sort_by_stat == DSC_NAME)
939 result = strcasecmp (var_get_name (a->v), var_get_name (b->v));
942 double as = a->stats[dsc->sort_by_stat];
943 double bs = b->stats[dsc->sort_by_stat];
945 result = as < bs ? -1 : as > bs;
948 if (!dsc->sort_ascending)