1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
23 #include <data/casegrouper.h>
24 #include <data/casereader.h>
25 #include <data/dictionary.h>
26 #include <data/procedure.h>
27 #include <data/transformations.h>
28 #include <data/variable.h>
29 #include <language/command.h>
30 #include <language/dictionary/split-file.h>
31 #include <language/lexer/lexer.h>
32 #include <language/lexer/variable-parser.h>
33 #include <libpspp/array.h>
34 #include <libpspp/compiler.h>
35 #include <libpspp/message.h>
36 #include <libpspp/assertion.h>
37 #include <math/moments.h>
38 #include <output/manager.h>
39 #include <output/table.h>
/* Translation shorthands.  _() expands to a gettext() call on MSGID.
   N_() expands to MSGID unchanged; the dsc_info names wrapped in N_()
   below are passed to gettext() only when display() prints them. */
44 #define _(msgid) gettext (msgid)
45 #define N_(msgid) msgid
47 /* DESCRIPTIVES private data. */
51 /* Handling of missing values. */
/* These are the enumerators of enum dsc_missing_type, the type of the
   missing_type members declared further down.  cmd_descriptives starts
   with DSC_VARIABLE and lets the VARIABLE and LISTWISE keywords of the
   MISSING subcommand choose between the two. */
54 DSC_VARIABLE, /* Handle missing values on a per-variable basis. */
55 DSC_LISTWISE /* Discard entire case if any variable is missing. */
58 /* Describes properties of a distribution for the purpose of
59 calculating a Z-score. */
/* setup_z_trns copies MEAN and STD_DEV from the DSC_MEAN and DSC_STDDEV
   statistics of the source variable.  descriptives_trns_proc then stores
   (input - mean) / std_dev into Z_VAR, where input is the value of
   SRC_VAR in the case being transformed. */
62 const struct variable *src_var; /* Variable on which z-score is based. */
63 struct variable *z_var; /* New z-score variable. */
64 double mean; /* Distribution mean. */
65 double std_dev; /* Distribution standard deviation. */
68 /* DESCRIPTIVES transformation (for calculating Z-scores). */
/* Members of struct dsc_trns, the type through which setup_z_trns and the
   two transformation callbacks access this data.  setup_z_trns allocates
   one instance, fills it in and registers it with add_transformation. */
71 struct dsc_z_score *z_scores; /* Array of Z-scores. */
72 int z_score_cnt; /* Number of Z-scores. */
/* setup_z_trns allocates and fills VARS and VAR_CNT when missing_type is
   DSC_LISTWISE; descriptives_trns_proc only walks them in that mode. */
73 const struct variable **vars; /* Variables for listwise missing checks. */
74 size_t var_cnt; /* Number of variables. */
/* Both settings are copied from the struct dsc_proc that produced the
   statistics. */
75 enum dsc_missing_type missing_type; /* Treatment of missing values. */
76 enum mv_class exclude; /* Classes of missing values to exclude. */
79 /* Statistics. Used as bit indexes, so must be 32 or fewer. */
/* Each non-negative enumerator N stands for bit (1ul << N) in the
   show_stats and calc_stats masks and is also the index of that statistic
   in dsc_info[] and in the stats[] array of a variable.  DSC_N_STATS is
   the number of real statistics and the dimension of both arrays. */
82 DSC_MEAN = 0, DSC_SEMEAN, DSC_STDDEV, DSC_VARIANCE, DSC_KURTOSIS,
83 DSC_SEKURT, DSC_SKEWNESS, DSC_SESKEW, DSC_RANGE, DSC_MIN,
84 DSC_MAX, DSC_SUM, DSC_N_STATS,
86 /* Only valid as sort criteria. */
/* Both values are negative, so they must never be used as a shift count
   or as an index into dsc_info[] or stats[]. */
87 DSC_NAME = -2, /* Sort by name. */
88 DSC_NONE = -1 /* Unsorted. */
91 /* Describes one statistic. */
92 struct dsc_statistic_info
/* Keyword that match_statistic compares against the current token. */
94 const char *identifier; /* Identifier. */
/* Column heading; display() passes it through gettext() before use. */
95 const char *name; /* Full name. */
/* cmd_descriptives takes the largest value over all statistics selected
   in calc_stats to decide how many moments to accumulate. */
96 enum moment moment; /* Highest moment needed to calculate. */
99 /* Table of statistics, indexed by DSC_*. */
/* The rows are listed in the same order as the non-negative DSC_*
   enumerators.  The S E Kurt and S E Skew rows carry MOMENT_NONE:
   cmd_descriptives adds DSC_KURTOSIS or DSC_SKEWNESS to calc_stats
   whenever the matching standard error is shown, and those rows are what
   raise the required moment. */
100 static const struct dsc_statistic_info dsc_info[DSC_N_STATS] =
102 {"MEAN", N_("Mean"), MOMENT_MEAN},
103 {"SEMEAN", N_("S E Mean"), MOMENT_VARIANCE},
104 {"STDDEV", N_("Std Dev"), MOMENT_VARIANCE},
105 {"VARIANCE", N_("Variance"), MOMENT_VARIANCE},
106 {"KURTOSIS", N_("Kurtosis"), MOMENT_KURTOSIS},
107 {"SEKURTOSIS", N_("S E Kurt"), MOMENT_NONE},
108 {"SKEWNESS", N_("Skewness"), MOMENT_SKEWNESS},
109 {"SESKEWNESS", N_("S E Skew"), MOMENT_NONE},
/* Range, minimum and maximum are derived from the min and max members of
   struct dsc_var rather than from moments. */
110 {"RANGE", N_("Range"), MOMENT_NONE},
111 {"MINIMUM", N_("Minimum"), MOMENT_NONE},
112 {"MAXIMUM", N_("Maximum"), MOMENT_NONE},
/* calc_descriptives computes the sum as the weight total times the mean,
   hence MOMENT_MEAN. */
113 {"SUM", N_("Sum"), MOMENT_MEAN},
116 /* Statistics calculated by default if none are explicitly requested. */
118 #define DEFAULT_STATS \
119 ((1ul << DSC_MEAN) | (1ul << DSC_STDDEV) | (1ul << DSC_MIN) \
122 /* A variable specified on DESCRIPTIVES. */
125 const struct variable *v; /* Variable to calculate on. */
/* An empty string means that no Z-score variable has been named for this
   variable. */
126 char z_name[VAR_NAME_LEN + 1]; /* Name for z-score variable. */
/* Sums of case weights rather than case counts: calc_descriptives adds
   the weight of each case to MISSING and derives VALID from the total
   kept in the procedure. */
127 double valid, missing; /* Valid, missing counts. */
/* Created by cmd_descriptives only when some requested statistic needs a
   moment; the calculation code tests it against NULL before each use. */
128 struct moments *moments; /* Moments. */
/* calc_descriptives reports SYSMIS for a minimum still equal to DBL_MAX
   or a maximum still equal to -DBL_MAX. */
129 double min, max; /* Minimum and maximum values. */
/* Indexed by DSC_*; calc_descriptives resets every slot to SYSMIS before
   filling in the statistics it computes. */
130 double stats[DSC_N_STATS]; /* All the stats' values. */
/* Enumerators of enum dsc_format, the type of the format member of the
   procedure.  display() emits separate Valid N and Missing N columns for
   DSC_SERIAL and reserves one count column fewer otherwise. */
136 DSC_LINE, /* Abbreviated format. */
137 DSC_SERIAL /* Long format. */
140 /* A DESCRIPTIVES procedure. */
143 /* Per-variable info. */
/* VARS is grown with xnrealloc as variable lists are parsed and is sorted
   in place by display() when a sort order was requested. */
144 struct dsc_var *vars; /* Variables. */
145 size_t var_cnt; /* Number of variables. */
/* Parsed settings.  cmd_descriptives initialises missing_type to
   DSC_VARIABLE, exclude to MV_ANY, show_var_labels to 1 and format to
   DSC_LINE before reading the subcommands. */
148 enum dsc_missing_type missing_type; /* Treatment of missing values. */
149 enum mv_class exclude; /* Classes of missing values to exclude. */
150 int show_var_labels; /* Nonzero to show variable labels. */
151 int show_index; /* Nonzero to show variable index. */
152 enum dsc_format format; /* Output format. */
154 /* Accumulated results. */
155 double missing_listwise; /* Sum of weights of cases missing listwise. */
156 double valid; /* Sum of weights of valid cases. */
157 bool bad_warn; /* Warn if bad weight found. */
158 enum dsc_statistic sort_by_stat; /* Statistic to sort by; DSC_NAME: by name; DSC_NONE: unsorted. */
159 int sort_ascending; /* !0: ascending order; 0: descending. */
/* Bit masks built from (1ul << DSC_*).  cmd_descriptives starts
   calc_stats as a copy of show_stats and then adds whatever else is
   needed for sorting and for the standard errors. */
160 unsigned long show_stats; /* Statistics to display. */
161 unsigned long calc_stats; /* Statistics to calculate. */
162 enum moment max_moment; /* Highest moment needed for stats. */
166 static enum dsc_statistic match_statistic (struct lexer *);
167 static void free_dsc_proc (struct dsc_proc *);
169 /* Z-score functions. */
170 static bool try_name (const struct dictionary *dict,
171 struct dsc_proc *dsc, const char *name);
172 static bool generate_z_varname (const struct dictionary *dict,
173 struct dsc_proc *dsc, char *z_name,
174 const char *name, int *z_cnt);
175 static void dump_z_table (struct dsc_proc *);
176 static void setup_z_trns (struct dsc_proc *, struct dataset *);
178 /* Procedure execution functions. */
179 static void calc_descriptives (struct dsc_proc *, struct casereader *,
181 static void display (struct dsc_proc *dsc);
183 /* Parser and outline. */
185 /* Handles DESCRIPTIVES. */
/* Parses the MISSING, SAVE, FORMAT, STATISTICS, SORT and VARIABLES
   subcommands from LEXER, works out which statistics must be shown and
   which must be calculated, and then calls calc_descriptives once for
   every group that the split-file grouper yields from DS.  The final
   return statement yields CMD_SUCCESS only if both casegrouper_destroy
   and proc_commit reported success, and CMD_CASCADING_FAILURE otherwise. */
187 cmd_descriptives (struct lexer *lexer, struct dataset *ds)
189 struct dictionary *dict = dataset_dict (ds);
190 struct dsc_proc *dsc;
191 const struct variable **vars = NULL;
193 int save_z_scores = 0;
198 struct casegrouper *grouper;
199 struct casereader *group;
201 /* Create and initialize dsc. */
202 dsc = xmalloc (sizeof *dsc);
205 dsc->missing_type = DSC_VARIABLE;
/* Default exclusion class; the INCLUDE keyword below narrows it to
   MV_SYSTEM. */
206 dsc->exclude = MV_ANY;
207 dsc->show_var_labels = 1;
209 dsc->format = DSC_LINE;
210 dsc->missing_listwise = 0.;
213 dsc->sort_by_stat = DSC_NONE;
214 dsc->sort_ascending = 1;
215 dsc->show_stats = dsc->calc_stats = DEFAULT_STATS;
217 /* Parse DESCRIPTIVES. */
/* Each pass handles one subcommand; the command ends at the terminating
   dot token. */
218 while (lex_token (lexer) != '.')
220 if (lex_match_id (lexer, "MISSING"))
222 lex_match (lexer, '=');
/* Keywords are read until the end of the subcommand (slash) or of the
   command (dot); a comma between keywords is optional. */
223 while (lex_token (lexer) != '.' && lex_token (lexer) != '/')
225 if (lex_match_id (lexer, "VARIABLE"))
226 dsc->missing_type = DSC_VARIABLE;
227 else if (lex_match_id (lexer, "LISTWISE"))
228 dsc->missing_type = DSC_LISTWISE;
229 else if (lex_match_id (lexer, "INCLUDE"))
230 dsc->exclude = MV_SYSTEM;
233 lex_error (lexer, NULL);
236 lex_match (lexer, ',');
239 else if (lex_match_id (lexer, "SAVE"))
241 else if (lex_match_id (lexer, "FORMAT"))
243 lex_match (lexer, '=');
244 while (lex_token (lexer) != '.' && lex_token (lexer) != '/')
246 if (lex_match_id (lexer, "LABELS"))
247 dsc->show_var_labels = 1;
248 else if (lex_match_id (lexer, "NOLABELS"))
249 dsc->show_var_labels = 0;
250 else if (lex_match_id (lexer, "INDEX"))
252 else if (lex_match_id (lexer, "NOINDEX"))
254 else if (lex_match_id (lexer, "LINE"))
255 dsc->format = DSC_LINE;
256 else if (lex_match_id (lexer, "SERIAL"))
257 dsc->format = DSC_SERIAL;
260 lex_error (lexer, NULL);
263 lex_match (lexer, ',');
266 else if (lex_match_id (lexer, "STATISTICS"))
268 lex_match (lexer, '=');
270 while (lex_token (lexer) != '.' && lex_token (lexer) != '/')
272 if (lex_match (lexer, T_ALL))
/* Sets one bit for every real statistic, DSC_MEAN through DSC_SUM. */
273 dsc->show_stats |= (1ul << DSC_N_STATS) - 1;
274 else if (lex_match_id (lexer, "DEFAULT"))
275 dsc->show_stats |= DEFAULT_STATS;
/* NOTE(review): match_statistic is documented to return DSC_NONE (-1)
   when no statistic is named, and a negative shift count is undefined
   behaviour in C.  No guard is visible at this call; confirm that the
   result is checked before it is shifted. */
277 dsc->show_stats |= 1ul << (match_statistic (lexer));
278 lex_match (lexer, ',');
/* An empty STATISTICS list falls back to the default set. */
280 if (dsc->show_stats == 0)
281 dsc->show_stats = DEFAULT_STATS;
283 else if (lex_match_id (lexer, "SORT"))
285 lex_match (lexer, '=');
286 if (lex_match_id (lexer, "NAME"))
287 dsc->sort_by_stat = DSC_NAME;
290 dsc->sort_by_stat = match_statistic (lexer);
/* SORT without a statistic name falls back to sorting by mean. */
291 if (dsc->sort_by_stat == DSC_NONE )
292 dsc->sort_by_stat = DSC_MEAN;
/* Optional direction in parentheses: A for ascending, D for descending. */
294 if (lex_match (lexer, '('))
296 if (lex_match_id (lexer, "A"))
297 dsc->sort_ascending = 1;
298 else if (lex_match_id (lexer, "D"))
299 dsc->sort_ascending = 0;
301 lex_error (lexer, NULL);
302 lex_force_match (lexer, ')');
/* Anything else is taken as the variable list, but only while no
   variables have been parsed yet. */
305 else if (var_cnt == 0)
/* The VARIABLES keyword is optional and is recognised by the equals sign
   that follows it. */
307 if (lex_look_ahead (lexer) == '=')
309 lex_match_id (lexer, "VARIABLES");
310 lex_match (lexer, '=');
313 while (lex_token (lexer) != '.' && lex_token (lexer) != '/')
317 if (!parse_variables_const (lexer, dict, &vars, &var_cnt,
318 PV_APPEND | PV_NO_DUPLICATE | PV_NUMERIC))
/* Grow the per-variable array to the new total; only the entries added
   by this call (from the old dsc->var_cnt upward) are initialised. */
321 dsc->vars = xnrealloc ((void *)dsc->vars, var_cnt, sizeof *dsc->vars);
322 for (i = dsc->var_cnt; i < var_cnt; i++)
324 struct dsc_var *dv = &dsc->vars[i];
326 dv->z_name[0] = '\0';
329 dsc->var_cnt = var_cnt;
/* A parenthesised identifier after a variable list names the Z-score
   variable for the last variable parsed so far. */
331 if (lex_match (lexer, '('))
333 if (lex_token (lexer) != T_ID)
335 lex_error (lexer, NULL);
338 if (try_name (dict, dsc, lex_tokid (lexer)))
/* NOTE(review): unbounded copy into z_name, which holds VAR_NAME_LEN + 1
   bytes; presumably identifier tokens never exceed VAR_NAME_LEN.  Confirm
   against the lexer. */
340 strcpy (dsc->vars[dsc->var_cnt - 1].z_name, lex_tokid (lexer));
344 msg (SE, _("Z-score variable name %s would be"
345 " a duplicate variable name."), lex_tokid (lexer));
347 if (!lex_force_match (lexer, ')'))
354 lex_error (lexer, NULL);
358 lex_match (lexer, '/');
362 msg (SE, _("No variables specified."));
366 /* Construct z-score varnames, show translation table. */
367 if (z_cnt || save_z_scores)
/* Variables without an explicit Z-score name get a generated one. */
373 for (i = 0; i < dsc->var_cnt; i++)
374 if (dsc->vars[i].z_name[0] == 0)
376 if (!generate_z_varname (dict, dsc, dsc->vars[i].z_name,
377 var_get_name (dsc->vars[i].v),
386 /* Figure out statistics to display. */
/* Showing skewness or kurtosis also shows its standard error. */
387 if (dsc->show_stats & (1ul << DSC_SKEWNESS))
388 dsc->show_stats |= 1ul << DSC_SESKEW;
389 if (dsc->show_stats & (1ul << DSC_KURTOSIS))
390 dsc->show_stats |= 1ul << DSC_SEKURT;
392 /* Figure out which statistics to calculate. */
393 dsc->calc_stats = dsc->show_stats;
395 dsc->calc_stats |= (1ul << DSC_MEAN) | (1ul << DSC_STDDEV);
/* A statistic used only for sorting still has to be calculated; the
   negative DSC_NAME and DSC_NONE values are skipped by the test. */
396 if (dsc->sort_by_stat >= 0)
397 dsc->calc_stats |= 1ul << dsc->sort_by_stat;
/* A standard error is only filled in when the statistic it belongs to
   has been calculated, so request that statistic as well. */
398 if (dsc->show_stats & (1ul << DSC_SESKEW))
399 dsc->calc_stats |= 1ul << DSC_SKEWNESS;
400 if (dsc->show_stats & (1ul << DSC_SEKURT))
401 dsc->calc_stats |= 1ul << DSC_KURTOSIS;
403 /* Figure out maximum moment needed and allocate moments for
405 dsc->max_moment = MOMENT_NONE;
406 for (i = 0; i < DSC_N_STATS; i++)
407 if (dsc->calc_stats & (1ul << i) && dsc_info[i].moment > dsc->max_moment)
408 dsc->max_moment = dsc_info[i].moment;
409 if (dsc->max_moment != MOMENT_NONE)
410 for (i = 0; i < dsc->var_cnt; i++)
411 dsc->vars[i].moments = moments_create (dsc->max_moment);
414 grouper = casegrouper_create_splits (proc_open (ds), dict);
415 while (casegrouper_get_next_group (grouper, &group))
416 calc_descriptives (dsc, group, ds);
417 ok = casegrouper_destroy (grouper);
418 ok = proc_commit (ds) && ok;
422 setup_z_trns (dsc, ds);
427 return ok ? CMD_SUCCESS : CMD_CASCADING_FAILURE;
435 /* Returns the statistic named by the current token and skips past the token.
436 Returns DSC_NONE if no statistic is given (e.g., subcommand with no
437 specifiers). Emits an error if the current token ID does not name a statistic. */
439 static enum dsc_statistic
440 match_statistic (struct lexer *lexer)
/* Only an identifier token can name a statistic. */
442 if (lex_token (lexer) == T_ID)
444 enum dsc_statistic stat;
/* Compare the token with each keyword of dsc_info[] in DSC_* order, so
   the loop index is the enumerator of the keyword that matches. */
446 for (stat = 0; stat < DSC_N_STATS; stat++)
447 if (lex_match_id (lexer, dsc_info[stat].identifier))
/* Error report for an identifier that is not a statistic keyword. */
451 lex_error (lexer, _("expecting statistic name: reverting to default"));
/* Releases the resources owned by DSC.  The visible part destroys the
   moments object of every variable.
   NOTE(review): cmd_descriptives creates moments only when max_moment is
   not MOMENT_NONE, so this loop relies on the field holding a value that
   moments_destroy accepts in the other case; confirm. */
459 free_dsc_proc (struct dsc_proc *dsc)
466 for (i = 0; i < dsc->var_cnt; i++)
467 moments_destroy (dsc->vars[i].moments);
474 /* Returns false if NAME is a duplicate of any existing variable name or
475 of any previously-declared z-var name; otherwise returns true. */
477 try_name (const struct dictionary *dict, struct dsc_proc *dsc,
/* A successful dictionary lookup means the name is already a variable. */
482 if (dict_lookup_var (dict, name) != NULL)
/* Z-score names chosen so far are compared without regard to case.
   Variables that have no Z-score name yet hold an empty string. */
484 for (i = 0; i < dsc->var_cnt; i++)
485 if (!strcasecmp (dsc->vars[i].z_name, name))
490 /* Generates a name for a Z-score variable based on a variable
491 named VAR_NAME, given that *Z_CNT generated variable names are
492 known to already exist. If successful, returns true and
493 copies the new name into Z_NAME. On failure, returns false. */
495 generate_z_varname (const struct dictionary *dict, struct dsc_proc *dsc, char *z_name,
496 const char *var_name, int *z_cnt)
498 char name[VAR_NAME_LEN + 1];
500 /* Try a name based on the original variable name. */
/* VAR_NAME is copied starting at the second byte of NAME and truncated
   to the space that remains, leaving the first byte for a prefix
   character. */
502 str_copy_trunc (name + 1, sizeof name - 1, var_name);
503 if (try_name (dict, dsc, name))
505 strcpy (z_name, name);
509 /* Generate a synthetic name. */
/* The synthetic names come in families selected by *Z_CNT: ZSC with a
   three-digit number, then STDZ01-09 for counts up to 108, ZZZZ01-09 up
   to 117 and ZQZQ01-09 up to 126. */
515 sprintf (name, "ZSC%03d", *z_cnt);
516 else if (*z_cnt <= 108)
517 sprintf (name, "STDZ%02d", *z_cnt - 99);
518 else if (*z_cnt <= 117)
519 sprintf (name, "ZZZZ%02d", *z_cnt - 108);
520 else if (*z_cnt <= 126)
521 sprintf (name, "ZQZQ%02d", *z_cnt - 117);
/* NOTE(review): the message below says ZSC001-ZSC0999, but the ZSC names
   are formatted with three digits and the stated total of 126 leaves 99
   of them after the 27 names of the other families, so the upper bound
   looks like a typo for ZSC099. */
524 msg (SE, _("Ran out of generic names for Z-score variables. "
525 "There are only 126 generic names: ZSC001-ZSC0999, "
526 "STDZ01-STDZ09, ZZZZ01-ZZZZ09, ZQZQ01-ZQZQ09."));
/* A synthetic name is accepted only if it is not already in use. */
530 if (try_name (dict, dsc, name))
532 strcpy (z_name, name);
539 /* Outputs a table describing the mapping between source
540 variables and Z-score variables. */
542 dump_z_table (struct dsc_proc *dsc)
/* First pass: find the variables that have a Z-score name, which
   determines the number of body rows. */
550 for (i = 0; i < dsc->var_cnt; i++)
551 if (dsc->vars[i].z_name[0] != '\0')
/* Two columns (source, target) and one heading row above the body. */
555 t = tab_create (2, cnt + 1, 0);
556 tab_title (t, _("Mapping of variables to corresponding Z-scores."));
557 tab_columns (t, SOM_COL_DOWN, 1);
558 tab_headers (t, 0, 0, 1, 0);
559 tab_box (t, TAL_1, TAL_1, TAL_0, TAL_1, 0, 0, 1, cnt);
560 tab_hline (t, TAL_2, 0, 1, 1);
561 tab_text (t, 0, 0, TAB_CENTER | TAT_TITLE, _("Source"));
562 tab_text (t, 1, 0, TAB_CENTER | TAT_TITLE, _("Target"));
563 tab_dim (t, tab_natural_dimensions);
/* Second pass: Y is the output row and advances only for variables that
   have a Z-score name, so the body rows are contiguous. */
568 for (i = 0, y = 1; i < dsc->var_cnt; i++)
569 if (dsc->vars[i].z_name[0] != '\0')
571 tab_text (t, 0, y, TAB_LEFT, var_get_name (dsc->vars[i].v));
572 tab_text (t, 1, y++, TAB_LEFT, dsc->vars[i].z_name);
579 /* Transformation function to calculate Z-scores. Will return SYSMIS if any of
580 the following are true: 1) mean or standard deviation is SYSMIS; 2) score is
581 SYSMIS; 3) score is user missing and they were not included in the original
582 analysis; 4) any of the variables in the original analysis were missing
583 (either system or user-missing values that weren't included). */
/* TRNS_ is the struct dsc_trns built by setup_z_trns and C is the case
   being transformed; CASE_IDX is unused.  Always returns TRNS_CONTINUE. */
586 descriptives_trns_proc (void *trns_, struct ccase * c,
587 casenumber case_idx UNUSED)
589 struct dsc_trns *t = trns_;
590 struct dsc_z_score *z;
591 const struct variable **vars;
/* In listwise mode every analysis variable is examined first, so that a
   missing value in any of them can affect all Z-scores of this case. */
594 if (t->missing_type == DSC_LISTWISE)
597 for (vars = t->vars; vars < t->vars + t->var_cnt; vars++)
599 double score = case_num (c, *vars);
600 if (var_is_num_missing (*vars, score, t->exclude))
/* One output value per Z-score variable; OUTPUT points into the writable
   data of the case. */
608 for (z = t->z_scores; z < t->z_scores + t->z_score_cnt; z++)
610 double input = case_num (c, z->src_var);
611 double *output = &case_data_rw (c, z->z_var)->f;
/* Conditions under which no Z-score can be computed for this value. */
613 if (z->mean == SYSMIS || z->std_dev == SYSMIS || all_sysmis
614 || var_is_num_missing (z->src_var, input, t->exclude))
/* NOTE(review): a std_dev of exactly 0 is not excluded by the test
   above, so the division can produce a non-finite result; confirm
   whether that is intended. */
617 *output = (input - z->mean) / z->std_dev;
619 return TRNS_CONTINUE;
622 /* Frees a descriptives_trns struct. */
624 descriptives_trns_free (void *trns_)
626 struct dsc_trns *t = trns_;
/* Consistency check: the exclusive-or holds only when VARS is non-null
   in listwise mode and null in every other mode. */
629 assert((t->missing_type != DSC_LISTWISE) ^ (t->vars != NULL));
634 /* Sets up a transformation to calculate Z scores. */
/* cmd_descriptives calls this once, after every split group has been
   processed, so the means and standard deviations copied below are the
   ones left in stats[] by the last calc_descriptives call. */
636 setup_z_trns (struct dsc_proc *dsc, struct dataset *ds)
/* Count the variables that have a Z-score name. */
641 for (cnt = i = 0; i < dsc->var_cnt; i++)
642 if (dsc->vars[i].z_name[0] != '\0')
645 t = xmalloc (sizeof *t);
646 t->z_scores = xnmalloc (cnt, sizeof *t->z_scores);
647 t->z_score_cnt = cnt;
648 t->missing_type = dsc->missing_type;
649 t->exclude = dsc->exclude;
/* Listwise mode needs the full list of analysis variables so that the
   transformation can test each of them for missing values. */
650 if ( t->missing_type == DSC_LISTWISE )
652 t->var_cnt = dsc->var_cnt;
653 t->vars = xnmalloc (t->var_cnt, sizeof *t->vars);
654 for (i = 0; i < t->var_cnt; i++)
655 t->vars[i] = dsc->vars[i].v;
/* CNT restarts at zero and now indexes the next free z_scores slot. */
663 for (cnt = i = 0; i < dsc->var_cnt; i++)
665 struct dsc_var *dv = &dsc->vars[i];
666 if (dv->z_name[0] != '\0')
668 struct dsc_z_score *z;
669 struct variable *dst_var;
/* Create the target variable in the dictionary of the dataset. */
671 dst_var = dict_create_var_assert (dataset_dict (ds), dv->z_name, 0);
/* NOTE(review): xasprintf returns a newly allocated string that is not
   freed here; unless var_set_label takes ownership of its argument this
   leaks one string per Z-score variable.  Confirm against var_set_label. */
672 var_set_label (dst_var, xasprintf (_("Z-score of %s"),
673 var_to_string (dv->v)));
675 z = &t->z_scores[cnt++];
678 z->mean = dv->stats[DSC_MEAN];
679 z->std_dev = dv->stats[DSC_STDDEV];
/* Register the callbacks; T is handed over as their auxiliary data. */
683 add_transformation (ds,
684 descriptives_trns_proc, descriptives_trns_free, t);
687 /* Statistical calculation. */
689 static bool listwise_missing (struct dsc_proc *dsc, const struct ccase *c);
691 /* Calculates and displays descriptive statistics for the cases in GROUP. */
/* Makes one pass over the cases to accumulate weights and first-pass
   moments, a second pass when moments above the mean are needed, and
   then fills in stats[] for every variable of DSC. */
694 calc_descriptives (struct dsc_proc *dsc, struct casereader *group,
697 struct casereader *pass1, *pass2;
/* Peek at the first case; if there is none the reader is destroyed. */
701 if (!casereader_peek (group, 0, &c))
703 casereader_destroy (group);
706 output_split_file_values (ds, &c);
709 group = casereader_create_filter_weight (group, dataset_dict (ds),
/* A clone for the second pass is only made when some statistic needs a
   moment higher than the mean. */
713 pass2 = dsc->max_moment <= MOMENT_MEAN ? NULL : casereader_clone (pass1);
/* Reset the per-variable accumulators for this group. */
715 for (i = 0; i < dsc->var_cnt; i++)
717 struct dsc_var *dv = &dsc->vars[i];
719 dv->valid = dv->missing = 0.0;
720 if (dv->moments != NULL)
721 moments_clear (dv->moments);
725 dsc->missing_listwise = 0.;
728 /* First pass to handle most of the work. */
729 for (; casereader_read (pass1, &c); case_destroy (&c))
731 double weight = dict_get_case_weight (dataset_dict (ds), &c, NULL);
733 /* Check for missing values. */
/* The listwise total is accumulated in both modes; it is reported in
   the table title by display(). */
734 if (listwise_missing (dsc, &c))
736 dsc->missing_listwise += weight;
737 if (dsc->missing_type == DSC_LISTWISE)
740 dsc->valid += weight;
742 for (i = 0; i < dsc->var_cnt; i++)
744 struct dsc_var *dv = &dsc->vars[i];
745 double x = case_num (&c, dv->v);
/* Missing values add the case weight to the per-variable total. */
747 if (var_is_num_missing (dv->v, x, dsc->exclude))
749 dv->missing += weight;
753 if (dv->moments != NULL)
754 moments_pass_one (dv->moments, x, weight);
/* A failure while finishing the first pass also disposes of the clone. */
762 if (!casereader_destroy (pass1))
764 casereader_destroy (pass2);
768 /* Second pass for higher-order moments. */
769 if (dsc->max_moment > MOMENT_MEAN)
771 for (; casereader_read (pass2, &c); case_destroy (&c))
773 double weight = dict_get_case_weight (dataset_dict (ds), &c, NULL);
775 /* Check for missing values. */
776 if (dsc->missing_type == DSC_LISTWISE && listwise_missing (dsc, &c))
779 for (i = 0; i < dsc->var_cnt; i++)
781 struct dsc_var *dv = &dsc->vars[i];
782 double x = case_num (&c, dv->v);
784 if (var_is_num_missing (dv->v, x, dsc->exclude))
787 if (dv->moments != NULL)
788 moments_pass_two (dv->moments, x, weight);
791 if (!casereader_destroy (pass2))
795 /* Calculate results. */
796 for (i = 0; i < dsc->var_cnt; i++)
798 struct dsc_var *dv = &dsc->vars[i];
/* Every statistic starts out as SYSMIS and stays that way unless it is
   computed below. */
802 for (j = 0; j < DSC_N_STATS; j++)
803 dv->stats[j] = SYSMIS;
/* W is the weight of the cases that are valid for this variable: the
   procedure total minus the weight this variable was missing for. */
805 dv->valid = W = dsc->valid - dv->missing;
807 if (dv->moments != NULL)
808 moments_calculate (dv->moments, NULL,
809 &dv->stats[DSC_MEAN], &dv->stats[DSC_VARIANCE],
810 &dv->stats[DSC_SKEWNESS], &dv->stats[DSC_KURTOSIS]);
/* Standard error of the mean: sqrt (variance) / sqrt (W). */
811 if (dsc->calc_stats & (1ul << DSC_SEMEAN)
812 && dv->stats[DSC_VARIANCE] != SYSMIS && W > 0.)
813 dv->stats[DSC_SEMEAN] = sqrt (dv->stats[DSC_VARIANCE]) / sqrt (W);
814 if (dsc->calc_stats & (1ul << DSC_STDDEV)
815 && dv->stats[DSC_VARIANCE] != SYSMIS)
816 dv->stats[DSC_STDDEV] = sqrt (dv->stats[DSC_VARIANCE]);
/* The standard errors depend only on W, but are left as SYSMIS when the
   statistic they describe could not be computed. */
817 if (dsc->calc_stats & (1ul << DSC_SEKURT))
818 if (dv->stats[DSC_KURTOSIS] != SYSMIS)
819 dv->stats[DSC_SEKURT] = calc_sekurt (W);
820 if (dsc->calc_stats & (1ul << DSC_SESKEW)
821 && dv->stats[DSC_SKEWNESS] != SYSMIS)
822 dv->stats[DSC_SESKEW] = calc_seskew (W);
/* DBL_MAX and -DBL_MAX act as the no-value markers for min and max. */
823 dv->stats[DSC_RANGE] = ((dv->min == DBL_MAX || dv->max == -DBL_MAX)
824 ? SYSMIS : dv->max - dv->min);
825 dv->stats[DSC_MIN] = dv->min == DBL_MAX ? SYSMIS : dv->min;
826 dv->stats[DSC_MAX] = dv->max == -DBL_MAX ? SYSMIS : dv->max;
/* NOTE(review): unlike the statistics above, the mean is not compared
   with SYSMIS before it is multiplied; confirm that a SYSMIS mean cannot
   reach this line when DSC_SUM is requested. */
827 if (dsc->calc_stats & (1ul << DSC_SUM))
828 dv->stats[DSC_SUM] = W * dv->stats[DSC_MEAN];
831 /* Output results. */
835 /* Returns true if any of the descriptives variables in DSC's
836 variable list have missing values in case C, false otherwise. */
838 listwise_missing (struct dsc_proc *dsc, const struct ccase *c)
842 for (i = 0; i < dsc->var_cnt; i++)
844 struct dsc_var *dv = &dsc->vars[i];
845 double x = case_num (c, dv->v);
/* Missing is judged with the exclusion class chosen on MISSING. */
847 if (var_is_num_missing (dv->v, x, dsc->exclude))
853 /* Statistical display. */
855 static algo_compare_func descriptives_compare_dsc_vars;
857 /* Displays a table of descriptive statistics for DSC. */
/* The table has one heading row and one row per variable.  Its columns
   are the variable name, one or two count columns depending on the
   format, and one column for each statistic selected in show_stats. */
859 display (struct dsc_proc *dsc)
/* Count the columns: the name, then two count columns for DSC_SERIAL or
   one otherwise, then the selected statistics. */
865 nc = 1 + (dsc->format == DSC_SERIAL ? 2 : 1);
866 for (i = 0; i < DSC_N_STATS; i++)
867 if (dsc->show_stats & (1ul << i))
/* Sorting reorders dsc->vars itself, with DSC passed to the comparison
   function as auxiliary data. */
870 if (dsc->sort_by_stat != DSC_NONE)
871 sort (dsc->vars, dsc->var_cnt, sizeof *dsc->vars,
872 descriptives_compare_dsc_vars, dsc);
874 t = tab_create (nc, dsc->var_cnt + 1, 0);
875 tab_headers (t, 1, 0, 1, 0);
876 tab_box (t, TAL_1, TAL_1, -1, -1, 0, 0, nc - 1, dsc->var_cnt);
877 tab_box (t, -1, -1, -1, TAL_1, 1, 0, nc - 1, dsc->var_cnt);
878 tab_hline (t, TAL_2, 0, nc - 1, 1);
879 tab_vline (t, TAL_2, 1, 0, dsc->var_cnt);
880 tab_dim (t, tab_natural_dimensions);
/* Heading row.  From here on NC serves as the running column position
   and is advanced by every cell that is written. */
883 tab_text (t, nc++, 0, TAB_LEFT | TAT_TITLE, _("Variable"));
884 if (dsc->format == DSC_SERIAL)
886 tab_text (t, nc++, 0, TAB_CENTER | TAT_TITLE, _("Valid N"));
887 tab_text (t, nc++, 0, TAB_CENTER | TAT_TITLE, _("Missing N"));
890 tab_text (t, nc++, 0, TAB_CENTER | TAT_TITLE, "N");
892 for (i = 0; i < DSC_N_STATS; i++)
893 if (dsc->show_stats & (1ul << i))
/* The names in dsc_info[] are stored untranslated and are translated
   here. */
895 const char *title = gettext (dsc_info[i].name);
896 tab_text (t, nc++, 0, TAB_CENTER | TAT_TITLE, title);
/* Body: row I + 1 holds variable I, with cells written in the same
   column order as the headings. */
899 for (i = 0; i < dsc->var_cnt; i++)
901 struct dsc_var *dv = &dsc->vars[i];
905 tab_text (t, nc++, i + 1, TAB_LEFT, var_get_name (dv->v));
906 tab_text (t, nc++, i + 1, TAT_PRINTF, "%g", dv->valid);
907 if (dsc->format == DSC_SERIAL)
908 tab_text (t, nc++, i + 1, TAT_PRINTF, "%g", dv->missing);
910 for (j = 0; j < DSC_N_STATS; j++)
911 if (dsc->show_stats & (1ul << j))
912 tab_double (t, nc++, i + 1, TAB_NONE, dv->stats[j], NULL);
/* The title reports the weight totals accumulated by calc_descriptives. */
915 tab_title (t, _("Valid cases = %g; cases with missing value(s) = %g."),
916 dsc->valid, dsc->missing_listwise);
921 /* Compares `struct dsc_var's A and B according to the ordering specified by DSC. */
924 descriptives_compare_dsc_vars (const void *a_, const void *b_, const void *dsc_)
926 const struct dsc_var *a = a_;
927 const struct dsc_var *b = b_;
928 const struct dsc_proc *dsc = dsc_;
932 if (dsc->sort_by_stat == DSC_NAME)
933 result = strcasecmp (var_get_name (a->v), var_get_name (b->v));
936 double as = a->stats[dsc->sort_by_stat];
937 double bs = b->stats[dsc->sort_by_stat];
939 result = as < bs ? -1 : as > bs;
942 if (!dsc->sort_ascending)