1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 1997-9, 2000, 2009, 2010 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
23 #include <data/casegrouper.h>
24 #include <data/casereader.h>
25 #include <data/dictionary.h>
26 #include <data/procedure.h>
27 #include <data/transformations.h>
28 #include <data/variable.h>
29 #include <language/command.h>
30 #include <language/dictionary/split-file.h>
31 #include <language/lexer/lexer.h>
32 #include <language/lexer/variable-parser.h>
33 #include <libpspp/array.h>
34 #include <libpspp/compiler.h>
35 #include <libpspp/message.h>
36 #include <libpspp/assertion.h>
37 #include <math/moments.h>
38 #include <output/tab.h>
43 #define _(msgid) gettext (msgid)
44 #define N_(msgid) msgid
46 /* DESCRIPTIVES private data. */
50 /* Handling of missing values. */
53 DSC_VARIABLE, /* Handle missing values on a per-variable basis. */
54 DSC_LISTWISE /* Discard entire case if any variable is missing. */
57 /* Describes properties of a distribution for the purpose of
58 calculating a Z-score. */
61 const struct variable *src_var; /* Variable on which z-score is based. */
62 struct variable *z_var; /* New z-score variable. */
63 double mean; /* Distribution mean. */
64 double std_dev; /* Distribution standard deviation. */
67 /* DESCRIPTIVES transformation (for calculating Z-scores). */
70 struct dsc_z_score *z_scores; /* Array of Z-scores. */
71 int z_score_cnt; /* Number of Z-scores. */
72 const struct variable **vars; /* Variables for listwise missing checks. */
73 size_t var_cnt; /* Number of variables. */
74 enum dsc_missing_type missing_type; /* Treatment of missing values. */
75 enum mv_class exclude; /* Classes of missing values to exclude. */
78 /* Statistics. Used as bit indexes, so must be 32 or fewer. */
81 DSC_MEAN = 0, DSC_SEMEAN, DSC_STDDEV, DSC_VARIANCE, DSC_KURTOSIS,
82 DSC_SEKURT, DSC_SKEWNESS, DSC_SESKEW, DSC_RANGE, DSC_MIN,
83 DSC_MAX, DSC_SUM, DSC_N_STATS,
85 /* Only valid as sort criteria. */
86 DSC_NAME = -2, /* Sort by name. */
87 DSC_NONE = -1 /* Unsorted. */
90 /* Describes one statistic. */
91 struct dsc_statistic_info
93 const char *identifier; /* Identifier. */
94 const char *name; /* Full name. */
95 enum moment moment; /* Highest moment needed to calculate. */
98 /* Table of statistics, indexed by DSC_*. */
99 static const struct dsc_statistic_info dsc_info[DSC_N_STATS] =
101 {"MEAN", N_("Mean"), MOMENT_MEAN},
102 {"SEMEAN", N_("S.E. Mean"), MOMENT_VARIANCE},
103 {"STDDEV", N_("Std Dev"), MOMENT_VARIANCE},
104 {"VARIANCE", N_("Variance"), MOMENT_VARIANCE},
105 {"KURTOSIS", N_("Kurtosis"), MOMENT_KURTOSIS},
106 {"SEKURTOSIS", N_("S.E. Kurt"), MOMENT_NONE},
107 {"SKEWNESS", N_("Skewness"), MOMENT_SKEWNESS},
108 {"SESKEWNESS", N_("S.E. Skew"), MOMENT_NONE},
109 {"RANGE", N_("Range"), MOMENT_NONE},
110 {"MINIMUM", N_("Minimum"), MOMENT_NONE},
111 {"MAXIMUM", N_("Maximum"), MOMENT_NONE},
112 {"SUM", N_("Sum"), MOMENT_MEAN},
115 /* Statistics calculated by default if none are explicitly
117 #define DEFAULT_STATS \
118 ((1ul << DSC_MEAN) | (1ul << DSC_STDDEV) | (1ul << DSC_MIN) \
121 /* A variable specified on DESCRIPTIVES. */
124 const struct variable *v; /* Variable to calculate on. */
125 char z_name[VAR_NAME_LEN + 1]; /* Name for z-score variable. */
126 double valid, missing; /* Valid, missing counts. */
127 struct moments *moments; /* Moments. */
128 double min, max; /* Maximum and mimimum values. */
129 double stats[DSC_N_STATS]; /* All the stats' values. */
135 DSC_LINE, /* Abbreviated format. */
136 DSC_SERIAL /* Long format. */
139 /* A DESCRIPTIVES procedure. */
142 /* Per-variable info. */
143 struct dsc_var *vars; /* Variables. */
144 size_t var_cnt; /* Number of variables. */
147 enum dsc_missing_type missing_type; /* Treatment of missing values. */
148 enum mv_class exclude; /* Classes of missing values to exclude. */
149 int show_var_labels; /* Nonzero to show variable labels. */
150 int show_index; /* Nonzero to show variable index. */
151 enum dsc_format format; /* Output format. */
153 /* Accumulated results. */
154 double missing_listwise; /* Sum of weights of cases missing listwise. */
155 double valid; /* Sum of weights of valid cases. */
156 bool bad_warn; /* Warn if bad weight found. */
157 enum dsc_statistic sort_by_stat; /* Statistic to sort by; -1: name. */
158 int sort_ascending; /* !0: ascending order; 0: descending. */
159 unsigned long show_stats; /* Statistics to display. */
160 unsigned long calc_stats; /* Statistics to calculate. */
161 enum moment max_moment; /* Highest moment needed for stats. */
165 static enum dsc_statistic match_statistic (struct lexer *);
166 static void free_dsc_proc (struct dsc_proc *);
168 /* Z-score functions. */
169 static bool try_name (const struct dictionary *dict,
170 struct dsc_proc *dsc, const char *name);
171 static bool generate_z_varname (const struct dictionary *dict,
172 struct dsc_proc *dsc, char *z_name,
173 const char *name, int *z_cnt);
174 static void dump_z_table (struct dsc_proc *);
175 static void setup_z_trns (struct dsc_proc *, struct dataset *);
177 /* Procedure execution functions. */
178 static void calc_descriptives (struct dsc_proc *, struct casereader *,
180 static void display (struct dsc_proc *dsc);
182 /* Parser and outline. */
184 /* Handles DESCRIPTIVES. */
186 cmd_descriptives (struct lexer *lexer, struct dataset *ds)
188 struct dictionary *dict = dataset_dict (ds);
189 struct dsc_proc *dsc;
190 const struct variable **vars = NULL;
192 int save_z_scores = 0;
197 struct casegrouper *grouper;
198 struct casereader *group;
200 /* Create and initialize dsc. */
201 dsc = xmalloc (sizeof *dsc);
204 dsc->missing_type = DSC_VARIABLE;
205 dsc->exclude = MV_ANY;
206 dsc->show_var_labels = 1;
208 dsc->format = DSC_LINE;
209 dsc->missing_listwise = 0.;
212 dsc->sort_by_stat = DSC_NONE;
213 dsc->sort_ascending = 1;
214 dsc->show_stats = dsc->calc_stats = DEFAULT_STATS;
216 /* Parse DESCRIPTIVES. */
217 while (lex_token (lexer) != T_ENDCMD)
219 if (lex_match_id (lexer, "MISSING"))
221 lex_match (lexer, T_EQUALS);
222 while (lex_token (lexer) != T_ENDCMD && lex_token (lexer) != T_SLASH)
224 if (lex_match_id (lexer, "VARIABLE"))
225 dsc->missing_type = DSC_VARIABLE;
226 else if (lex_match_id (lexer, "LISTWISE"))
227 dsc->missing_type = DSC_LISTWISE;
228 else if (lex_match_id (lexer, "INCLUDE"))
229 dsc->exclude = MV_SYSTEM;
232 lex_error (lexer, NULL);
235 lex_match (lexer, T_COMMA);
238 else if (lex_match_id (lexer, "SAVE"))
240 else if (lex_match_id (lexer, "FORMAT"))
242 lex_match (lexer, T_EQUALS);
243 while (lex_token (lexer) != T_ENDCMD && lex_token (lexer) != T_SLASH)
245 if (lex_match_id (lexer, "LABELS"))
246 dsc->show_var_labels = 1;
247 else if (lex_match_id (lexer, "NOLABELS"))
248 dsc->show_var_labels = 0;
249 else if (lex_match_id (lexer, "INDEX"))
251 else if (lex_match_id (lexer, "NOINDEX"))
253 else if (lex_match_id (lexer, "LINE"))
254 dsc->format = DSC_LINE;
255 else if (lex_match_id (lexer, "SERIAL"))
256 dsc->format = DSC_SERIAL;
259 lex_error (lexer, NULL);
262 lex_match (lexer, T_COMMA);
265 else if (lex_match_id (lexer, "STATISTICS"))
267 lex_match (lexer, T_EQUALS);
269 while (lex_token (lexer) != T_ENDCMD && lex_token (lexer) != T_SLASH)
271 if (lex_match (lexer, T_ALL))
272 dsc->show_stats |= (1ul << DSC_N_STATS) - 1;
273 else if (lex_match_id (lexer, "DEFAULT"))
274 dsc->show_stats |= DEFAULT_STATS;
276 dsc->show_stats |= 1ul << (match_statistic (lexer));
277 lex_match (lexer, T_COMMA);
279 if (dsc->show_stats == 0)
280 dsc->show_stats = DEFAULT_STATS;
282 else if (lex_match_id (lexer, "SORT"))
284 lex_match (lexer, T_EQUALS);
285 if (lex_match_id (lexer, "NAME"))
286 dsc->sort_by_stat = DSC_NAME;
289 dsc->sort_by_stat = match_statistic (lexer);
290 if (dsc->sort_by_stat == DSC_NONE )
291 dsc->sort_by_stat = DSC_MEAN;
293 if (lex_match (lexer, T_LPAREN))
295 if (lex_match_id (lexer, "A"))
296 dsc->sort_ascending = 1;
297 else if (lex_match_id (lexer, "D"))
298 dsc->sort_ascending = 0;
300 lex_error (lexer, NULL);
301 lex_force_match (lexer, T_RPAREN);
304 else if (var_cnt == 0)
306 if (lex_look_ahead (lexer) == T_EQUALS)
308 lex_match_id (lexer, "VARIABLES");
309 lex_match (lexer, T_EQUALS);
312 while (lex_token (lexer) != T_ENDCMD && lex_token (lexer) != T_SLASH)
316 if (!parse_variables_const (lexer, dict, &vars, &var_cnt,
317 PV_APPEND | PV_NO_DUPLICATE | PV_NUMERIC))
320 dsc->vars = xnrealloc ((void *)dsc->vars, var_cnt, sizeof *dsc->vars);
321 for (i = dsc->var_cnt; i < var_cnt; i++)
323 struct dsc_var *dv = &dsc->vars[i];
325 dv->z_name[0] = '\0';
328 dsc->var_cnt = var_cnt;
330 if (lex_match (lexer, T_LPAREN))
332 if (lex_token (lexer) != T_ID)
334 lex_error (lexer, NULL);
337 if (try_name (dict, dsc, lex_tokcstr (lexer)))
339 strcpy (dsc->vars[dsc->var_cnt - 1].z_name, lex_tokcstr (lexer));
343 msg (SE, _("Z-score variable name %s would be"
344 " a duplicate variable name."), lex_tokcstr (lexer));
346 if (!lex_force_match (lexer, T_RPAREN))
353 lex_error (lexer, NULL);
357 lex_match (lexer, T_SLASH);
361 msg (SE, _("No variables specified."));
365 /* Construct z-score varnames, show translation table. */
366 if (z_cnt || save_z_scores)
372 for (i = 0; i < dsc->var_cnt; i++)
373 if (dsc->vars[i].z_name[0] == 0)
375 if (!generate_z_varname (dict, dsc, dsc->vars[i].z_name,
376 var_get_name (dsc->vars[i].v),
385 /* Figure out statistics to display. */
386 if (dsc->show_stats & (1ul << DSC_SKEWNESS))
387 dsc->show_stats |= 1ul << DSC_SESKEW;
388 if (dsc->show_stats & (1ul << DSC_KURTOSIS))
389 dsc->show_stats |= 1ul << DSC_SEKURT;
391 /* Figure out which statistics to calculate. */
392 dsc->calc_stats = dsc->show_stats;
394 dsc->calc_stats |= (1ul << DSC_MEAN) | (1ul << DSC_STDDEV);
395 if (dsc->sort_by_stat >= 0)
396 dsc->calc_stats |= 1ul << dsc->sort_by_stat;
397 if (dsc->show_stats & (1ul << DSC_SESKEW))
398 dsc->calc_stats |= 1ul << DSC_SKEWNESS;
399 if (dsc->show_stats & (1ul << DSC_SEKURT))
400 dsc->calc_stats |= 1ul << DSC_KURTOSIS;
402 /* Figure out maximum moment needed and allocate moments for
404 dsc->max_moment = MOMENT_NONE;
405 for (i = 0; i < DSC_N_STATS; i++)
406 if (dsc->calc_stats & (1ul << i) && dsc_info[i].moment > dsc->max_moment)
407 dsc->max_moment = dsc_info[i].moment;
408 if (dsc->max_moment != MOMENT_NONE)
409 for (i = 0; i < dsc->var_cnt; i++)
410 dsc->vars[i].moments = moments_create (dsc->max_moment);
413 grouper = casegrouper_create_splits (proc_open (ds), dict);
414 while (casegrouper_get_next_group (grouper, &group))
415 calc_descriptives (dsc, group, ds);
416 ok = casegrouper_destroy (grouper);
417 ok = proc_commit (ds) && ok;
421 setup_z_trns (dsc, ds);
426 return ok ? CMD_SUCCESS : CMD_CASCADING_FAILURE;
434 /* Returns the statistic named by the current token and skips past the token.
435 Returns DSC_NONE if no statistic is given (e.g., subcommand with no
436 specifiers). Emits an error if the current token ID does not name a
438 static enum dsc_statistic
439 match_statistic (struct lexer *lexer)
441 if (lex_token (lexer) == T_ID)
443 enum dsc_statistic stat;
445 for (stat = 0; stat < DSC_N_STATS; stat++)
446 if (lex_match_id (lexer, dsc_info[stat].identifier))
450 lex_error (lexer, _("expecting statistic name: reverting to default"));
458 free_dsc_proc (struct dsc_proc *dsc)
465 for (i = 0; i < dsc->var_cnt; i++)
466 moments_destroy (dsc->vars[i].moments);
473 /* Returns false if NAME is a duplicate of any existing variable name or
474 of any previously-declared z-var name; otherwise returns true. */
476 try_name (const struct dictionary *dict, struct dsc_proc *dsc,
481 if (dict_lookup_var (dict, name) != NULL)
483 for (i = 0; i < dsc->var_cnt; i++)
484 if (!strcasecmp (dsc->vars[i].z_name, name))
489 /* Generates a name for a Z-score variable based on a variable
490 named VAR_NAME, given that *Z_CNT generated variable names are
491 known to already exist. If successful, returns true and
492 copies the new name into Z_NAME. On failure, returns false. */
494 generate_z_varname (const struct dictionary *dict, struct dsc_proc *dsc, char *z_name,
495 const char *var_name, int *z_cnt)
497 char name[VAR_NAME_LEN + 1];
499 /* Try a name based on the original variable name. */
501 str_copy_trunc (name + 1, sizeof name - 1, var_name);
502 if (try_name (dict, dsc, name))
504 strcpy (z_name, name);
508 /* Generate a synthetic name. */
514 sprintf (name, "ZSC%03d", *z_cnt);
515 else if (*z_cnt <= 108)
516 sprintf (name, "STDZ%02d", *z_cnt - 99);
517 else if (*z_cnt <= 117)
518 sprintf (name, "ZZZZ%02d", *z_cnt - 108);
519 else if (*z_cnt <= 126)
520 sprintf (name, "ZQZQ%02d", *z_cnt - 117);
523 msg (SE, _("Ran out of generic names for Z-score variables. "
524 "There are only 126 generic names: ZSC001-ZSC0999, "
525 "STDZ01-STDZ09, ZZZZ01-ZZZZ09, ZQZQ01-ZQZQ09."));
529 if (try_name (dict, dsc, name))
531 strcpy (z_name, name);
538 /* Outputs a table describing the mapping between source
539 variables and Z-score variables. */
541 dump_z_table (struct dsc_proc *dsc)
549 for (i = 0; i < dsc->var_cnt; i++)
550 if (dsc->vars[i].z_name[0] != '\0')
554 t = tab_create (2, cnt + 1);
555 tab_title (t, _("Mapping of variables to corresponding Z-scores."));
556 tab_headers (t, 0, 0, 1, 0);
557 tab_box (t, TAL_1, TAL_1, TAL_0, TAL_1, 0, 0, 1, cnt);
558 tab_hline (t, TAL_2, 0, 1, 1);
559 tab_text (t, 0, 0, TAB_CENTER | TAT_TITLE, _("Source"));
560 tab_text (t, 1, 0, TAB_CENTER | TAT_TITLE, _("Target"));
565 for (i = 0, y = 1; i < dsc->var_cnt; i++)
566 if (dsc->vars[i].z_name[0] != '\0')
568 tab_text (t, 0, y, TAB_LEFT, var_get_name (dsc->vars[i].v));
569 tab_text (t, 1, y++, TAB_LEFT, dsc->vars[i].z_name);
576 /* Transformation function to calculate Z-scores. Will return SYSMIS if any of
577 the following are true: 1) mean or standard deviation is SYSMIS 2) score is
578 SYSMIS 3) score is user missing and they were not included in the original
579 analyis. 4) any of the variables in the original analysis were missing
580 (either system or user-missing values that weren't included).
583 descriptives_trns_proc (void *trns_, struct ccase **c,
584 casenumber case_idx UNUSED)
586 struct dsc_trns *t = trns_;
587 struct dsc_z_score *z;
588 const struct variable **vars;
591 if (t->missing_type == DSC_LISTWISE)
594 for (vars = t->vars; vars < t->vars + t->var_cnt; vars++)
596 double score = case_num (*c, *vars);
597 if (var_is_num_missing (*vars, score, t->exclude))
605 *c = case_unshare (*c);
606 for (z = t->z_scores; z < t->z_scores + t->z_score_cnt; z++)
608 double input = case_num (*c, z->src_var);
609 double *output = &case_data_rw (*c, z->z_var)->f;
611 if (z->mean == SYSMIS || z->std_dev == SYSMIS || all_sysmis
612 || var_is_num_missing (z->src_var, input, t->exclude))
615 *output = (input - z->mean) / z->std_dev;
617 return TRNS_CONTINUE;
620 /* Frees a descriptives_trns struct. */
622 descriptives_trns_free (void *trns_)
624 struct dsc_trns *t = trns_;
627 assert((t->missing_type != DSC_LISTWISE) ^ (t->vars != NULL));
632 /* Sets up a transformation to calculate Z scores. */
634 setup_z_trns (struct dsc_proc *dsc, struct dataset *ds)
639 for (cnt = i = 0; i < dsc->var_cnt; i++)
640 if (dsc->vars[i].z_name[0] != '\0')
643 t = xmalloc (sizeof *t);
644 t->z_scores = xnmalloc (cnt, sizeof *t->z_scores);
645 t->z_score_cnt = cnt;
646 t->missing_type = dsc->missing_type;
647 t->exclude = dsc->exclude;
648 if ( t->missing_type == DSC_LISTWISE )
650 t->var_cnt = dsc->var_cnt;
651 t->vars = xnmalloc (t->var_cnt, sizeof *t->vars);
652 for (i = 0; i < t->var_cnt; i++)
653 t->vars[i] = dsc->vars[i].v;
661 for (cnt = i = 0; i < dsc->var_cnt; i++)
663 struct dsc_var *dv = &dsc->vars[i];
664 if (dv->z_name[0] != '\0')
666 struct dsc_z_score *z;
667 struct variable *dst_var;
669 dst_var = dict_create_var_assert (dataset_dict (ds), dv->z_name, 0);
670 var_set_label (dst_var, xasprintf (_("Z-score of %s"),
671 var_to_string (dv->v)));
673 z = &t->z_scores[cnt++];
676 z->mean = dv->stats[DSC_MEAN];
677 z->std_dev = dv->stats[DSC_STDDEV];
681 add_transformation (ds,
682 descriptives_trns_proc, descriptives_trns_free, t);
685 /* Statistical calculation. */
687 static bool listwise_missing (struct dsc_proc *dsc, const struct ccase *c);
689 /* Calculates and displays descriptive statistics for the cases
692 calc_descriptives (struct dsc_proc *dsc, struct casereader *group,
695 struct casereader *pass1, *pass2;
699 c = casereader_peek (group, 0);
702 casereader_destroy (group);
705 output_split_file_values (ds, c);
708 group = casereader_create_filter_weight (group, dataset_dict (ds),
712 pass2 = dsc->max_moment <= MOMENT_MEAN ? NULL : casereader_clone (pass1);
714 for (i = 0; i < dsc->var_cnt; i++)
716 struct dsc_var *dv = &dsc->vars[i];
718 dv->valid = dv->missing = 0.0;
719 if (dv->moments != NULL)
720 moments_clear (dv->moments);
724 dsc->missing_listwise = 0.;
727 /* First pass to handle most of the work. */
728 for (; (c = casereader_read (pass1)) != NULL; case_unref (c))
730 double weight = dict_get_case_weight (dataset_dict (ds), c, NULL);
732 /* Check for missing values. */
733 if (listwise_missing (dsc, c))
735 dsc->missing_listwise += weight;
736 if (dsc->missing_type == DSC_LISTWISE)
739 dsc->valid += weight;
741 for (i = 0; i < dsc->var_cnt; i++)
743 struct dsc_var *dv = &dsc->vars[i];
744 double x = case_num (c, dv->v);
746 if (var_is_num_missing (dv->v, x, dsc->exclude))
748 dv->missing += weight;
752 if (dv->moments != NULL)
753 moments_pass_one (dv->moments, x, weight);
761 if (!casereader_destroy (pass1))
763 casereader_destroy (pass2);
767 /* Second pass for higher-order moments. */
768 if (dsc->max_moment > MOMENT_MEAN)
770 for (; (c = casereader_read (pass2)) != NULL; case_unref (c))
772 double weight = dict_get_case_weight (dataset_dict (ds), c, NULL);
774 /* Check for missing values. */
775 if (dsc->missing_type == DSC_LISTWISE && listwise_missing (dsc, c))
778 for (i = 0; i < dsc->var_cnt; i++)
780 struct dsc_var *dv = &dsc->vars[i];
781 double x = case_num (c, dv->v);
783 if (var_is_num_missing (dv->v, x, dsc->exclude))
786 if (dv->moments != NULL)
787 moments_pass_two (dv->moments, x, weight);
790 if (!casereader_destroy (pass2))
794 /* Calculate results. */
795 for (i = 0; i < dsc->var_cnt; i++)
797 struct dsc_var *dv = &dsc->vars[i];
801 for (j = 0; j < DSC_N_STATS; j++)
802 dv->stats[j] = SYSMIS;
804 dv->valid = W = dsc->valid - dv->missing;
806 if (dv->moments != NULL)
807 moments_calculate (dv->moments, NULL,
808 &dv->stats[DSC_MEAN], &dv->stats[DSC_VARIANCE],
809 &dv->stats[DSC_SKEWNESS], &dv->stats[DSC_KURTOSIS]);
810 if (dsc->calc_stats & (1ul << DSC_SEMEAN)
811 && dv->stats[DSC_VARIANCE] != SYSMIS && W > 0.)
812 dv->stats[DSC_SEMEAN] = sqrt (dv->stats[DSC_VARIANCE]) / sqrt (W);
813 if (dsc->calc_stats & (1ul << DSC_STDDEV)
814 && dv->stats[DSC_VARIANCE] != SYSMIS)
815 dv->stats[DSC_STDDEV] = sqrt (dv->stats[DSC_VARIANCE]);
816 if (dsc->calc_stats & (1ul << DSC_SEKURT))
817 if (dv->stats[DSC_KURTOSIS] != SYSMIS)
818 dv->stats[DSC_SEKURT] = calc_sekurt (W);
819 if (dsc->calc_stats & (1ul << DSC_SESKEW)
820 && dv->stats[DSC_SKEWNESS] != SYSMIS)
821 dv->stats[DSC_SESKEW] = calc_seskew (W);
822 dv->stats[DSC_RANGE] = ((dv->min == DBL_MAX || dv->max == -DBL_MAX)
823 ? SYSMIS : dv->max - dv->min);
824 dv->stats[DSC_MIN] = dv->min == DBL_MAX ? SYSMIS : dv->min;
825 dv->stats[DSC_MAX] = dv->max == -DBL_MAX ? SYSMIS : dv->max;
826 if (dsc->calc_stats & (1ul << DSC_SUM))
827 dv->stats[DSC_SUM] = W * dv->stats[DSC_MEAN];
830 /* Output results. */
834 /* Returns true if any of the descriptives variables in DSC's
835 variable list have missing values in case C, false otherwise. */
837 listwise_missing (struct dsc_proc *dsc, const struct ccase *c)
841 for (i = 0; i < dsc->var_cnt; i++)
843 struct dsc_var *dv = &dsc->vars[i];
844 double x = case_num (c, dv->v);
846 if (var_is_num_missing (dv->v, x, dsc->exclude))
852 /* Statistical display. */
854 static algo_compare_func descriptives_compare_dsc_vars;
856 /* Displays a table of descriptive statistics for DSC. */
858 display (struct dsc_proc *dsc)
864 nc = 1 + (dsc->format == DSC_SERIAL ? 2 : 1);
865 for (i = 0; i < DSC_N_STATS; i++)
866 if (dsc->show_stats & (1ul << i))
869 if (dsc->sort_by_stat != DSC_NONE)
870 sort (dsc->vars, dsc->var_cnt, sizeof *dsc->vars,
871 descriptives_compare_dsc_vars, dsc);
873 t = tab_create (nc, dsc->var_cnt + 1);
874 tab_headers (t, 1, 0, 1, 0);
875 tab_box (t, TAL_1, TAL_1, -1, -1, 0, 0, nc - 1, dsc->var_cnt);
876 tab_box (t, -1, -1, -1, TAL_1, 1, 0, nc - 1, dsc->var_cnt);
877 tab_hline (t, TAL_2, 0, nc - 1, 1);
878 tab_vline (t, TAL_2, 1, 0, dsc->var_cnt);
881 tab_text (t, nc++, 0, TAB_LEFT | TAT_TITLE, _("Variable"));
882 if (dsc->format == DSC_SERIAL)
884 tab_text (t, nc++, 0, TAB_CENTER | TAT_TITLE, _("Valid N"));
885 tab_text (t, nc++, 0, TAB_CENTER | TAT_TITLE, _("Missing N"));
888 tab_text (t, nc++, 0, TAB_CENTER | TAT_TITLE, "N");
890 for (i = 0; i < DSC_N_STATS; i++)
891 if (dsc->show_stats & (1ul << i))
893 const char *title = gettext (dsc_info[i].name);
894 tab_text (t, nc++, 0, TAB_CENTER | TAT_TITLE, title);
897 for (i = 0; i < dsc->var_cnt; i++)
899 struct dsc_var *dv = &dsc->vars[i];
903 tab_text (t, nc++, i + 1, TAB_LEFT, var_get_name (dv->v));
904 tab_text_format (t, nc++, i + 1, 0, "%g", dv->valid);
905 if (dsc->format == DSC_SERIAL)
906 tab_text_format (t, nc++, i + 1, 0, "%g", dv->missing);
908 for (j = 0; j < DSC_N_STATS; j++)
909 if (dsc->show_stats & (1ul << j))
910 tab_double (t, nc++, i + 1, TAB_NONE, dv->stats[j], NULL);
913 tab_title (t, _("Valid cases = %g; cases with missing value(s) = %g."),
914 dsc->valid, dsc->missing_listwise);
919 /* Compares `struct dsc_var's A and B according to the ordering
922 descriptives_compare_dsc_vars (const void *a_, const void *b_, const void *dsc_)
924 const struct dsc_var *a = a_;
925 const struct dsc_var *b = b_;
926 const struct dsc_proc *dsc = dsc_;
930 if (dsc->sort_by_stat == DSC_NAME)
931 result = strcasecmp (var_get_name (a->v), var_get_name (b->v));
934 double as = a->stats[dsc->sort_by_stat];
935 double bs = b->stats[dsc->sort_by_stat];
937 result = as < bs ? -1 : as > bs;
940 if (!dsc->sort_ascending)