1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 1997-2000, 2009-2013 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
23 #include "data/casegrouper.h"
24 #include "data/casereader.h"
25 #include "data/casewriter.h"
26 #include "data/dataset.h"
27 #include "data/dictionary.h"
28 #include "data/transformations.h"
29 #include "data/variable.h"
30 #include "language/command.h"
31 #include "language/dictionary/split-file.h"
32 #include "language/lexer/lexer.h"
33 #include "language/lexer/variable-parser.h"
34 #include "libpspp/array.h"
35 #include "libpspp/assertion.h"
36 #include "libpspp/compiler.h"
37 #include "libpspp/i18n.h"
38 #include "libpspp/message.h"
39 #include "math/moments.h"
40 #include "output/tab.h"
42 #include "gl/xalloc.h"
45 #define _(msgid) gettext (msgid)
46 #define N_(msgid) msgid
48 /* DESCRIPTIVES private data. */
52 /* Handling of missing values. */
55 DSC_VARIABLE, /* Handle missing values on a per-variable basis. */
56 DSC_LISTWISE /* Discard entire case if any variable is missing. */
59 /* Describes properties of a distribution for the purpose of
60 calculating a Z-score. */
63 const struct variable *src_var; /* Variable on which z-score is based. */
64 struct variable *z_var; /* New z-score variable. */
65 double mean; /* Distribution mean. */
66 double std_dev; /* Distribution standard deviation. */
69 /* DESCRIPTIVES transformation (for calculating Z-scores). */
72 struct dsc_z_score *z_scores; /* Array of Z-scores. */
73 int z_score_cnt; /* Number of Z-scores. */
74 const struct variable **vars; /* Variables for listwise missing checks. */
75 size_t var_cnt; /* Number of variables. */
76 enum dsc_missing_type missing_type; /* Treatment of missing values. */
77 enum mv_class exclude; /* Classes of missing values to exclude. */
78 struct casereader *z_reader; /* Reader for count, mean, stddev. */
79 casenumber count; /* Number left in this SPLIT FILE group.*/
83 /* Statistics. Used as bit indexes, so must be 32 or fewer. */
86 DSC_MEAN = 0, DSC_SEMEAN, DSC_STDDEV, DSC_VARIANCE, DSC_KURTOSIS,
87 DSC_SEKURT, DSC_SKEWNESS, DSC_SESKEW, DSC_RANGE, DSC_MIN,
88 DSC_MAX, DSC_SUM, DSC_N_STATS,
90 /* Only valid as sort criteria. */
91 DSC_NAME = -2, /* Sort by name. */
92 DSC_NONE = -1 /* Unsorted. */
95 /* Describes one statistic. */
96 struct dsc_statistic_info
98 const char *identifier; /* Identifier. */
99 const char *name; /* Full name. */
100 enum moment moment; /* Highest moment needed to calculate. */
103 /* Table of statistics, indexed by DSC_*.  Each row holds the keyword
   identifier, the untranslated full name (marked with N_ here and passed
   through gettext when the table header is displayed), and the highest
   moment needed to calculate the statistic. */
104 static const struct dsc_statistic_info dsc_info[DSC_N_STATS] =
106 {"MEAN", N_("Mean"), MOMENT_MEAN},
107 {"SEMEAN", N_("S.E. Mean"), MOMENT_VARIANCE},
108 {"STDDEV", N_("Std Dev"), MOMENT_VARIANCE},
109 {"VARIANCE", N_("Variance"), MOMENT_VARIANCE},
110 {"KURTOSIS", N_("Kurtosis"), MOMENT_KURTOSIS},
111 {"SEKURTOSIS", N_("S.E. Kurt"), MOMENT_NONE},
112 {"SKEWNESS", N_("Skewness"), MOMENT_SKEWNESS},
113 {"SESKEWNESS", N_("S.E. Skew"), MOMENT_NONE},
114 {"RANGE", N_("Range"), MOMENT_NONE},
115 {"MINIMUM", N_("Minimum"), MOMENT_NONE},
116 {"MAXIMUM", N_("Maximum"), MOMENT_NONE},
117 {"SUM", N_("Sum"), MOMENT_MEAN},
120 /* Statistics calculated by default if none are explicitly
122 #define DEFAULT_STATS \
123 ((1ul << DSC_MEAN) | (1ul << DSC_STDDEV) | (1ul << DSC_MIN) \
126 /* A variable specified on DESCRIPTIVES. */
129 const struct variable *v; /* Variable to calculate on. */
130 char *z_name; /* Name for z-score variable, or NULL if none. */
131 double valid, missing; /* Valid, missing counts (sums of case weights). */
132 struct moments *moments; /* Moments, or NULL if no moments are needed. */
133 double min, max; /* Minimum and maximum values. */
134 double stats[DSC_N_STATS]; /* All the stats' values, indexed by DSC_*. */
140 DSC_LINE, /* Abbreviated format. */
141 DSC_SERIAL /* Long format. */
144 /* A DESCRIPTIVES procedure. */
147 /* Per-variable info. */
148 struct dsc_var *vars; /* Variables. */
149 size_t var_cnt; /* Number of variables. */
152 enum dsc_missing_type missing_type; /* Treatment of missing values. */
153 enum mv_class exclude; /* Classes of missing values to exclude. */
154 int show_var_labels; /* Nonzero to show variable labels. */
155 int show_index; /* Nonzero to show variable index. */
156 enum dsc_format format; /* Output format. */
158 /* Accumulated results. */
159 double missing_listwise; /* Sum of weights of cases missing listwise. */
160 double valid; /* Sum of weights of valid cases. */
161 bool bad_warn; /* Warn if bad weight found. */
162 enum dsc_statistic sort_by_stat; /* Statistic to sort by, or DSC_NAME (by name) or DSC_NONE (unsorted). */
163 int sort_ascending; /* !0: ascending order; 0: descending. */
164 unsigned long show_stats; /* Statistics to display, as a mask of 1ul << DSC_* bits. */
165 unsigned long calc_stats; /* Statistics to calculate, as a mask of 1ul << DSC_* bits. */
166 enum moment max_moment; /* Highest moment needed for stats. */
169 struct casewriter *z_writer; /* Mean and stddev per SPLIT FILE group, or NULL if unused. */
173 static enum dsc_statistic match_statistic (struct lexer *);
174 static void free_dsc_proc (struct dsc_proc *);
176 /* Z-score functions. */
177 static bool try_name (const struct dictionary *dict,
178 struct dsc_proc *dsc, const char *name);
179 static char *generate_z_varname (const struct dictionary *dict,
180 struct dsc_proc *dsc,
181 const char *name, int *z_cnt);
182 static void dump_z_table (struct dsc_proc *);
183 static void setup_z_trns (struct dsc_proc *, struct dataset *);
185 /* Procedure execution functions. */
186 static void calc_descriptives (struct dsc_proc *, struct casereader *,
188 static void display (struct dsc_proc *dsc);
190 /* Parser and outline. */
192 /* Handles DESCRIPTIVES. */
194 cmd_descriptives (struct lexer *lexer, struct dataset *ds)
196 struct dictionary *dict = dataset_dict (ds);
197 struct dsc_proc *dsc;
198 const struct variable **vars = NULL;
200 int save_z_scores = 0;
205 struct casegrouper *grouper;
206 struct casereader *group;
208 /* Create and initialize dsc. */
209 dsc = xmalloc (sizeof *dsc);
212 dsc->missing_type = DSC_VARIABLE;
213 dsc->exclude = MV_ANY;
214 dsc->show_var_labels = 1;
216 dsc->format = DSC_LINE;
217 dsc->missing_listwise = 0.;
220 dsc->sort_by_stat = DSC_NONE;
221 dsc->sort_ascending = 1;
222 dsc->show_stats = dsc->calc_stats = DEFAULT_STATS;
223 dsc->z_writer = NULL;
225 /* Parse DESCRIPTIVES. */
226 while (lex_token (lexer) != T_ENDCMD)
228 if (lex_match_id (lexer, "MISSING"))
230 lex_match (lexer, T_EQUALS);
231 while (lex_token (lexer) != T_ENDCMD && lex_token (lexer) != T_SLASH)
233 if (lex_match_id (lexer, "VARIABLE"))
234 dsc->missing_type = DSC_VARIABLE;
235 else if (lex_match_id (lexer, "LISTWISE"))
236 dsc->missing_type = DSC_LISTWISE;
237 else if (lex_match_id (lexer, "INCLUDE"))
238 dsc->exclude = MV_SYSTEM;
241 lex_error (lexer, NULL);
244 lex_match (lexer, T_COMMA);
247 else if (lex_match_id (lexer, "SAVE"))
249 else if (lex_match_id (lexer, "FORMAT"))
251 lex_match (lexer, T_EQUALS);
252 while (lex_token (lexer) != T_ENDCMD && lex_token (lexer) != T_SLASH)
254 if (lex_match_id (lexer, "LABELS"))
255 dsc->show_var_labels = 1;
256 else if (lex_match_id (lexer, "NOLABELS"))
257 dsc->show_var_labels = 0;
258 else if (lex_match_id (lexer, "INDEX"))
260 else if (lex_match_id (lexer, "NOINDEX"))
262 else if (lex_match_id (lexer, "LINE"))
263 dsc->format = DSC_LINE;
264 else if (lex_match_id (lexer, "SERIAL"))
265 dsc->format = DSC_SERIAL;
268 lex_error (lexer, NULL);
271 lex_match (lexer, T_COMMA);
274 else if (lex_match_id (lexer, "STATISTICS"))
276 lex_match (lexer, T_EQUALS);
278 while (lex_token (lexer) != T_ENDCMD && lex_token (lexer) != T_SLASH)
280 if (lex_match (lexer, T_ALL))
281 dsc->show_stats |= (1ul << DSC_N_STATS) - 1;
282 else if (lex_match_id (lexer, "DEFAULT"))
283 dsc->show_stats |= DEFAULT_STATS;
285 dsc->show_stats |= 1ul << (match_statistic (lexer));
286 lex_match (lexer, T_COMMA);
288 if (dsc->show_stats == 0)
289 dsc->show_stats = DEFAULT_STATS;
291 else if (lex_match_id (lexer, "SORT"))
293 lex_match (lexer, T_EQUALS);
294 if (lex_match_id (lexer, "NAME"))
295 dsc->sort_by_stat = DSC_NAME;
298 dsc->sort_by_stat = match_statistic (lexer);
299 if (dsc->sort_by_stat == DSC_NONE )
300 dsc->sort_by_stat = DSC_MEAN;
302 if (lex_match (lexer, T_LPAREN))
304 if (lex_match_id (lexer, "A"))
305 dsc->sort_ascending = 1;
306 else if (lex_match_id (lexer, "D"))
307 dsc->sort_ascending = 0;
309 lex_error (lexer, NULL);
310 lex_force_match (lexer, T_RPAREN);
313 else if (var_cnt == 0)
315 if (lex_next_token (lexer, 1) == T_EQUALS)
317 lex_match_id (lexer, "VARIABLES");
318 lex_match (lexer, T_EQUALS);
321 while (lex_token (lexer) != T_ENDCMD && lex_token (lexer) != T_SLASH)
325 if (!parse_variables_const (lexer, dict, &vars, &var_cnt,
326 PV_APPEND | PV_NO_DUPLICATE | PV_NUMERIC))
329 dsc->vars = xnrealloc ((void *)dsc->vars, var_cnt, sizeof *dsc->vars);
330 for (i = dsc->var_cnt; i < var_cnt; i++)
332 struct dsc_var *dv = &dsc->vars[i];
337 dsc->var_cnt = var_cnt;
339 if (lex_match (lexer, T_LPAREN))
341 if (lex_token (lexer) != T_ID)
343 lex_error (lexer, NULL);
346 if (try_name (dict, dsc, lex_tokcstr (lexer)))
348 struct dsc_var *dsc_var = &dsc->vars[dsc->var_cnt - 1];
349 dsc_var->z_name = xstrdup (lex_tokcstr (lexer));
353 msg (SE, _("Z-score variable name %s would be"
354 " a duplicate variable name."), lex_tokcstr (lexer));
356 if (!lex_force_match (lexer, T_RPAREN))
363 lex_error (lexer, NULL);
367 lex_match (lexer, T_SLASH);
371 msg (SE, _("No variables specified."));
375 /* Construct z-score varnames, show translation table. */
376 if (z_cnt || save_z_scores)
378 struct caseproto *proto;
384 for (i = 0; i < dsc->var_cnt; i++)
386 struct dsc_var *dsc_var = &dsc->vars[i];
387 if (dsc_var->z_name == NULL)
389 const char *name = var_get_name (dsc_var->v);
390 dsc_var->z_name = generate_z_varname (dict, dsc, name,
392 if (dsc_var->z_name == NULL)
400 /* It would be better to handle Z scores correctly (however we define
401 that) when TEMPORARY is in effect, but in the meantime this at least
402 prevents a use-after-free error. See bug #38786. */
403 if (proc_make_temporary_transformations_permanent (ds))
404 msg (SW, _("DESCRIPTIVES with Z scores ignores TEMPORARY. "
405 "Temporary transformations will be made permanent."));
407 proto = caseproto_create ();
408 for (i = 0; i < 1 + 2 * z_cnt; i++)
409 proto = caseproto_add_width (proto, 0);
410 dsc->z_writer = autopaging_writer_create (proto);
411 caseproto_unref (proto);
416 /* Figure out statistics to display. */
417 if (dsc->show_stats & (1ul << DSC_SKEWNESS))
418 dsc->show_stats |= 1ul << DSC_SESKEW;
419 if (dsc->show_stats & (1ul << DSC_KURTOSIS))
420 dsc->show_stats |= 1ul << DSC_SEKURT;
422 /* Figure out which statistics to calculate. */
423 dsc->calc_stats = dsc->show_stats;
425 dsc->calc_stats |= (1ul << DSC_MEAN) | (1ul << DSC_STDDEV);
426 if (dsc->sort_by_stat >= 0)
427 dsc->calc_stats |= 1ul << dsc->sort_by_stat;
428 if (dsc->show_stats & (1ul << DSC_SESKEW))
429 dsc->calc_stats |= 1ul << DSC_SKEWNESS;
430 if (dsc->show_stats & (1ul << DSC_SEKURT))
431 dsc->calc_stats |= 1ul << DSC_KURTOSIS;
433 /* Figure out maximum moment needed and allocate moments for
435 dsc->max_moment = MOMENT_NONE;
436 for (i = 0; i < DSC_N_STATS; i++)
437 if (dsc->calc_stats & (1ul << i) && dsc_info[i].moment > dsc->max_moment)
438 dsc->max_moment = dsc_info[i].moment;
439 if (dsc->max_moment != MOMENT_NONE)
440 for (i = 0; i < dsc->var_cnt; i++)
441 dsc->vars[i].moments = moments_create (dsc->max_moment);
444 grouper = casegrouper_create_splits (proc_open_filtering (ds, z_cnt == 0),
446 while (casegrouper_get_next_group (grouper, &group))
447 calc_descriptives (dsc, group, ds);
448 ok = casegrouper_destroy (grouper);
449 ok = proc_commit (ds) && ok;
453 setup_z_trns (dsc, ds);
458 return ok ? CMD_SUCCESS : CMD_CASCADING_FAILURE;
466 /* Returns the statistic named by the current token and skips past the token.
467 Returns DSC_NONE if no statistic is given (e.g., subcommand with no
468 specifiers). Emits an error if the current token ID does not name a
470 static enum dsc_statistic
471 match_statistic (struct lexer *lexer)
473 if (lex_token (lexer) == T_ID)
475 enum dsc_statistic stat;
477 for (stat = 0; stat < DSC_N_STATS; stat++)
478 if (lex_match_id (lexer, dsc_info[stat].identifier))
482 lex_error (lexer, _("expecting statistic name: reverting to default"));
490 free_dsc_proc (struct dsc_proc *dsc)
497 for (i = 0; i < dsc->var_cnt; i++)
499 struct dsc_var *dsc_var = &dsc->vars[i];
500 free (dsc_var->z_name);
501 moments_destroy (dsc_var->moments);
503 casewriter_destroy (dsc->z_writer);
510 /* Returns false if NAME is a duplicate of any existing variable name or
511 of any previously-declared z-var name; otherwise returns true. */
513 try_name (const struct dictionary *dict, struct dsc_proc *dsc,
/* First check: NAME must not already name a variable in DICT. */
518 if (dict_lookup_var (dict, name) != NULL)
/* Second check: NAME must not match (per utf8_strcasecmp) any Z-score
   name already assigned to one of DSC's variables.  Variables without a
   Z-score name (z_name == NULL) are ignored. */
520 for (i = 0; i < dsc->var_cnt; i++)
522 struct dsc_var *dsc_var = &dsc->vars[i];
523 if (dsc_var->z_name != NULL && !utf8_strcasecmp (dsc_var->z_name, name))
529 /* Generates a name for a Z-score variable based on a variable
530 named VAR_NAME, given that *Z_CNT generated variable names are
531 known to already exist. If successful, returns the new name
532 as a dynamically allocated string. On failure, returns NULL.

   DICT supplies both the existing variable names to avoid and the
   encoding used when truncating the candidate name; DSC supplies the
   Z-score names already chosen during this command. */
534 generate_z_varname (const struct dictionary *dict, struct dsc_proc *dsc,
535 const char *var_name, int *z_cnt)
537 char *z_name, *trunc_name;
539 /* Try a name based on the original variable name: "Z" followed by
   VAR_NAME, truncated by utf8_encoding_trunc in the dictionary's
   encoding, and accepted only if try_name finds no clash. */
540 z_name = xasprintf ("Z%s", var_name);
541 trunc_name = utf8_encoding_trunc (z_name, dict_get_encoding (dict),
544 if (try_name (dict, dsc, trunc_name))
548 /* Generate a synthetic name.  The pattern depends on the value of
   *Z_CNT: ZSCnnn first, then STDZnn for values up to 108, ZZZZnn up to
   117 and ZQZQnn up to 126, each numbered from 01 within its range. */
556 sprintf (name, "ZSC%03d", *z_cnt);
557 else if (*z_cnt <= 108)
558 sprintf (name, "STDZ%02d", *z_cnt - 99);
559 else if (*z_cnt <= 117)
560 sprintf (name, "ZZZZ%02d", *z_cnt - 108);
561 else if (*z_cnt <= 126)
562 sprintf (name, "ZQZQ%02d", *z_cnt - 117);
/* NOTE(review): the message below advertises "ZSC001-ZSC0999", but the
   offsets above (99, 108, 117) and the stated total of 126 names imply
   the first range is ZSC001-ZSC099.  The message text looks like a typo;
   confirm and correct it in a code change. */
565 msg (SE, _("Ran out of generic names for Z-score variables. "
566 "There are only 126 generic names: ZSC001-ZSC0999, "
567 "STDZ01-STDZ09, ZZZZ01-ZZZZ09, ZQZQ01-ZQZQ09."));
/* A synthetic name is returned (as a fresh copy) only if it is unused. */
571 if (try_name (dict, dsc, name))
572 return xstrdup (name);
577 /* Outputs a table describing the mapping between source
578 variables and Z-score variables.  Only variables of DSC that have a
   Z-score name (z_name != NULL) appear in the table. */
580 dump_z_table (struct dsc_proc *dsc)
/* Visit the variables that have a Z-score name.
   NOTE(review): the loop body is not visible in this excerpt; presumably
   it counts them into cnt, which sizes the table below. */
588 for (i = 0; i < dsc->var_cnt; i++)
589 if (dsc->vars[i].z_name != NULL)
/* Two columns (source, target) and cnt data rows plus one heading row. */
593 t = tab_create (2, cnt + 1);
594 tab_title (t, _("Mapping of variables to corresponding Z-scores."));
595 tab_headers (t, 0, 0, 1, 0);
596 tab_box (t, TAL_1, TAL_1, TAL_0, TAL_1, 0, 0, 1, cnt);
597 tab_hline (t, TAL_2, 0, 1, 1);
598 tab_text (t, 0, 0, TAB_CENTER | TAT_TITLE, _("Source"));
599 tab_text (t, 1, 0, TAB_CENTER | TAT_TITLE, _("Target"));
/* Fill one row per variable that has a Z-score name; y starts at 1 so
   that row 0 keeps the headings, and advances only when a row is used. */
604 for (i = 0, y = 1; i < dsc->var_cnt; i++)
605 if (dsc->vars[i].z_name != NULL)
607 tab_text (t, 0, y, TAB_LEFT, var_to_string (dsc->vars[i].v));
608 tab_text (t, 1, y++, TAB_LEFT, dsc->vars[i].z_name);
615 /* Transformation function to calculate Z-scores. Will return SYSMIS if any of
616 the following are true: 1) mean or standard deviation is SYSMIS 2) score is
617 SYSMIS 3) score is user missing and they were not included in the original
618 analysis. 4) any of the variables in the original analysis were missing
619 (either system or user-missing values that weren't included).
622 descriptives_trns_proc (void *trns_, struct ccase **c,
623 casenumber case_idx UNUSED)
625 struct dsc_trns *t = trns_;
626 struct dsc_z_score *z;
627 const struct variable **vars;
632 struct ccase *z_case;
634 z_case = casereader_read (t->z_reader);
639 t->count = case_num_idx (z_case, z_idx++);
640 for (z = t->z_scores; z < t->z_scores + t->z_score_cnt; z++)
642 z->mean = case_num_idx (z_case, z_idx++);
643 z->std_dev = case_num_idx (z_case, z_idx++);
651 msg (SE, _("Internal error processing Z scores"));
654 for (z = t->z_scores; z < t->z_scores + t->z_score_cnt; z++)
655 z->mean = z->std_dev = SYSMIS;
660 if (t->missing_type == DSC_LISTWISE)
663 for (vars = t->vars; vars < t->vars + t->var_cnt; vars++)
665 double score = case_num (*c, *vars);
666 if (var_is_num_missing (*vars, score, t->exclude))
674 *c = case_unshare (*c);
675 for (z = t->z_scores; z < t->z_scores + t->z_score_cnt; z++)
677 double input = case_num (*c, z->src_var);
678 double *output = &case_data_rw (*c, z->z_var)->f;
680 if (z->mean == SYSMIS || z->std_dev == SYSMIS || all_sysmis
681 || var_is_num_missing (z->src_var, input, t->exclude))
684 *output = (input - z->mean) / z->std_dev;
686 return TRNS_CONTINUE;
689 /* Frees a descriptives_trns struct. */
691 descriptives_trns_free (void *trns_)
693 struct dsc_trns *t = trns_;
/* Success requires both the transformation's own ok flag and an
   error-free Z-score reader; sample this before the reader is destroyed. */
694 bool ok = t->ok && !casereader_error (t->z_reader);
697 casereader_destroy (t->z_reader);
/* Invariant: t->vars is non-null exactly when listwise deletion is in
   effect (the XOR holds when precisely one of the two operands is true). */
698 assert((t->missing_type != DSC_LISTWISE) ^ (t->vars != NULL));
705 /* Sets up a transformation to calculate Z scores.  Builds a struct
   dsc_trns from DSC, creates the destination variables in DS's
   dictionary, and registers the transformation on DS. */
707 setup_z_trns (struct dsc_proc *dsc, struct dataset *ds)
/* Visit the variables that have a Z-score name.
   NOTE(review): the loop body is not visible in this excerpt; presumably
   it counts them into cnt, which sizes t->z_scores below. */
712 for (cnt = i = 0; i < dsc->var_cnt; i++)
713 if (dsc->vars[i].z_name != NULL)
716 t = xmalloc (sizeof *t);
717 t->z_scores = xnmalloc (cnt, sizeof *t->z_scores);
718 t->z_score_cnt = cnt;
719 t->missing_type = dsc->missing_type;
720 t->exclude = dsc->exclude;
/* With listwise deletion, the transformation receives a copy of the
   full list of analysis variables (not only those with Z scores). */
721 if ( t->missing_type == DSC_LISTWISE )
723 t->var_cnt = dsc->var_cnt;
724 t->vars = xnmalloc (t->var_cnt, sizeof *t->vars);
725 for (i = 0; i < t->var_cnt; i++)
726 t->vars[i] = dsc->vars[i].v;
/* Turn the per-group statistics written so far into a reader for the
   transformation, then clear DSC's pointer to the writer.
   NOTE(review): presumably casewriter_make_reader consumes the writer,
   so the pointer must not be reused or destroyed later; confirm. */
733 t->z_reader = casewriter_make_reader (dsc->z_writer);
736 dsc->z_writer = NULL;
/* Create one destination variable, labelled "Z-score of <source>", and
   claim one t->z_scores slot for each variable that has a Z-score name;
   cnt is restarted at 0 and used as the slot index. */
738 for (cnt = i = 0; i < dsc->var_cnt; i++)
740 struct dsc_var *dv = &dsc->vars[i];
741 if (dv->z_name != NULL)
743 struct dsc_z_score *z;
744 struct variable *dst_var;
747 dst_var = dict_create_var_assert (dataset_dict (ds), dv->z_name, 0);
749 label = xasprintf (_("Z-score of %s"),var_to_string (dv->v));
750 var_set_label (dst_var, label, false);
753 z = &t->z_scores[cnt++];
759 add_transformation (ds,
760 descriptives_trns_proc, descriptives_trns_free, t);
763 /* Statistical calculation. */
765 static bool listwise_missing (struct dsc_proc *dsc, const struct ccase *c);
767 /* Calculates and displays descriptive statistics for the cases
770 calc_descriptives (struct dsc_proc *dsc, struct casereader *group,
773 struct casereader *pass1, *pass2;
779 c = casereader_peek (group, 0);
782 casereader_destroy (group);
785 output_split_file_values (ds, c);
788 group = casereader_create_filter_weight (group, dataset_dict (ds),
792 pass2 = dsc->max_moment <= MOMENT_MEAN ? NULL : casereader_clone (pass1);
794 for (i = 0; i < dsc->var_cnt; i++)
796 struct dsc_var *dv = &dsc->vars[i];
798 dv->valid = dv->missing = 0.0;
799 if (dv->moments != NULL)
800 moments_clear (dv->moments);
804 dsc->missing_listwise = 0.;
807 /* First pass to handle most of the work. */
809 for (; (c = casereader_read (pass1)) != NULL; case_unref (c))
811 double weight = dict_get_case_weight (dataset_dict (ds), c, NULL);
813 /* Check for missing values. */
814 if (listwise_missing (dsc, c))
816 dsc->missing_listwise += weight;
817 if (dsc->missing_type == DSC_LISTWISE)
820 dsc->valid += weight;
822 for (i = 0; i < dsc->var_cnt; i++)
824 struct dsc_var *dv = &dsc->vars[i];
825 double x = case_num (c, dv->v);
827 if (var_is_num_missing (dv->v, x, dsc->exclude))
829 dv->missing += weight;
833 if (dv->moments != NULL)
834 moments_pass_one (dv->moments, x, weight);
844 if (!casereader_destroy (pass1))
846 casereader_destroy (pass2);
850 /* Second pass for higher-order moments. */
851 if (dsc->max_moment > MOMENT_MEAN)
853 for (; (c = casereader_read (pass2)) != NULL; case_unref (c))
855 double weight = dict_get_case_weight (dataset_dict (ds), c, NULL);
857 /* Check for missing values. */
858 if (dsc->missing_type == DSC_LISTWISE && listwise_missing (dsc, c))
861 for (i = 0; i < dsc->var_cnt; i++)
863 struct dsc_var *dv = &dsc->vars[i];
864 double x = case_num (c, dv->v);
866 if (var_is_num_missing (dv->v, x, dsc->exclude))
869 if (dv->moments != NULL)
870 moments_pass_two (dv->moments, x, weight);
873 if (!casereader_destroy (pass2))
877 /* Calculate results. */
880 c = case_create (casewriter_get_proto (dsc->z_writer));
882 case_data_rw_idx (c, z_idx++)->f = count;
887 for (i = 0; i < dsc->var_cnt; i++)
889 struct dsc_var *dv = &dsc->vars[i];
893 for (j = 0; j < DSC_N_STATS; j++)
894 dv->stats[j] = SYSMIS;
896 dv->valid = W = dsc->valid - dv->missing;
898 if (dv->moments != NULL)
899 moments_calculate (dv->moments, NULL,
900 &dv->stats[DSC_MEAN], &dv->stats[DSC_VARIANCE],
901 &dv->stats[DSC_SKEWNESS], &dv->stats[DSC_KURTOSIS]);
902 if (dsc->calc_stats & (1ul << DSC_SEMEAN)
903 && dv->stats[DSC_VARIANCE] != SYSMIS && W > 0.)
904 dv->stats[DSC_SEMEAN] = sqrt (dv->stats[DSC_VARIANCE]) / sqrt (W);
905 if (dsc->calc_stats & (1ul << DSC_STDDEV)
906 && dv->stats[DSC_VARIANCE] != SYSMIS)
907 dv->stats[DSC_STDDEV] = sqrt (dv->stats[DSC_VARIANCE]);
908 if (dsc->calc_stats & (1ul << DSC_SEKURT))
909 if (dv->stats[DSC_KURTOSIS] != SYSMIS)
910 dv->stats[DSC_SEKURT] = calc_sekurt (W);
911 if (dsc->calc_stats & (1ul << DSC_SESKEW)
912 && dv->stats[DSC_SKEWNESS] != SYSMIS)
913 dv->stats[DSC_SESKEW] = calc_seskew (W);
914 dv->stats[DSC_RANGE] = ((dv->min == DBL_MAX || dv->max == -DBL_MAX)
915 ? SYSMIS : dv->max - dv->min);
916 dv->stats[DSC_MIN] = dv->min == DBL_MAX ? SYSMIS : dv->min;
917 dv->stats[DSC_MAX] = dv->max == -DBL_MAX ? SYSMIS : dv->max;
918 if (dsc->calc_stats & (1ul << DSC_SUM))
919 dv->stats[DSC_SUM] = W * dv->stats[DSC_MEAN];
923 case_data_rw_idx (c, z_idx++)->f = dv->stats[DSC_MEAN];
924 case_data_rw_idx (c, z_idx++)->f = dv->stats[DSC_STDDEV];
929 casewriter_write (dsc->z_writer, c);
931 /* Output results. */
935 /* Returns true if any of the descriptives variables in DSC's
936 variable list have missing values in case C, false otherwise.
   Which values count as missing is governed by DSC's exclude setting. */
938 listwise_missing (struct dsc_proc *dsc, const struct ccase *c)
942 for (i = 0; i < dsc->var_cnt; i++)
944 struct dsc_var *dv = &dsc->vars[i];
/* Read the case's numeric value for this variable and test it against
   the classes of missing values named by dsc->exclude. */
945 double x = case_num (c, dv->v);
947 if (var_is_num_missing (dv->v, x, dsc->exclude))
953 /* Statistical display. */
955 static algo_compare_func descriptives_compare_dsc_vars;
957 /* Displays a table of descriptive statistics for DSC: one heading row,
   then one row per variable with its name, valid N, missing N (serial
   format only), and each statistic selected in DSC's show_stats mask. */
959 display (struct dsc_proc *dsc)
/* Fixed columns: the variable column plus either one N column or, in
   serial format, separate valid-N and missing-N columns. */
965 nc = 1 + (dsc->format == DSC_SERIAL ? 2 : 1);
/* Visit each statistic selected for display.
   NOTE(review): the loop body is not visible in this excerpt; presumably
   it adds one column to nc per selected statistic. */
966 for (i = 0; i < DSC_N_STATS; i++)
967 if (dsc->show_stats & (1ul << i))
/* Sort the variables in place unless no ordering was requested. */
970 if (dsc->sort_by_stat != DSC_NONE)
971 sort (dsc->vars, dsc->var_cnt, sizeof *dsc->vars,
972 descriptives_compare_dsc_vars, dsc);
/* nc columns; one heading row plus one row per variable. */
974 t = tab_create (nc, dsc->var_cnt + 1);
975 tab_headers (t, 1, 0, 1, 0);
976 tab_box (t, TAL_1, TAL_1, -1, -1, 0, 0, nc - 1, dsc->var_cnt);
977 tab_box (t, -1, -1, -1, TAL_1, 1, 0, nc - 1, dsc->var_cnt);
978 tab_hline (t, TAL_2, 0, nc - 1, 1);
979 tab_vline (t, TAL_2, 1, 0, dsc->var_cnt);
/* Heading row.  From here on nc is reused as a running column index,
   advanced with nc++ as each cell is written. */
982 tab_text (t, nc++, 0, TAB_LEFT | TAT_TITLE, _("Variable"));
983 if (dsc->format == DSC_SERIAL)
985 tab_text (t, nc++, 0, TAB_CENTER | TAT_TITLE, _("Valid N"));
986 tab_text (t, nc++, 0, TAB_CENTER | TAT_TITLE, _("Missing N"));
989 tab_text (t, nc++, 0, TAB_CENTER | TAT_TITLE, "N");
/* One heading per displayed statistic, translated at display time. */
991 for (i = 0; i < DSC_N_STATS; i++)
992 if (dsc->show_stats & (1ul << i))
994 const char *title = gettext (dsc_info[i].name);
995 tab_text (t, nc++, 0, TAB_CENTER | TAT_TITLE, title);
/* Data rows: variable i goes in table row i + 1, below the headings. */
998 for (i = 0; i < dsc->var_cnt; i++)
1000 struct dsc_var *dv = &dsc->vars[i];
1004 tab_text (t, nc++, i + 1, TAB_LEFT, var_to_string (dv->v));
1005 tab_text_format (t, nc++, i + 1, 0, "%g", dv->valid);
1006 if (dsc->format == DSC_SERIAL)
1007 tab_text_format (t, nc++, i + 1, 0, "%g", dv->missing);
/* Statistics are emitted in DSC_* order, matching the headings. */
1009 for (j = 0; j < DSC_N_STATS; j++)
1010 if (dsc->show_stats & (1ul << j))
1011 tab_double (t, nc++, i + 1, TAB_NONE, dv->stats[j], NULL);
1014 tab_title (t, _("Valid cases = %g; cases with missing value(s) = %g."),
1015 dsc->valid, dsc->missing_listwise);
1020 /* Compares `struct dsc_var's A_ and B_ according to the ordering
1021 specified by the `struct dsc_proc' DSC_: by variable name when its
   sort_by_stat is DSC_NAME, otherwise by the value of the chosen
   statistic. */
1023 descriptives_compare_dsc_vars (const void *a_, const void *b_, const void *dsc_)
1025 const struct dsc_var *a = a_;
1026 const struct dsc_var *b = b_;
1027 const struct dsc_proc *dsc = dsc_;
1031 if (dsc->sort_by_stat == DSC_NAME)
1032 result = utf8_strcasecmp (var_get_name (a->v), var_get_name (b->v));
/* Otherwise sort_by_stat is used directly as an index into stats[]. */
1035 double as = a->stats[dsc->sort_by_stat];
1036 double bs = b->stats[dsc->sort_by_stat];
/* Three-way comparison: -1 if as < bs, 1 if as > bs, otherwise 0. */
1038 result = as < bs ? -1 : as > bs;
/* NOTE(review): the statement controlled by this test is not visible in
   this excerpt; presumably it reverses the result for descending order. */
1041 if (!dsc->sort_ascending)