1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 1997-2000, 2009-2014 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
24 #include "data/casegrouper.h"
25 #include "data/casereader.h"
26 #include "data/casewriter.h"
27 #include "data/dataset.h"
28 #include "data/dictionary.h"
29 #include "data/transformations.h"
30 #include "data/variable.h"
31 #include "language/command.h"
32 #include "language/dictionary/split-file.h"
33 #include "language/lexer/lexer.h"
34 #include "language/lexer/variable-parser.h"
35 #include "libpspp/array.h"
36 #include "libpspp/assertion.h"
37 #include "libpspp/compiler.h"
38 #include "libpspp/i18n.h"
39 #include "libpspp/message.h"
40 #include "math/moments.h"
41 #include "output/tab.h"
43 #include "gl/xalloc.h"
46 #define _(msgid) gettext (msgid)
47 #define N_(msgid) msgid
49 /* DESCRIPTIVES private data. */
53 /* Handling of missing values.  Selected by the VARIABLE and LISTWISE
   keywords of the MISSING subcommand; cmd_descriptives starts out with
   DSC_VARIABLE. */
56 DSC_VARIABLE, /* Handle missing values on a per-variable basis. */
57 DSC_LISTWISE /* Discard entire case if any variable is missing. */
60 /* Describes properties of a distribution for the purpose of
61 calculating a Z-score.  descriptives_trns_proc refreshes MEAN and STD_DEV
   from the cases it reads out of the transformation's z_reader and stores
   (input - mean) / std_dev in Z_VAR. */
64 const struct variable *src_var; /* Variable on which z-score is based. */
65 struct variable *z_var; /* New z-score variable. */
66 double mean; /* Distribution mean. */
67 double std_dev; /* Distribution standard deviation. */
70 /* DESCRIPTIVES transformation (for calculating Z-scores).  Built by
   setup_z_trns and released by descriptives_trns_free.
   NOTE(review): descriptives_trns_free also reads an `ok' member that is not
   visible in this excerpt. */
73 struct dsc_z_score *z_scores; /* Array of Z-scores. */
74 int z_score_cnt; /* Number of Z-scores. */
/* VARS and VAR_CNT are filled in by setup_z_trns when MISSING_TYPE is
   DSC_LISTWISE; descriptives_trns_free asserts that VARS is non-null
   exactly in that case. */
75 const struct variable **vars; /* Variables for listwise missing checks. */
76 size_t var_cnt; /* Number of variables. */
77 enum dsc_missing_type missing_type; /* Treatment of missing values. */
78 enum mv_class exclude; /* Classes of missing values to exclude. */
79 struct casereader *z_reader; /* Reader for count, mean, stddev. */
80 casenumber count; /* Number left in this SPLIT FILE group.*/
84 /* Statistics. Used as bit indexes, so must be 32 or fewer.
   The non-negative values also index dsc_info[] and the stats[] array in
   each dsc_var, so their order must match the dsc_info initializer.
   DSC_N_STATS is the number of real statistics, not a statistic itself. */
87 DSC_MEAN = 0, DSC_SEMEAN, DSC_STDDEV, DSC_VARIANCE, DSC_KURTOSIS,
88 DSC_SEKURT, DSC_SKEWNESS, DSC_SESKEW, DSC_RANGE, DSC_MIN,
89 DSC_MAX, DSC_SUM, DSC_N_STATS,
91 /* Only valid as sort criteria.  Both are negative, so they must never be
   used as a bit index or as an index into dsc_info[]. */
92 DSC_NAME = -2, /* Sort by name. */
93 DSC_NONE = -1 /* Unsorted. */
96 /* Describes one statistic: how it is spelled in syntax, how it is titled
   in output, and which moment it requires. */
97 struct dsc_statistic_info
99 const char *identifier; /* Syntax keyword compared by match_statistic. */
100 const char *name; /* Full name; passed through gettext for column titles. */
101 enum moment moment; /* Highest moment needed to calculate. */
104 /* Table of statistics, indexed by DSC_*.
   The two standard-error entries (SEKURTOSIS, SESKEWNESS) list MOMENT_NONE
   themselves; cmd_descriptives adds DSC_KURTOSIS / DSC_SKEWNESS to the
   statistics to calculate whenever they are shown, which is what raises the
   required moment.  RANGE, MINIMUM and MAXIMUM list MOMENT_NONE because
   calc_descriptives derives them from the per-variable min and max fields
   rather than from the moments object. */
105 static const struct dsc_statistic_info dsc_info[DSC_N_STATS] =
107 {"MEAN", N_("Mean"), MOMENT_MEAN},
108 {"SEMEAN", N_("S.E. Mean"), MOMENT_VARIANCE},
109 {"STDDEV", N_("Std Dev"), MOMENT_VARIANCE},
110 {"VARIANCE", N_("Variance"), MOMENT_VARIANCE},
111 {"KURTOSIS", N_("Kurtosis"), MOMENT_KURTOSIS},
112 {"SEKURTOSIS", N_("S.E. Kurt"), MOMENT_NONE},
113 {"SKEWNESS", N_("Skewness"), MOMENT_SKEWNESS},
114 {"SESKEWNESS", N_("S.E. Skew"), MOMENT_NONE},
115 {"RANGE", N_("Range"), MOMENT_NONE},
116 {"MINIMUM", N_("Minimum"), MOMENT_NONE},
117 {"MAXIMUM", N_("Maximum"), MOMENT_NONE},
118 {"SUM", N_("Sum"), MOMENT_MEAN},
121 /* Statistics calculated by default if none are explicitly requested. */
123 #define DEFAULT_STATS \
124 ((1ul << DSC_MEAN) | (1ul << DSC_STDDEV) | (1ul << DSC_MIN) \
127 /* A variable specified on DESCRIPTIVES. */
130 const struct variable *v; /* Variable to calculate on. */
131 char *z_name; /* Name for z-score variable; NULL if none wanted. */
132 double valid, missing; /* Valid, missing counts (sums of case weights). */
133 struct moments *moments; /* Moments; callers check for NULL before use. */
134 double min, max; /* Minimum and maximum values. */
135 double stats[DSC_N_STATS]; /* All the stats' values, indexed by DSC_*. */
/* Output format, chosen with the LINE and SERIAL keywords of the FORMAT
   subcommand.  display() emits separate "Valid N" and "Missing N" columns
   for DSC_SERIAL. */
141 DSC_LINE, /* Abbreviated format. */
142 DSC_SERIAL /* Long format. */
145 /* A DESCRIPTIVES procedure.  Allocated and filled in by cmd_descriptives;
   the accumulated results are reset and recomputed by calc_descriptives. */
148 /* Per-variable info. */
149 struct dsc_var *vars; /* Variables. */
150 size_t var_cnt; /* Number of variables. */
153 enum dsc_missing_type missing_type; /* Treatment of missing values. */
154 enum mv_class exclude; /* Classes of missing values to exclude. */
155 int show_var_labels; /* Nonzero to show variable labels. */
156 int show_index; /* Nonzero to show variable index. */
157 enum dsc_format format; /* Output format. */
159 /* Accumulated results. */
160 double missing_listwise; /* Sum of weights of cases missing listwise. */
161 double valid; /* Sum of weights of valid cases. */
162 bool bad_warn; /* Warn if bad weight found. */
163 enum dsc_statistic sort_by_stat; /* Statistic to sort by; DSC_NAME (-2): by name; DSC_NONE (-1): unsorted. */
164 int sort_ascending; /* !0: ascending order; 0: descending. */
165 unsigned long show_stats; /* Statistics to display (bit per DSC_*). */
166 unsigned long calc_stats; /* Statistics to calculate (bit per DSC_*). */
167 enum moment max_moment; /* Highest moment needed for stats. */
170 struct casewriter *z_writer; /* Mean and stddev per SPLIT FILE group. */
174 static enum dsc_statistic match_statistic (struct lexer *);
175 static void free_dsc_proc (struct dsc_proc *);
177 /* Z-score functions. */
178 static bool try_name (const struct dictionary *dict,
179 struct dsc_proc *dsc, const char *name);
180 static char *generate_z_varname (const struct dictionary *dict,
181 struct dsc_proc *dsc,
182 const char *name, int *z_cnt);
183 static void dump_z_table (struct dsc_proc *);
184 static void setup_z_trns (struct dsc_proc *, struct dataset *);
186 /* Procedure execution functions. */
187 static void calc_descriptives (struct dsc_proc *, struct casereader *,
189 static void display (struct dsc_proc *dsc);
191 /* Parser and outline. */
193 /* Handles DESCRIPTIVES.  Parses the subcommands into a freshly allocated
   dsc_proc, works out which statistics must be shown and calculated, and
   then calls calc_descriptives once for every group handed back by the
   SPLIT FILE casegrouper.  Returns CMD_SUCCESS when both the grouper and
   proc_commit report success, CMD_CASCADING_FAILURE otherwise.  (The exit
   taken on a parse error is not visible in this excerpt.) */
195 cmd_descriptives (struct lexer *lexer, struct dataset *ds)
197 struct dictionary *dict = dataset_dict (ds);
198 struct dsc_proc *dsc;
199 const struct variable **vars = NULL;
201 int save_z_scores = 0;
206 struct casegrouper *grouper;
207 struct casereader *group;
209 /* Create and initialize dsc. */
210 dsc = xmalloc (sizeof *dsc);
213 dsc->missing_type = DSC_VARIABLE;
214 dsc->exclude = MV_ANY;
215 dsc->show_var_labels = 1;
217 dsc->format = DSC_LINE;
218 dsc->missing_listwise = 0.;
221 dsc->sort_by_stat = DSC_NONE;
222 dsc->sort_ascending = 1;
223 dsc->show_stats = dsc->calc_stats = DEFAULT_STATS;
224 dsc->z_writer = NULL;
226 /* Parse DESCRIPTIVES. */
227 while (lex_token (lexer) != T_ENDCMD)
/* MISSING: VARIABLE or LISTWISE picks the missing-value policy; INCLUDE
   changes the excluded class from the initial MV_ANY to MV_SYSTEM. */
229 if (lex_match_id (lexer, "MISSING"))
231 lex_match (lexer, T_EQUALS);
232 while (lex_token (lexer) != T_ENDCMD && lex_token (lexer) != T_SLASH)
234 if (lex_match_id (lexer, "VARIABLE"))
235 dsc->missing_type = DSC_VARIABLE;
236 else if (lex_match_id (lexer, "LISTWISE"))
237 dsc->missing_type = DSC_LISTWISE;
238 else if (lex_match_id (lexer, "INCLUDE"))
239 dsc->exclude = MV_SYSTEM;
242 lex_error (lexer, NULL);
245 lex_match (lexer, T_COMMA);
/* SAVE: the statement that records the request is not visible in this
   excerpt; presumably it sets save_z_scores -- TODO confirm. */
248 else if (lex_match_id (lexer, "SAVE"))
/* FORMAT: label, index and layout options. */
250 else if (lex_match_id (lexer, "FORMAT"))
252 lex_match (lexer, T_EQUALS);
253 while (lex_token (lexer) != T_ENDCMD && lex_token (lexer) != T_SLASH)
255 if (lex_match_id (lexer, "LABELS"))
256 dsc->show_var_labels = 1;
257 else if (lex_match_id (lexer, "NOLABELS"))
258 dsc->show_var_labels = 0;
259 else if (lex_match_id (lexer, "INDEX"))
261 else if (lex_match_id (lexer, "NOINDEX"))
263 else if (lex_match_id (lexer, "LINE"))
264 dsc->format = DSC_LINE;
265 else if (lex_match_id (lexer, "SERIAL"))
266 dsc->format = DSC_SERIAL;
269 lex_error (lexer, NULL);
272 lex_match (lexer, T_COMMA);
/* STATISTICS: each keyword ORs one or more bits into show_stats; ALL sets
   every bit below DSC_N_STATS.  An empty result falls back to
   DEFAULT_STATS.  Whether show_stats is cleared before the loop is not
   visible in this excerpt. */
275 else if (lex_match_id (lexer, "STATISTICS"))
277 lex_match (lexer, T_EQUALS);
279 while (lex_token (lexer) != T_ENDCMD && lex_token (lexer) != T_SLASH)
281 if (lex_match (lexer, T_ALL))
282 dsc->show_stats |= (1ul << DSC_N_STATS) - 1;
283 else if (lex_match_id (lexer, "DEFAULT"))
284 dsc->show_stats |= DEFAULT_STATS;
/* NOTE(review): match_statistic is documented to return DSC_NONE (-1) when
   no statistic is recognized; shifting by a negative count is undefined
   behavior in C.  Confirm whether the result should be checked first. */
286 dsc->show_stats |= 1ul << (match_statistic (lexer));
287 lex_match (lexer, T_COMMA);
289 if (dsc->show_stats == 0)
290 dsc->show_stats = DEFAULT_STATS;
/* SORT: by NAME or by a statistic (DSC_MEAN when none is recognized),
   optionally followed by (A) or (D) for the direction. */
292 else if (lex_match_id (lexer, "SORT"))
294 lex_match (lexer, T_EQUALS);
295 if (lex_match_id (lexer, "NAME"))
296 dsc->sort_by_stat = DSC_NAME;
299 dsc->sort_by_stat = match_statistic (lexer);
300 if (dsc->sort_by_stat == DSC_NONE )
301 dsc->sort_by_stat = DSC_MEAN;
303 if (lex_match (lexer, T_LPAREN))
305 if (lex_match_id (lexer, "A"))
306 dsc->sort_ascending = 1;
307 else if (lex_match_id (lexer, "D"))
308 dsc->sort_ascending = 0;
310 lex_error (lexer, NULL);
311 lex_force_match (lexer, T_RPAREN);
/* Anything else is taken as the variable list, which is accepted only
   while no variables have been parsed yet.  An optional leading
   VARIABLES= is skipped. */
314 else if (var_cnt == 0)
316 if (lex_next_token (lexer, 1) == T_EQUALS)
318 lex_match_id (lexer, "VARIABLES");
319 lex_match (lexer, T_EQUALS);
322 while (lex_token (lexer) != T_ENDCMD && lex_token (lexer) != T_SLASH)
326 if (!parse_variables_const (lexer, dict, &vars, &var_cnt,
327 PV_APPEND | PV_NO_DUPLICATE | PV_NUMERIC))
/* Grow dsc->vars to the new total and visit only the entries that were
   just appended (indexes dsc->var_cnt .. var_cnt - 1). */
330 dsc->vars = xnrealloc ((void *)dsc->vars, var_cnt, sizeof *dsc->vars);
331 for (i = dsc->var_cnt; i < var_cnt; i++)
333 struct dsc_var *dv = &dsc->vars[i];
338 dsc->var_cnt = var_cnt;
/* A parenthesized identifier names the z-score variable for the most
   recently added variable, provided try_name accepts it. */
340 if (lex_match (lexer, T_LPAREN))
342 if (lex_token (lexer) != T_ID)
344 lex_error (lexer, NULL);
347 if (try_name (dict, dsc, lex_tokcstr (lexer)))
349 struct dsc_var *dsc_var = &dsc->vars[dsc->var_cnt - 1];
350 dsc_var->z_name = xstrdup (lex_tokcstr (lexer));
354 msg (SE, _("Z-score variable name %s would be"
355 " a duplicate variable name."), lex_tokcstr (lexer));
357 if (!lex_force_match (lexer, T_RPAREN))
364 lex_error (lexer, NULL);
368 lex_match (lexer, T_SLASH);
372 msg (SE, _("No variables specified."));
376 /* Construct z-score varnames, show translation table. */
377 if (z_cnt || save_z_scores)
379 struct caseproto *proto;
/* Variables that still lack a z-score name get a generated one. */
385 for (i = 0; i < dsc->var_cnt; i++)
387 struct dsc_var *dsc_var = &dsc->vars[i];
388 if (dsc_var->z_name == NULL)
390 const char *name = var_get_name (dsc_var->v);
391 dsc_var->z_name = generate_z_varname (dict, dsc, name,
393 if (dsc_var->z_name == NULL)
401 /* It would be better to handle Z scores correctly (however we define
402 that) when TEMPORARY is in effect, but in the meantime this at least
403 prevents a use-after-free error. See bug #38786. */
404 if (proc_make_temporary_transformations_permanent (ds))
405 msg (SW, _("DESCRIPTIVES with Z scores ignores TEMPORARY. "
406 "Temporary transformations will be made permanent."));
/* One numeric column for the group's case count plus a (mean, std dev)
   pair per z-score variable; this is the layout that calc_descriptives
   writes and descriptives_trns_proc reads back. */
408 proto = caseproto_create ();
409 for (i = 0; i < 1 + 2 * z_cnt; i++)
410 proto = caseproto_add_width (proto, 0);
411 dsc->z_writer = autopaging_writer_create (proto);
412 caseproto_unref (proto);
417 /* Figure out statistics to display. */
/* Showing skewness or kurtosis also shows its standard error. */
418 if (dsc->show_stats & (1ul << DSC_SKEWNESS))
419 dsc->show_stats |= 1ul << DSC_SESKEW;
420 if (dsc->show_stats & (1ul << DSC_KURTOSIS))
421 dsc->show_stats |= 1ul << DSC_SEKURT;
423 /* Figure out which statistics to calculate. */
424 dsc->calc_stats = dsc->show_stats;
426 dsc->calc_stats |= (1ul << DSC_MEAN) | (1ul << DSC_STDDEV);
/* A statistic used as the sort key must be calculated even if hidden.
   The negative DSC_NAME and DSC_NONE are excluded by the >= 0 test. */
427 if (dsc->sort_by_stat >= 0)
428 dsc->calc_stats |= 1ul << dsc->sort_by_stat;
/* calc_descriptives fills in a standard error only when the underlying
   statistic is not SYSMIS, so that statistic has to be calculated too. */
429 if (dsc->show_stats & (1ul << DSC_SESKEW))
430 dsc->calc_stats |= 1ul << DSC_SKEWNESS;
431 if (dsc->show_stats & (1ul << DSC_SEKURT))
432 dsc->calc_stats |= 1ul << DSC_KURTOSIS;
434 /* Figure out maximum moment needed and allocate moments for
436 dsc->max_moment = MOMENT_NONE;
437 for (i = 0; i < DSC_N_STATS; i++)
438 if (dsc->calc_stats & (1ul << i) && dsc_info[i].moment > dsc->max_moment)
439 dsc->max_moment = dsc_info[i].moment;
440 if (dsc->max_moment != MOMENT_NONE)
441 for (i = 0; i < dsc->var_cnt; i++)
442 dsc->vars[i].moments = moments_create (dsc->max_moment);
445 grouper = casegrouper_create_splits (proc_open_filtering (ds, z_cnt == 0),
447 while (casegrouper_get_next_group (grouper, &group))
448 calc_descriptives (dsc, group, ds);
449 ok = casegrouper_destroy (grouper);
450 ok = proc_commit (ds) && ok;
454 setup_z_trns (dsc, ds);
459 return ok ? CMD_SUCCESS : CMD_CASCADING_FAILURE;
467 /* Returns the statistic named by the current token and skips past the token.
468 Returns DSC_NONE if no statistic is given (e.g., subcommand with no
469 specifiers). Emits an error if the current token ID does not name a statistic. */
/* Implementation: a linear scan of dsc_info[] against the current
   identifier token.  Tokens that are not identifiers skip the scan
   entirely. */
471 static enum dsc_statistic
472 match_statistic (struct lexer *lexer)
474 if (lex_token (lexer) == T_ID)
476 enum dsc_statistic stat;
/* lex_match_id is tried with each statistic's syntax keyword in turn. */
478 for (stat = 0; stat < DSC_N_STATS; stat++)
479 if (lex_match_id (lexer, dsc_info[stat].identifier))
/* Presumably reached only when no keyword matched; the statement taken on
   a match is not visible in this excerpt -- TODO confirm. */
483 lex_error (lexer, _("expecting statistic name: reverting to default"));
/* Releases resources owned by DSC: each variable's z_name string and
   moments object, and the z_writer.
   NOTE(review): dsc_var->moments is NULL-checked elsewhere before use and
   z_writer is set to NULL by setup_z_trns, so moments_destroy and
   casewriter_destroy presumably accept NULL -- confirm.  Any freeing of
   dsc->vars or of DSC itself is not visible in this excerpt. */
491 free_dsc_proc (struct dsc_proc *dsc)
498 for (i = 0; i < dsc->var_cnt; i++)
500 struct dsc_var *dsc_var = &dsc->vars[i];
501 free (dsc_var->z_name);
502 moments_destroy (dsc_var->moments);
504 casewriter_destroy (dsc->z_writer);
511 /* Returns false if NAME is a duplicate of any existing variable name or
512 of any previously-declared z-var name; otherwise returns true.
   Existing variables are looked up in DICT; previously declared z-var
   names are the non-null z_name fields in DSC, compared without regard to
   case. */
514 try_name (const struct dictionary *dict, struct dsc_proc *dsc,
/* Clash with a variable already in the dictionary. */
519 if (dict_lookup_var (dict, name) != NULL)
/* Clash with a z-score name already assigned in this command. */
521 for (i = 0; i < dsc->var_cnt; i++)
523 struct dsc_var *dsc_var = &dsc->vars[i];
524 if (dsc_var->z_name != NULL && !utf8_strcasecmp (dsc_var->z_name, name))
530 /* Generates a name for a Z-score variable based on a variable
531 named VAR_NAME, given that *Z_CNT generated variable names are
532 known to already exist. If successful, returns the new name
533 as a dynamically allocated string. On failure, returns NULL.
   The first candidate is "Z" followed by VAR_NAME, truncated by
   utf8_encoding_trunc for the dictionary's encoding.  If try_name rejects
   it, numbered fallback names are tried, selected by the value of *Z_CNT. */
535 generate_z_varname (const struct dictionary *dict, struct dsc_proc *dsc,
536 const char *var_name, int *z_cnt)
538 char *z_name, *trunc_name;
540 /* Try a name based on the original variable name. */
541 z_name = xasprintf ("Z%s", var_name);
542 trunc_name = utf8_encoding_trunc (z_name, dict_get_encoding (dict),
545 if (try_name (dict, dsc, trunc_name))
549 /* Generate a synthetic name. */
/* Each later family restarts its numbering at 01 by subtracting the count
   consumed by the families before it (99, 108, 117).  The condition
   guarding the ZSC branch is not visible in this excerpt. */
557 sprintf (name, "ZSC%03d", *z_cnt);
558 else if (*z_cnt <= 108)
559 sprintf (name, "STDZ%02d", *z_cnt - 99);
560 else if (*z_cnt <= 117)
561 sprintf (name, "ZZZZ%02d", *z_cnt - 108);
562 else if (*z_cnt <= 126)
563 sprintf (name, "ZQZQ%02d", *z_cnt - 117);
/* NOTE(review): the message below says "ZSC0999", but the "ZSC%03d" format
   above and the stated total of 126 (99 + 9 + 9 + 9) suggest the range is
   ZSC001-ZSC099 -- confirm and correct the message text. */
566 msg (SE, _("Ran out of generic names for Z-score variables. "
567 "There are only 126 generic names: ZSC001-ZSC0999, "
568 "STDZ01-STDZ09, ZZZZ01-ZZZZ09, ZQZQ01-ZQZQ09."));
572 if (try_name (dict, dsc, name))
573 return xstrdup (name);
578 /* Outputs a table describing the mapping between source
579 variables and Z-score variables.  Only variables in DSC with a non-null
   z_name are involved. */
581 dump_z_table (struct dsc_proc *dsc)
/* First pass over the variables that have a z-score name; CNT, used to
   size the table below, is presumably counted here (the loop body is not
   visible in this excerpt). */
589 for (i = 0; i < dsc->var_cnt; i++)
590 if (dsc->vars[i].z_name != NULL)
/* Two columns (source, target) and one header row above CNT data rows. */
594 t = tab_create (2, cnt + 1);
595 tab_title (t, _("Mapping of variables to corresponding Z-scores."));
596 tab_headers (t, 0, 0, 1, 0);
597 tab_box (t, TAL_1, TAL_1, TAL_0, TAL_1, 0, 0, 1, cnt);
598 tab_hline (t, TAL_2, 0, 1, 1);
599 tab_text (t, 0, 0, TAB_CENTER | TAT_TITLE, _("Source"));
600 tab_text (t, 1, 0, TAB_CENTER | TAT_TITLE, _("Target"));
/* Y is the output row; it advances only for variables that have a
   z-score name, starting below the header row. */
605 for (i = 0, y = 1; i < dsc->var_cnt; i++)
606 if (dsc->vars[i].z_name != NULL)
608 tab_text (t, 0, y, TAB_LEFT, var_to_string (dsc->vars[i].v));
609 tab_text (t, 1, y++, TAB_LEFT, dsc->vars[i].z_name);
616 /* Transformation function to calculate Z-scores. Will return SYSMIS if any of
617 the following are true: 1) mean or standard deviation is SYSMIS; 2) score is
618 SYSMIS; 3) score is user-missing and user-missing values were not included in
619 the original analysis; 4) any of the variables in the original analysis were
620 missing (either system or user-missing values that weren't included). */
/* Transformation callback registered by setup_z_trns.  TRNS_ is the
   struct dsc_trns; *C is the case being transformed and is replaced by an
   unshared copy before any z-score value is written.  CASE_IDX is unused. */
623 descriptives_trns_proc (void *trns_, struct ccase **c,
624 casenumber case_idx UNUSED)
626 struct dsc_trns *t = trns_;
627 struct dsc_z_score *z;
628 const struct variable **vars;
633 struct ccase *z_case;
/* Fetch the next summary case.  The condition under which this happens is
   not visible in this excerpt; presumably it is when the current group's
   count has run out -- TODO confirm. */
635 z_case = casereader_read (t->z_reader);
/* Summary case layout: the group's count first, then a (mean, std dev)
   pair for each z-score in order. */
640 t->count = case_num_idx (z_case, z_idx++);
641 for (z = t->z_scores; z < t->z_scores + t->z_score_cnt; z++)
643 z->mean = case_num_idx (z_case, z_idx++);
644 z->std_dev = case_num_idx (z_case, z_idx++);
/* Failure path: every mean and std dev becomes SYSMIS, which the test
   further down treats as "no z-score can be computed". */
652 msg (SE, _("Internal error processing Z scores"));
655 for (z = t->z_scores; z < t->z_scores + t->z_score_cnt; z++)
656 z->mean = z->std_dev = SYSMIS;
/* Listwise mode: inspect every analysis variable for a missing value in
   this case; the outcome presumably feeds all_sysmis below. */
661 if (t->missing_type == DSC_LISTWISE)
664 for (vars = t->vars; vars < t->vars + t->var_cnt; vars++)
666 double score = case_num (*c, *vars);
667 if (var_is_num_missing (*vars, score, t->exclude))
/* Obtain a writable case before storing results. */
675 *c = case_unshare (*c);
676 for (z = t->z_scores; z < t->z_scores + t->z_score_cnt; z++)
678 double input = case_num (*c, z->src_var);
679 double *output = &case_data_rw (*c, z->z_var)->f;
/* NOTE(review): the visible condition rejects SYSMIS statistics and
   missing inputs but not a zero std_dev -- confirm the intended result of
   the division in that case. */
681 if (z->mean == SYSMIS || z->std_dev == SYSMIS || all_sysmis
682 || var_is_num_missing (z->src_var, input, t->exclude))
685 *output = (input - z->mean) / z->std_dev;
687 return TRNS_CONTINUE;
690 /* Frees the struct dsc_trns passed as TRNS_.  The status OK is computed
   from the transformation's own ok flag and the z_reader's error state
   before the reader is destroyed. */
692 descriptives_trns_free (void *trns_)
694 struct dsc_trns *t = trns_;
695 bool ok = t->ok && !casereader_error (t->z_reader);
698 casereader_destroy (t->z_reader);
/* Invariant: the vars array exists exactly when listwise deletion is in
   effect (listwise with non-null vars, or non-listwise with null vars). */
699 assert((t->missing_type != DSC_LISTWISE) ^ (t->vars != NULL));
706 /* Sets up a transformation to calculate Z scores.  Builds a struct
   dsc_trns from DSC, creates one new dictionary variable in DS for every
   variable that has a z_name, and registers descriptives_trns_proc and
   descriptives_trns_free on DS. */
708 setup_z_trns (struct dsc_proc *dsc, struct dataset *ds)
/* First pass over the variables that have a z-score name; CNT sizes the
   z_scores array below (the loop body is not visible in this excerpt). */
713 for (cnt = i = 0; i < dsc->var_cnt; i++)
714 if (dsc->vars[i].z_name != NULL)
717 t = xmalloc (sizeof *t);
718 t->z_scores = xnmalloc (cnt, sizeof *t->z_scores);
719 t->z_score_cnt = cnt;
720 t->missing_type = dsc->missing_type;
721 t->exclude = dsc->exclude;
/* Listwise deletion needs the full list of analysis variables so that the
   transformation can re-check each case for missing values. */
722 if ( t->missing_type == DSC_LISTWISE )
724 t->var_cnt = dsc->var_cnt;
725 t->vars = xnmalloc (t->var_cnt, sizeof *t->vars);
726 for (i = 0; i < t->var_cnt; i++)
727 t->vars[i] = dsc->vars[i].v;
/* The writer is converted into the reader that the transformation
   consumes, and DSC gives up its pointer to it. */
734 t->z_reader = casewriter_make_reader (dsc->z_writer);
737 dsc->z_writer = NULL;
/* Second pass: create each z-score variable, label it, and take the next
   slot in t->z_scores for it. */
739 for (cnt = i = 0; i < dsc->var_cnt; i++)
741 struct dsc_var *dv = &dsc->vars[i];
742 if (dv->z_name != NULL)
744 struct dsc_z_score *z;
745 struct variable *dst_var;
/* Width 0 is the same width cmd_descriptives uses for the numeric
   columns of the summary case. */
748 dst_var = dict_create_var_assert (dataset_dict (ds), dv->z_name, 0);
750 label = xasprintf (_("Z-score of %s"),var_to_string (dv->v));
751 var_set_label (dst_var, label, false);
754 z = &t->z_scores[cnt++];
760 add_transformation (ds,
761 descriptives_trns_proc, descriptives_trns_free, t);
764 /* Statistical calculation. */
766 static bool listwise_missing (struct dsc_proc *dsc, const struct ccase *c);
768 /* Calculates and displays descriptive statistics for the cases in GROUP. */
/* Processes one group of cases: resets the accumulators in DSC, makes one
   pass (two when moments above the mean are needed) over GROUP, derives
   every statistic in each variable's stats[] array and, when z-scores are
   being saved, writes one summary case to dsc->z_writer. */
771 calc_descriptives (struct dsc_proc *dsc, struct casereader *group,
774 struct casereader *pass1, *pass2;
/* Peek at the first case without consuming it, for the split-file values;
   the branch that destroys GROUP is presumably the empty-group case. */
780 c = casereader_peek (group, 0);
783 casereader_destroy (group);
786 output_split_file_values (ds, c);
789 group = casereader_create_filter_weight (group, dataset_dict (ds),
/* A second reader is cloned only when something beyond the mean has to be
   calculated. */
793 pass2 = dsc->max_moment <= MOMENT_MEAN ? NULL : casereader_clone (pass1);
/* Reset the per-variable accumulators for this group. */
795 for (i = 0; i < dsc->var_cnt; i++)
797 struct dsc_var *dv = &dsc->vars[i];
799 dv->valid = dv->missing = 0.0;
800 if (dv->moments != NULL)
801 moments_clear (dv->moments);
805 dsc->missing_listwise = 0.;
808 /* First pass to handle most of the work. */
810 for (; (c = casereader_read (pass1)) != NULL; case_unref (c))
812 double weight = dict_get_case_weight (dataset_dict (ds), c, NULL);
814 /* Check for missing values. */
/* missing_listwise is accumulated under either policy; what happens to
   the case in listwise mode is not visible in this excerpt (presumably it
   is skipped). */
815 if (listwise_missing (dsc, c))
817 dsc->missing_listwise += weight;
818 if (dsc->missing_type == DSC_LISTWISE)
821 dsc->valid += weight;
823 for (i = 0; i < dsc->var_cnt; i++)
825 struct dsc_var *dv = &dsc->vars[i];
826 double x = case_num (c, dv->v);
828 if (var_is_num_missing (dv->v, x, dsc->exclude))
830 dv->missing += weight;
834 if (dv->moments != NULL)
835 moments_pass_one (dv->moments, x, weight);
/* A failed first-pass reader also releases the second-pass clone. */
845 if (!casereader_destroy (pass1))
847 casereader_destroy (pass2);
851 /* Second pass for higher-order moments. */
852 if (dsc->max_moment > MOMENT_MEAN)
854 for (; (c = casereader_read (pass2)) != NULL; case_unref (c))
856 double weight = dict_get_case_weight (dataset_dict (ds), c, NULL);
858 /* Check for missing values. */
859 if (dsc->missing_type == DSC_LISTWISE && listwise_missing (dsc, c))
862 for (i = 0; i < dsc->var_cnt; i++)
864 struct dsc_var *dv = &dsc->vars[i];
865 double x = case_num (c, dv->v);
867 if (var_is_num_missing (dv->v, x, dsc->exclude))
870 if (dv->moments != NULL)
871 moments_pass_two (dv->moments, x, weight);
874 if (!casereader_destroy (pass2))
878 /* Calculate results. */
/* Summary case for the z-score transformation: the count goes first.  The
   condition guarding these statements is not visible in this excerpt. */
881 c = case_create (casewriter_get_proto (dsc->z_writer));
883 case_data_rw_idx (c, z_idx++)->f = count;
888 for (i = 0; i < dsc->var_cnt; i++)
890 struct dsc_var *dv = &dsc->vars[i];
/* Every statistic starts as SYSMIS and is overwritten only if computed. */
894 for (j = 0; j < DSC_N_STATS; j++)
895 dv->stats[j] = SYSMIS;
/* W is this variable's valid weight: total valid weight minus the weight
   of the cases in which this variable was missing. */
897 dv->valid = W = dsc->valid - dv->missing;
899 if (dv->moments != NULL)
900 moments_calculate (dv->moments, NULL,
901 &dv->stats[DSC_MEAN], &dv->stats[DSC_VARIANCE],
902 &dv->stats[DSC_SKEWNESS], &dv->stats[DSC_KURTOSIS]);
903 if (dsc->calc_stats & (1ul << DSC_SEMEAN)
904 && dv->stats[DSC_VARIANCE] != SYSMIS && W > 0.)
905 dv->stats[DSC_SEMEAN] = sqrt (dv->stats[DSC_VARIANCE]) / sqrt (W);
906 if (dsc->calc_stats & (1ul << DSC_STDDEV)
907 && dv->stats[DSC_VARIANCE] != SYSMIS)
908 dv->stats[DSC_STDDEV] = sqrt (dv->stats[DSC_VARIANCE]);
/* The standard errors depend only on W but are reported only when the
   statistic they belong to is itself available. */
909 if (dsc->calc_stats & (1ul << DSC_SEKURT))
910 if (dv->stats[DSC_KURTOSIS] != SYSMIS)
911 dv->stats[DSC_SEKURT] = calc_sekurt (W);
912 if (dsc->calc_stats & (1ul << DSC_SESKEW)
913 && dv->stats[DSC_SKEWNESS] != SYSMIS)
914 dv->stats[DSC_SESKEW] = calc_seskew (W);
/* DBL_MAX in min and -DBL_MAX in max are treated as "no value seen" and
   reported as SYSMIS.  Where min and max are set is not visible in this
   excerpt. */
915 dv->stats[DSC_RANGE] = ((dv->min == DBL_MAX || dv->max == -DBL_MAX)
916 ? SYSMIS : dv->max - dv->min);
917 dv->stats[DSC_MIN] = dv->min == DBL_MAX ? SYSMIS : dv->min;
918 dv->stats[DSC_MAX] = dv->max == -DBL_MAX ? SYSMIS : dv->max;
919 if (dsc->calc_stats & (1ul << DSC_SUM))
920 dv->stats[DSC_SUM] = W * dv->stats[DSC_MEAN];
/* Mean and std dev follow the count in the summary case, in the order
   that descriptives_trns_proc reads them back. */
924 case_data_rw_idx (c, z_idx++)->f = dv->stats[DSC_MEAN];
925 case_data_rw_idx (c, z_idx++)->f = dv->stats[DSC_STDDEV];
930 casewriter_write (dsc->z_writer, c);
932 /* Output results. */
936 /* Returns true if any of the descriptives variables in DSC's
937 variable list have missing values in case C, false otherwise.
   "Missing" is judged by var_is_num_missing against the classes in
   dsc->exclude. */
939 listwise_missing (struct dsc_proc *dsc, const struct ccase *c)
943 for (i = 0; i < dsc->var_cnt; i++)
945 struct dsc_var *dv = &dsc->vars[i];
946 double x = case_num (c, dv->v);
948 if (var_is_num_missing (dv->v, x, dsc->exclude))
954 /* Statistical display. */
956 static algo_compare_func descriptives_compare_dsc_vars;
958 /* Displays a table of descriptive statistics for DSC: one header row and
   one row per variable.  The variables are sorted in place first unless
   sort_by_stat is DSC_NONE. */
960 display (struct dsc_proc *dsc)
/* Column count: the variable name, then one N column (two for
   DSC_SERIAL), then presumably one per statistic selected in show_stats
   (the statement under the loop is not visible in this excerpt). */
966 nc = 1 + (dsc->format == DSC_SERIAL ? 2 : 1);
967 for (i = 0; i < DSC_N_STATS; i++)
968 if (dsc->show_stats & (1ul << i))
971 if (dsc->sort_by_stat != DSC_NONE)
972 sort (dsc->vars, dsc->var_cnt, sizeof *dsc->vars,
973 descriptives_compare_dsc_vars, dsc);
975 t = tab_create (nc, dsc->var_cnt + 1);
976 tab_headers (t, 1, 0, 1, 0);
977 tab_box (t, TAL_1, TAL_1, -1, -1, 0, 0, nc - 1, dsc->var_cnt);
978 tab_box (t, -1, -1, -1, TAL_1, 1, 0, nc - 1, dsc->var_cnt);
979 tab_hline (t, TAL_2, 0, nc - 1, 1);
980 tab_vline (t, TAL_2, 1, 0, dsc->var_cnt);
/* Header row.  From here NC is reused as a running column index;
   presumably it is reset to 0 first -- TODO confirm, the statement is not
   visible in this excerpt. */
983 tab_text (t, nc++, 0, TAB_LEFT | TAT_TITLE, _("Variable"));
984 if (dsc->format == DSC_SERIAL)
986 tab_text (t, nc++, 0, TAB_CENTER | TAT_TITLE, _("Valid N"));
987 tab_text (t, nc++, 0, TAB_CENTER | TAT_TITLE, _("Missing N"));
990 tab_text (t, nc++, 0, TAB_CENTER | TAT_TITLE, "N");
992 for (i = 0; i < DSC_N_STATS; i++)
993 if (dsc->show_stats & (1ul << i))
995 const char *title = gettext (dsc_info[i].name);
996 tab_text (t, nc++, 0, TAB_CENTER | TAT_TITLE, title);
/* Data rows: variable I goes in table row I + 1, below the header. */
999 for (i = 0; i < dsc->var_cnt; i++)
1001 struct dsc_var *dv = &dsc->vars[i];
1005 tab_text (t, nc++, i + 1, TAB_LEFT, var_to_string (dv->v));
1006 tab_text_format (t, nc++, i + 1, 0, "%.*g", DBL_DIG + 1, dv->valid);
1007 if (dsc->format == DSC_SERIAL)
1008 tab_text_format (t, nc++, i + 1, 0, "%.*g", DBL_DIG + 1, dv->missing);
/* Statistics appear in DSC_* order, matching the header loop above. */
1010 for (j = 0; j < DSC_N_STATS; j++)
1011 if (dsc->show_stats & (1ul << j))
1012 tab_double (t, nc++, i + 1, TAB_NONE, dv->stats[j], NULL);
1015 tab_title (t, _("Valid cases = %.*g; cases with missing value(s) = %.*g."),
1016 DBL_DIG + 1, dsc->valid,
1017 DBL_DIG + 1, dsc->missing_listwise);
1022 /* Compares `struct dsc_var's A_ and B_ according to the ordering
1023 specified by DSC_ (a struct dsc_proc): case-insensitively by variable
   name when sort_by_stat is DSC_NAME, otherwise by the value of the
   selected statistic.  Used as the comparison callback for sort() in
   display(). */
1025 descriptives_compare_dsc_vars (const void *a_, const void *b_, const void *dsc_)
1027 const struct dsc_var *a = a_;
1028 const struct dsc_var *b = b_;
1029 const struct dsc_proc *dsc = dsc_;
1033 if (dsc->sort_by_stat == DSC_NAME)
1034 result = utf8_strcasecmp (var_get_name (a->v), var_get_name (b->v));
1037 double as = a->stats[dsc->sort_by_stat];
1038 double bs = b->stats[dsc->sort_by_stat];
/* Three-way comparison yielding -1, 0 or 1. */
1040 result = as < bs ? -1 : as > bs;
/* Descending order; the statement that adjusts RESULT is not visible in
   this excerpt (presumably it negates it -- TODO confirm). */
1043 if (!dsc->sort_ascending)