1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 1997-2000, 2009-2013 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
23 #include "data/casegrouper.h"
24 #include "data/casereader.h"
25 #include "data/casewriter.h"
26 #include "data/dataset.h"
27 #include "data/dictionary.h"
28 #include "data/transformations.h"
29 #include "data/variable.h"
30 #include "language/command.h"
31 #include "language/dictionary/split-file.h"
32 #include "language/lexer/lexer.h"
33 #include "language/lexer/variable-parser.h"
34 #include "libpspp/array.h"
35 #include "libpspp/assertion.h"
36 #include "libpspp/compiler.h"
37 #include "libpspp/i18n.h"
38 #include "libpspp/message.h"
39 #include "math/moments.h"
40 #include "output/tab.h"
42 #include "gl/xalloc.h"
45 #define _(msgid) gettext (msgid)
46 #define N_(msgid) msgid
48 /* DESCRIPTIVES private data. */
52 /* Handling of missing values. */
55 DSC_VARIABLE, /* Handle missing values on a per-variable basis. */
56 DSC_LISTWISE /* Discard entire case if any variable is missing. */
59 /* Describes properties of a distribution for the purpose of
60 calculating a Z-score. */
63 const struct variable *src_var; /* Variable on which z-score is based. */
64 struct variable *z_var; /* New z-score variable. */
65 double mean; /* Distribution mean. */
66 double std_dev; /* Distribution standard deviation. */
69 /* DESCRIPTIVES transformation (for calculating Z-scores). */
72 struct dsc_z_score *z_scores; /* Array of Z-scores. */
73 int z_score_cnt; /* Number of Z-scores. */
74 const struct variable **vars; /* Variables for listwise missing checks. */
75 size_t var_cnt; /* Number of variables. */
76 enum dsc_missing_type missing_type; /* Treatment of missing values. */
77 enum mv_class exclude; /* Classes of missing values to exclude. */
78 struct casereader *z_reader; /* Reader for count, mean, stddev. */
79 casenumber count; /* Number left in this SPLIT FILE group.*/
83 /* Statistics. Used as bit indexes, so must be 32 or fewer. */
86 DSC_MEAN = 0, DSC_SEMEAN, DSC_STDDEV, DSC_VARIANCE, DSC_KURTOSIS,
87 DSC_SEKURT, DSC_SKEWNESS, DSC_SESKEW, DSC_RANGE, DSC_MIN,
88 DSC_MAX, DSC_SUM, DSC_N_STATS,
90 /* Only valid as sort criteria. */
91 DSC_NAME = -2, /* Sort by name. */
92 DSC_NONE = -1 /* Unsorted. */
95 /* Describes one statistic. */
96 struct dsc_statistic_info
98 const char *identifier; /* Identifier. */
99 const char *name; /* Full name. */
100 enum moment moment; /* Highest moment needed to calculate. */
103 /* Table of statistics, indexed by DSC_*. */
104 static const struct dsc_statistic_info dsc_info[DSC_N_STATS] =
106 {"MEAN", N_("Mean"), MOMENT_MEAN},
107 {"SEMEAN", N_("S.E. Mean"), MOMENT_VARIANCE},
108 {"STDDEV", N_("Std Dev"), MOMENT_VARIANCE},
109 {"VARIANCE", N_("Variance"), MOMENT_VARIANCE},
110 {"KURTOSIS", N_("Kurtosis"), MOMENT_KURTOSIS},
111 {"SEKURTOSIS", N_("S.E. Kurt"), MOMENT_NONE},
112 {"SKEWNESS", N_("Skewness"), MOMENT_SKEWNESS},
113 {"SESKEWNESS", N_("S.E. Skew"), MOMENT_NONE},
114 {"RANGE", N_("Range"), MOMENT_NONE},
115 {"MINIMUM", N_("Minimum"), MOMENT_NONE},
116 {"MAXIMUM", N_("Maximum"), MOMENT_NONE},
117 {"SUM", N_("Sum"), MOMENT_MEAN},
120 /* Statistics calculated by default if none are explicitly requested. */
122 #define DEFAULT_STATS \
123 ((1ul << DSC_MEAN) | (1ul << DSC_STDDEV) | (1ul << DSC_MIN) \
126 /* A variable specified on DESCRIPTIVES. */
129 const struct variable *v; /* Variable to calculate on. */
130 char *z_name; /* Name for z-score variable; NULL if none assigned. */
131 double valid, missing; /* Valid, missing counts. */
132 struct moments *moments; /* Moments accumulator; NULL if no moments needed. */
133 double min, max; /* Minimum and maximum values. */
134 double stats[DSC_N_STATS]; /* All the stats' values, indexed by DSC_*. */
140 DSC_LINE, /* Abbreviated format. */
141 DSC_SERIAL /* Long format. */
144 /* A DESCRIPTIVES procedure. */
147 /* Per-variable info. */
148 struct dsc_var *vars; /* Variables. */
149 size_t var_cnt; /* Number of variables. */
152 enum dsc_missing_type missing_type; /* Treatment of missing values. */
153 enum mv_class exclude; /* Classes of missing values to exclude. */
154 int show_var_labels; /* Nonzero to show variable labels. */
155 int show_index; /* Nonzero to show variable index. */
156 enum dsc_format format; /* Output format. */
158 /* Accumulated results. */
159 double missing_listwise; /* Sum of weights of cases missing listwise. */
160 double valid; /* Sum of weights of valid cases. */
161 bool bad_warn; /* Warn if bad weight found. */
162 enum dsc_statistic sort_by_stat; /* Statistic to sort by; DSC_NAME (-2): by name; DSC_NONE (-1): unsorted. */
163 int sort_ascending; /* !0: ascending order; 0: descending. */
164 unsigned long show_stats; /* Statistics to display (bit mask indexed by DSC_*). */
165 unsigned long calc_stats; /* Statistics to calculate (bit mask indexed by DSC_*). */
166 enum moment max_moment; /* Highest moment needed for stats. */
169 struct casewriter *z_writer; /* Mean and stddev per SPLIT FILE group. */
173 static enum dsc_statistic match_statistic (struct lexer *);
174 static void free_dsc_proc (struct dsc_proc *);
176 /* Z-score functions. */
177 static bool try_name (const struct dictionary *dict,
178 struct dsc_proc *dsc, const char *name);
179 static char *generate_z_varname (const struct dictionary *dict,
180 struct dsc_proc *dsc,
181 const char *name, int *z_cnt);
182 static void dump_z_table (struct dsc_proc *);
183 static void setup_z_trns (struct dsc_proc *, struct dataset *);
185 /* Procedure execution functions. */
186 static void calc_descriptives (struct dsc_proc *, struct casereader *,
188 static void display (struct dsc_proc *dsc);
190 /* Parser and outline. */
192 /* Handles DESCRIPTIVES.
   Parses the subcommands (MISSING, SAVE, FORMAT, STATISTICS, SORT and the
   variable list) from LEXER, works out which statistics must be shown and
   calculated, then runs calc_descriptives() once per SPLIT FILE group of DS.
   Returns CMD_SUCCESS if everything went well, CMD_CASCADING_FAILURE
   otherwise. */
194 cmd_descriptives (struct lexer *lexer, struct dataset *ds)
196 struct dictionary *dict = dataset_dict (ds);
197 struct dsc_proc *dsc;
198 const struct variable **vars = NULL;
200 int save_z_scores = 0;
205 struct casegrouper *grouper;
206 struct casereader *group;
208 /* Create and initialize dsc. */
209 dsc = xmalloc (sizeof *dsc);
212 dsc->missing_type = DSC_VARIABLE;
213 dsc->exclude = MV_ANY;
214 dsc->show_var_labels = 1;
216 dsc->format = DSC_LINE;
217 dsc->missing_listwise = 0.;
220 dsc->sort_by_stat = DSC_NONE;
221 dsc->sort_ascending = 1;
222 dsc->show_stats = dsc->calc_stats = DEFAULT_STATS;
223 dsc->z_writer = NULL;
225 /* Parse DESCRIPTIVES. */
226 while (lex_token (lexer) != T_ENDCMD)
228 if (lex_match_id (lexer, "MISSING"))
230 lex_match (lexer, T_EQUALS);
231 while (lex_token (lexer) != T_ENDCMD && lex_token (lexer) != T_SLASH)
233 if (lex_match_id (lexer, "VARIABLE"))
234 dsc->missing_type = DSC_VARIABLE;
235 else if (lex_match_id (lexer, "LISTWISE"))
236 dsc->missing_type = DSC_LISTWISE;
237 else if (lex_match_id (lexer, "INCLUDE"))
238 dsc->exclude = MV_SYSTEM;
241 lex_error (lexer, NULL);
244 lex_match (lexer, T_COMMA);
247 else if (lex_match_id (lexer, "SAVE"))
249 else if (lex_match_id (lexer, "FORMAT"))
251 lex_match (lexer, T_EQUALS);
252 while (lex_token (lexer) != T_ENDCMD && lex_token (lexer) != T_SLASH)
254 if (lex_match_id (lexer, "LABELS"))
255 dsc->show_var_labels = 1;
256 else if (lex_match_id (lexer, "NOLABELS"))
257 dsc->show_var_labels = 0;
258 else if (lex_match_id (lexer, "INDEX"))
260 else if (lex_match_id (lexer, "NOINDEX"))
262 else if (lex_match_id (lexer, "LINE"))
263 dsc->format = DSC_LINE;
264 else if (lex_match_id (lexer, "SERIAL"))
265 dsc->format = DSC_SERIAL;
268 lex_error (lexer, NULL);
271 lex_match (lexer, T_COMMA);
274 else if (lex_match_id (lexer, "STATISTICS"))
276 lex_match (lexer, T_EQUALS);
278 while (lex_token (lexer) != T_ENDCMD && lex_token (lexer) != T_SLASH)
280 if (lex_match (lexer, T_ALL))
281 dsc->show_stats |= (1ul << DSC_N_STATS) - 1;
282 else if (lex_match_id (lexer, "DEFAULT"))
283 dsc->show_stats |= DEFAULT_STATS;
/* match_statistic() returns DSC_NONE (-1) when no statistic is recognized.
   Shifting by a negative count is undefined behavior, so a bit is set only
   for a real statistic.  If nothing ends up selected, the show_stats == 0
   check below restores DEFAULT_STATS.
   NOTE(review): confirm that an unrecognized token is always consumed, so
   that this loop cannot spin on it. */
285 { enum dsc_statistic stat = match_statistic (lexer); if (stat != DSC_NONE) dsc->show_stats |= 1ul << stat; }
286 lex_match (lexer, T_COMMA);
288 if (dsc->show_stats == 0)
289 dsc->show_stats = DEFAULT_STATS;
291 else if (lex_match_id (lexer, "SORT"))
293 lex_match (lexer, T_EQUALS);
294 if (lex_match_id (lexer, "NAME"))
295 dsc->sort_by_stat = DSC_NAME;
298 dsc->sort_by_stat = match_statistic (lexer);
/* SORT with no recognizable statistic sorts by the mean. */
299 if (dsc->sort_by_stat == DSC_NONE )
300 dsc->sort_by_stat = DSC_MEAN;
302 if (lex_match (lexer, T_LPAREN))
304 if (lex_match_id (lexer, "A"))
305 dsc->sort_ascending = 1;
306 else if (lex_match_id (lexer, "D"))
307 dsc->sort_ascending = 0;
309 lex_error (lexer, NULL);
310 lex_force_match (lexer, T_RPAREN);
313 else if (var_cnt == 0)
315 if (lex_next_token (lexer, 1) == T_EQUALS)
317 lex_match_id (lexer, "VARIABLES");
318 lex_match (lexer, T_EQUALS);
321 while (lex_token (lexer) != T_ENDCMD && lex_token (lexer) != T_SLASH)
325 if (!parse_variables_const (lexer, dict, &vars, &var_cnt,
326 PV_APPEND | PV_NO_DUPLICATE | PV_NUMERIC))
/* Grow dsc->vars to match the parsed list and set up the new entries. */
329 dsc->vars = xnrealloc ((void *)dsc->vars, var_cnt, sizeof *dsc->vars);
330 for (i = dsc->var_cnt; i < var_cnt; i++)
332 struct dsc_var *dv = &dsc->vars[i];
337 dsc->var_cnt = var_cnt;
/* An identifier in parentheses names the Z-score variable for the
   variable parsed most recently. */
339 if (lex_match (lexer, T_LPAREN))
341 if (lex_token (lexer) != T_ID)
343 lex_error (lexer, NULL);
346 if (try_name (dict, dsc, lex_tokcstr (lexer)))
348 struct dsc_var *dsc_var = &dsc->vars[dsc->var_cnt - 1];
349 dsc_var->z_name = xstrdup (lex_tokcstr (lexer));
353 msg (SE, _("Z-score variable name %s would be"
354 " a duplicate variable name."), lex_tokcstr (lexer));
356 if (!lex_force_match (lexer, T_RPAREN))
363 lex_error (lexer, NULL);
367 lex_match (lexer, T_SLASH);
371 msg (SE, _("No variables specified."));
375 /* Construct z-score varnames, show translation table. */
376 if (z_cnt || save_z_scores)
378 struct caseproto *proto;
384 for (i = 0; i < dsc->var_cnt; i++)
386 struct dsc_var *dsc_var = &dsc->vars[i];
387 if (dsc_var->z_name == NULL)
389 const char *name = var_get_name (dsc_var->v);
390 dsc_var->z_name = generate_z_varname (dict, dsc, name,
392 if (dsc_var->z_name == NULL)
400 /* It would be better to handle Z scores correctly (however we define
401 that) when TEMPORARY is in effect, but in the meantime this at least
402 prevents a use-after-free error. See bug #38786. */
403 if (proc_make_temporary_transformations_permanent (ds))
404 msg (SW, _("DESCRIPTIVES with Z scores ignores TEMPORARY. "
405 "Temporary transformations will be made permanent."));
/* The Z-score writer's cases hold one count value followed by a
   (mean, standard deviation) pair for each Z-score variable. */
407 proto = caseproto_create ();
408 for (i = 0; i < 1 + 2 * z_cnt; i++)
409 proto = caseproto_add_width (proto, 0);
410 dsc->z_writer = autopaging_writer_create (proto);
411 caseproto_unref (proto);
416 /* Figure out statistics to display. */
417 if (dsc->show_stats & (1ul << DSC_SKEWNESS))
418 dsc->show_stats |= 1ul << DSC_SESKEW;
419 if (dsc->show_stats & (1ul << DSC_KURTOSIS))
420 dsc->show_stats |= 1ul << DSC_SEKURT;
422 /* Figure out which statistics to calculate. */
423 dsc->calc_stats = dsc->show_stats;
425 dsc->calc_stats |= (1ul << DSC_MEAN) | (1ul << DSC_STDDEV);
/* DSC_NAME and DSC_NONE are negative and must never be used as bit indexes. */
426 if (dsc->sort_by_stat >= 0)
427 dsc->calc_stats |= 1ul << dsc->sort_by_stat;
428 if (dsc->show_stats & (1ul << DSC_SESKEW))
429 dsc->calc_stats |= 1ul << DSC_SKEWNESS;
430 if (dsc->show_stats & (1ul << DSC_SEKURT))
431 dsc->calc_stats |= 1ul << DSC_KURTOSIS;
433 /* Figure out maximum moment needed and allocate moments for the variables. */
435 dsc->max_moment = MOMENT_NONE;
436 for (i = 0; i < DSC_N_STATS; i++)
437 if (dsc->calc_stats & (1ul << i) && dsc_info[i].moment > dsc->max_moment)
438 dsc->max_moment = dsc_info[i].moment;
439 if (dsc->max_moment != MOMENT_NONE)
440 for (i = 0; i < dsc->var_cnt; i++)
441 dsc->vars[i].moments = moments_create (dsc->max_moment);
/* Run the calculation once per SPLIT FILE group. */
444 grouper = casegrouper_create_splits (proc_open (ds), dict);
445 while (casegrouper_get_next_group (grouper, &group))
446 calc_descriptives (dsc, group, ds);
447 ok = casegrouper_destroy (grouper);
448 ok = proc_commit (ds) && ok;
452 setup_z_trns (dsc, ds);
457 return ok ? CMD_SUCCESS : CMD_CASCADING_FAILURE;
465 /* Returns the statistic named by the current token and skips past the token.
466 Returns DSC_NONE if no statistic is given (e.g., subcommand with no
467 specifiers). Emits an error if the current token ID does not name a statistic. */
469 static enum dsc_statistic
470 match_statistic (struct lexer *lexer)
/* Only an identifier token can name a statistic. */
472 if (lex_token (lexer) == T_ID)
474 enum dsc_statistic stat;
/* Try each identifier in dsc_info[] against the current token. */
476 for (stat = 0; stat < DSC_N_STATS; stat++)
477 if (lex_match_id (lexer, dsc_info[stat].identifier))
481 lex_error (lexer, _("expecting statistic name: reverting to default"));
/* Releases resources held by DSC: each variable's Z-score name and moments,
   and the Z-score casewriter. */
489 free_dsc_proc (struct dsc_proc *dsc)
496 for (i = 0; i < dsc->var_cnt; i++)
498 struct dsc_var *dsc_var = &dsc->vars[i];
499 free (dsc_var->z_name);
500 moments_destroy (dsc_var->moments);
502 casewriter_destroy (dsc->z_writer);
509 /* Returns false if NAME is a duplicate of any existing variable name or
510 of any previously-declared z-var name; otherwise returns true. */
512 try_name (const struct dictionary *dict, struct dsc_proc *dsc,
/* First check against variables already in the dictionary. */
517 if (dict_lookup_var (dict, name) != NULL)
/* Then check, ignoring case, against Z-score names already assigned in DSC. */
519 for (i = 0; i < dsc->var_cnt; i++)
521 struct dsc_var *dsc_var = &dsc->vars[i];
522 if (dsc_var->z_name != NULL && !utf8_strcasecmp (dsc_var->z_name, name))
528 /* Generates a name for a Z-score variable based on a variable
529 named VAR_NAME, given that *Z_CNT generated variable names are
530 known to already exist. If successful, returns the new name
531 as a dynamically allocated string. On failure, returns NULL. */
533 generate_z_varname (const struct dictionary *dict, struct dsc_proc *dsc,
534 const char *var_name, int *z_cnt)
536 char *z_name, *trunc_name;
538 /* Try a name based on the original variable name. */
539 z_name = xasprintf ("Z%s", var_name);
540 trunc_name = utf8_encoding_trunc (z_name, dict_get_encoding (dict),
543 if (try_name (dict, dsc, trunc_name))
547 /* Generate a synthetic name. */
/* The generic names come in four series: ZSC001-ZSC099 (99 names), then
   STDZ01-STDZ09, ZZZZ01-ZZZZ09 and ZQZQ01-ZQZQ09 (9 names each), for a
   total of 126. */
555 sprintf (name, "ZSC%03d", *z_cnt);
556 else if (*z_cnt <= 108)
557 sprintf (name, "STDZ%02d", *z_cnt - 99);
558 else if (*z_cnt <= 117)
559 sprintf (name, "ZZZZ%02d", *z_cnt - 108);
560 else if (*z_cnt <= 126)
561 sprintf (name, "ZQZQ%02d", *z_cnt - 117);
564 msg (SE, _("Ran out of generic names for Z-score variables. "
565 "There are only 126 generic names: ZSC001-ZSC099, "
566 "STDZ01-STDZ09, ZZZZ01-ZZZZ09, ZQZQ01-ZQZQ09."));
/* Accept the synthetic name only if nothing else already uses it. */
570 if (try_name (dict, dsc, name))
571 return xstrdup (name);
576 /* Outputs a table describing the mapping between source
577 variables and Z-score variables. */
579 dump_z_table (struct dsc_proc *dsc)
/* Look for the variables that have a Z-score name; CNT sizes the table below. */
587 for (i = 0; i < dsc->var_cnt; i++)
588 if (dsc->vars[i].z_name != NULL)
/* Two columns (source, target); one header row plus CNT body rows. */
592 t = tab_create (2, cnt + 1);
593 tab_title (t, _("Mapping of variables to corresponding Z-scores."));
594 tab_headers (t, 0, 0, 1, 0);
595 tab_box (t, TAL_1, TAL_1, TAL_0, TAL_1, 0, 0, 1, cnt);
596 tab_hline (t, TAL_2, 0, 1, 1);
597 tab_text (t, 0, 0, TAB_CENTER | TAT_TITLE, _("Source"));
598 tab_text (t, 1, 0, TAB_CENTER | TAT_TITLE, _("Target"));
/* Fill one row per mapped variable; Y starts at 1 to skip the header row. */
603 for (i = 0, y = 1; i < dsc->var_cnt; i++)
604 if (dsc->vars[i].z_name != NULL)
606 tab_text (t, 0, y, TAB_LEFT, var_to_string (dsc->vars[i].v));
607 tab_text (t, 1, y++, TAB_LEFT, dsc->vars[i].z_name);
614 /* Transformation function to calculate Z-scores. Will return SYSMIS if any of
615 the following are true: 1) mean or standard deviation is SYSMIS; 2) score is
616 SYSMIS; 3) score is user-missing and such values were not included in the original
617 analysis; 4) any of the variables in the original analysis were missing
618 (either system or user-missing values that weren't included). */
621 descriptives_trns_proc (void *trns_, struct ccase **c,
622 casenumber case_idx UNUSED)
624 struct dsc_trns *t = trns_;
625 struct dsc_z_score *z;
626 const struct variable **vars;
631 struct ccase *z_case;
/* Fetch the next record of statistics from the Z-score reader. */
633 z_case = casereader_read (t->z_reader);
/* Record layout: a case count first, then a (mean, std_dev) pair for each
   Z-score in order. */
638 t->count = case_num_idx (z_case, z_idx++);
639 for (z = t->z_scores; z < t->z_scores + t->z_score_cnt; z++)
641 z->mean = case_num_idx (z_case, z_idx++);
642 z->std_dev = case_num_idx (z_case, z_idx++);
650 msg (SE, _("Internal error processing Z scores"));
/* With no usable statistics, every Z-score will come out as SYSMIS. */
653 for (z = t->z_scores; z < t->z_scores + t->z_score_cnt; z++)
654 z->mean = z->std_dev = SYSMIS;
/* Under listwise deletion, check every analysis variable of this case for
   a missing value. */
659 if (t->missing_type == DSC_LISTWISE)
662 for (vars = t->vars; vars < t->vars + t->var_cnt; vars++)
664 double score = case_num (*c, *vars);
665 if (var_is_num_missing (*vars, score, t->exclude))
/* Get a writable, unshared copy of the case before storing the results. */
673 *c = case_unshare (*c);
674 for (z = t->z_scores; z < t->z_scores + t->z_score_cnt; z++)
676 double input = case_num (*c, z->src_var);
677 double *output = &case_data_rw (*c, z->z_var)->f;
679 if (z->mean == SYSMIS || z->std_dev == SYSMIS || all_sysmis
680 || var_is_num_missing (z->src_var, input, t->exclude))
/* Standardize: distance from the mean in units of standard deviation. */
683 *output = (input - z->mean) / z->std_dev;
685 return TRNS_CONTINUE;
688 /* Frees a struct dsc_trns (passed as TRNS_), destroying its Z-score reader. */
690 descriptives_trns_free (void *trns_)
692 struct dsc_trns *t = trns_;
/* Success requires both the transformation's own flag and an error-free reader. */
693 bool ok = t->ok && !casereader_error (t->z_reader);
696 casereader_destroy (t->z_reader);
/* Invariant: the variable array exists exactly when listwise deletion is in use. */
697 assert((t->missing_type != DSC_LISTWISE) ^ (t->vars != NULL));
704 /* Sets up a transformation to calculate Z scores. */
706 setup_z_trns (struct dsc_proc *dsc, struct dataset *ds)
/* Look for the variables that have a Z-score name; CNT sizes the arrays below. */
711 for (cnt = i = 0; i < dsc->var_cnt; i++)
712 if (dsc->vars[i].z_name != NULL)
715 t = xmalloc (sizeof *t);
716 t->z_scores = xnmalloc (cnt, sizeof *t->z_scores);
717 t->z_score_cnt = cnt;
718 t->missing_type = dsc->missing_type;
719 t->exclude = dsc->exclude;
/* Listwise deletion needs the full list of analysis variables at
   transformation time, so copy it. */
720 if ( t->missing_type == DSC_LISTWISE )
722 t->var_cnt = dsc->var_cnt;
723 t->vars = xnmalloc (t->var_cnt, sizeof *t->vars);
724 for (i = 0; i < t->var_cnt; i++)
725 t->vars[i] = dsc->vars[i].v;
/* Turn the Z-score writer into a reader for the transformation, and clear
   DSC's pointer to the writer. */
732 t->z_reader = casewriter_make_reader (dsc->z_writer);
735 dsc->z_writer = NULL;
/* Create one new dictionary variable per Z-score and fill in its entry. */
737 for (cnt = i = 0; i < dsc->var_cnt; i++)
739 struct dsc_var *dv = &dsc->vars[i];
740 if (dv->z_name != NULL)
742 struct dsc_z_score *z;
743 struct variable *dst_var;
746 dst_var = dict_create_var_assert (dataset_dict (ds), dv->z_name, 0);
748 label = xasprintf (_("Z-score of %s"),var_to_string (dv->v));
749 var_set_label (dst_var, label, false);
752 z = &t->z_scores[cnt++];
758 add_transformation (ds,
759 descriptives_trns_proc, descriptives_trns_free, t);
762 /* Statistical calculation. */
764 static bool listwise_missing (struct dsc_proc *dsc, const struct ccase *c);
766 /* Calculates and displays descriptive statistics for the cases in GROUP. */
769 calc_descriptives (struct dsc_proc *dsc, struct casereader *group,
772 struct casereader *pass1, *pass2;
/* Peek at the group's first case; it supplies the SPLIT FILE values
   reported below. */
778 c = casereader_peek (group, 0);
781 casereader_destroy (group);
784 output_split_file_values (ds, c);
787 group = casereader_create_filter_weight (group, dataset_dict (ds),
/* A second reader is cloned only when moments beyond the mean are needed. */
791 pass2 = dsc->max_moment <= MOMENT_MEAN ? NULL : casereader_clone (pass1);
/* Reset the per-variable accumulators for this group. */
793 for (i = 0; i < dsc->var_cnt; i++)
795 struct dsc_var *dv = &dsc->vars[i];
797 dv->valid = dv->missing = 0.0;
798 if (dv->moments != NULL)
799 moments_clear (dv->moments);
803 dsc->missing_listwise = 0.;
806 /* First pass to handle most of the work. */
808 for (; (c = casereader_read (pass1)) != NULL; case_unref (c))
810 double weight = dict_get_case_weight (dataset_dict (ds), c, NULL);
812 /* Check for missing values. */
813 if (listwise_missing (dsc, c))
815 dsc->missing_listwise += weight;
816 if (dsc->missing_type == DSC_LISTWISE)
819 dsc->valid += weight;
821 for (i = 0; i < dsc->var_cnt; i++)
823 struct dsc_var *dv = &dsc->vars[i];
824 double x = case_num (c, dv->v);
826 if (var_is_num_missing (dv->v, x, dsc->exclude))
828 dv->missing += weight;
832 if (dv->moments != NULL)
833 moments_pass_one (dv->moments, x, weight);
/* A failed first pass means the second reader is no longer needed. */
843 if (!casereader_destroy (pass1))
845 casereader_destroy (pass2);
849 /* Second pass for higher-order moments. */
850 if (dsc->max_moment > MOMENT_MEAN)
852 for (; (c = casereader_read (pass2)) != NULL; case_unref (c))
854 double weight = dict_get_case_weight (dataset_dict (ds), c, NULL);
856 /* Check for missing values. */
857 if (dsc->missing_type == DSC_LISTWISE && listwise_missing (dsc, c))
860 for (i = 0; i < dsc->var_cnt; i++)
862 struct dsc_var *dv = &dsc->vars[i];
863 double x = case_num (c, dv->v);
865 if (var_is_num_missing (dv->v, x, dsc->exclude))
868 if (dv->moments != NULL)
869 moments_pass_two (dv->moments, x, weight);
872 if (!casereader_destroy (pass2))
876 /* Calculate results. */
/* Build this group's Z-score record: the count comes first, then a
   (mean, standard deviation) pair for each Z-scored variable. */
879 c = case_create (casewriter_get_proto (dsc->z_writer));
881 case_data_rw_idx (c, z_idx++)->f = count;
886 for (i = 0; i < dsc->var_cnt; i++)
888 struct dsc_var *dv = &dsc->vars[i];
/* Start from "all statistics missing"; only computable ones are filled in. */
892 for (j = 0; j < DSC_N_STATS; j++)
893 dv->stats[j] = SYSMIS;
/* W is this variable's valid weight: the group's valid weight minus the
   weight of the cases where this variable was missing. */
895 dv->valid = W = dsc->valid - dv->missing;
897 if (dv->moments != NULL)
898 moments_calculate (dv->moments, NULL,
899 &dv->stats[DSC_MEAN], &dv->stats[DSC_VARIANCE],
900 &dv->stats[DSC_SKEWNESS], &dv->stats[DSC_KURTOSIS]);
901 if (dsc->calc_stats & (1ul << DSC_SEMEAN)
902 && dv->stats[DSC_VARIANCE] != SYSMIS && W > 0.)
903 dv->stats[DSC_SEMEAN] = sqrt (dv->stats[DSC_VARIANCE]) / sqrt (W);
904 if (dsc->calc_stats & (1ul << DSC_STDDEV)
905 && dv->stats[DSC_VARIANCE] != SYSMIS)
906 dv->stats[DSC_STDDEV] = sqrt (dv->stats[DSC_VARIANCE]);
907 if (dsc->calc_stats & (1ul << DSC_SEKURT))
908 if (dv->stats[DSC_KURTOSIS] != SYSMIS)
909 dv->stats[DSC_SEKURT] = calc_sekurt (W);
910 if (dsc->calc_stats & (1ul << DSC_SESKEW)
911 && dv->stats[DSC_SKEWNESS] != SYSMIS)
912 dv->stats[DSC_SESKEW] = calc_seskew (W);
/* DBL_MAX and -DBL_MAX in min and max are treated as "no value seen". */
913 dv->stats[DSC_RANGE] = ((dv->min == DBL_MAX || dv->max == -DBL_MAX)
914 ? SYSMIS : dv->max - dv->min);
915 dv->stats[DSC_MIN] = dv->min == DBL_MAX ? SYSMIS : dv->min;
916 dv->stats[DSC_MAX] = dv->max == -DBL_MAX ? SYSMIS : dv->max;
/* The sum is recovered as the valid weight times the mean. */
917 if (dsc->calc_stats & (1ul << DSC_SUM))
918 dv->stats[DSC_SUM] = W * dv->stats[DSC_MEAN];
922 case_data_rw_idx (c, z_idx++)->f = dv->stats[DSC_MEAN];
923 case_data_rw_idx (c, z_idx++)->f = dv->stats[DSC_STDDEV];
928 casewriter_write (dsc->z_writer, c);
930 /* Output results. */
934 /* Returns true if any of the descriptives variables in DSC's
935 variable list have missing values in case C, false otherwise.
   "Missing" is judged against the classes in DSC->exclude. */
937 listwise_missing (struct dsc_proc *dsc, const struct ccase *c)
941 for (i = 0; i < dsc->var_cnt; i++)
943 struct dsc_var *dv = &dsc->vars[i];
944 double x = case_num (c, dv->v);
946 if (var_is_num_missing (dv->v, x, dsc->exclude))
952 /* Statistical display. */
954 static algo_compare_func descriptives_compare_dsc_vars;
956 /* Displays a table of descriptive statistics for DSC. */
958 display (struct dsc_proc *dsc)
/* Column count: the variable name, then valid N (plus missing N in SERIAL
   format); the loop below looks at each statistic selected for display. */
964 nc = 1 + (dsc->format == DSC_SERIAL ? 2 : 1);
965 for (i = 0; i < DSC_N_STATS; i++)
966 if (dsc->show_stats & (1ul << i))
/* Sort the variables in place unless no ordering was requested. */
969 if (dsc->sort_by_stat != DSC_NONE)
970 sort (dsc->vars, dsc->var_cnt, sizeof *dsc->vars,
971 descriptives_compare_dsc_vars, dsc);
/* One header row plus one row per variable. */
973 t = tab_create (nc, dsc->var_cnt + 1);
974 tab_headers (t, 1, 0, 1, 0);
975 tab_box (t, TAL_1, TAL_1, -1, -1, 0, 0, nc - 1, dsc->var_cnt);
976 tab_box (t, -1, -1, -1, TAL_1, 1, 0, nc - 1, dsc->var_cnt);
977 tab_hline (t, TAL_2, 0, nc - 1, 1);
978 tab_vline (t, TAL_2, 1, 0, dsc->var_cnt);
/* Header row; NC is reused here as the running column index. */
981 tab_text (t, nc++, 0, TAB_LEFT | TAT_TITLE, _("Variable"));
982 if (dsc->format == DSC_SERIAL)
984 tab_text (t, nc++, 0, TAB_CENTER | TAT_TITLE, _("Valid N"));
985 tab_text (t, nc++, 0, TAB_CENTER | TAT_TITLE, _("Missing N"));
988 tab_text (t, nc++, 0, TAB_CENTER | TAT_TITLE, "N");
990 for (i = 0; i < DSC_N_STATS; i++)
991 if (dsc->show_stats & (1ul << i))
993 const char *title = gettext (dsc_info[i].name);
994 tab_text (t, nc++, 0, TAB_CENTER | TAT_TITLE, title);
/* Body rows: variable I goes in table row I + 1. */
997 for (i = 0; i < dsc->var_cnt; i++)
999 struct dsc_var *dv = &dsc->vars[i];
1003 tab_text (t, nc++, i + 1, TAB_LEFT, var_to_string (dv->v));
1004 tab_text_format (t, nc++, i + 1, 0, "%g", dv->valid);
1005 if (dsc->format == DSC_SERIAL)
1006 tab_text_format (t, nc++, i + 1, 0, "%g", dv->missing);
1008 for (j = 0; j < DSC_N_STATS; j++)
1009 if (dsc->show_stats & (1ul << j))
1010 tab_double (t, nc++, i + 1, TAB_NONE, dv->stats[j], NULL);
1013 tab_title (t, _("Valid cases = %g; cases with missing value(s) = %g."),
1014 dsc->valid, dsc->missing_listwise);
1019 /* Compares `struct dsc_var's A and B according to the ordering
1020 specified by DSC. */
1022 descriptives_compare_dsc_vars (const void *a_, const void *b_, const void *dsc_)
1024 const struct dsc_var *a = a_;
1025 const struct dsc_var *b = b_;
1026 const struct dsc_proc *dsc = dsc_;
1030 if (dsc->sort_by_stat == DSC_NAME)
1031 result = utf8_strcasecmp (var_get_name (a->v), var_get_name (b->v));
1034 double as = a->stats[dsc->sort_by_stat];
1035 double bs = b->stats[dsc->sort_by_stat];
1037 result = as < bs ? -1 : as > bs;
1040 if (!dsc->sort_ascending)