1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 1997-2000, 2009-2012 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
23 #include "data/casegrouper.h"
24 #include "data/casereader.h"
25 #include "data/casewriter.h"
26 #include "data/dataset.h"
27 #include "data/dictionary.h"
28 #include "data/transformations.h"
29 #include "data/variable.h"
30 #include "language/command.h"
31 #include "language/dictionary/split-file.h"
32 #include "language/lexer/lexer.h"
33 #include "language/lexer/variable-parser.h"
34 #include "libpspp/array.h"
35 #include "libpspp/assertion.h"
36 #include "libpspp/compiler.h"
37 #include "libpspp/i18n.h"
38 #include "libpspp/message.h"
39 #include "math/moments.h"
40 #include "output/tab.h"
42 #include "gl/xalloc.h"
/* Translation helpers.  _() translates MSGID through gettext() where it
   is used; N_() expands to MSGID unchanged, so the strings in the static
   dsc_info table stay untranslated until display() passes them to
   gettext(). */
45 #define _(msgid) gettext (msgid)
46 #define N_(msgid) msgid
48 /* DESCRIPTIVES private data. */
52 /* Handling of missing values, as selected on the MISSING subcommand.
   cmd_descriptives() starts out with DSC_VARIABLE. */
55 DSC_VARIABLE, /* Handle missing values on a per-variable basis. */
56 DSC_LISTWISE /* Discard entire case if any variable is missing. */
59 /* Describes properties of a distribution for the purpose of
   calculating a Z-score.  descriptives_trns_proc() refreshes MEAN and
   STD_DEV from the Z-score reader and then stores
   (input - mean) / std_dev into Z_VAR. */
63 const struct variable *src_var; /* Variable on which z-score is based. */
64 struct variable *z_var; /* New z-score variable. */
65 double mean; /* Distribution mean. */
66 double std_dev; /* Distribution standard deviation. */
69 /* DESCRIPTIVES transformation (for calculating Z-scores).
   setup_z_trns() fills one of these in and registers it together with
   descriptives_trns_proc() and descriptives_trns_free(). */
72 struct dsc_z_score *z_scores; /* Array of Z-scores. */
73 int z_score_cnt; /* Number of Z-scores. */
/* VARS and VAR_CNT are filled in by setup_z_trns() when MISSING_TYPE is
   DSC_LISTWISE. */
74 const struct variable **vars; /* Variables for listwise missing checks. */
75 size_t var_cnt; /* Number of variables. */
76 enum dsc_missing_type missing_type; /* Treatment of missing values. */
77 enum mv_class exclude; /* Classes of missing values to exclude. */
/* Each case read from Z_READER holds a case count followed by one
   (mean, std_dev) pair per Z-score; see descriptives_trns_proc(). */
78 struct casereader *z_reader; /* Reader for count, mean, stddev. */
79 casenumber count; /* Number left in this SPLIT FILE group.*/
83 /* Statistics.  Used as bit indexes (1ul << DSC_*) into the show_stats
   and calc_stats masks, so there must be 32 or fewer.  The non-negative
   values also index dsc_info[] and dsc_var.stats[]. */
86 DSC_MEAN = 0, DSC_SEMEAN, DSC_STDDEV, DSC_VARIANCE, DSC_KURTOSIS,
87 DSC_SEKURT, DSC_SKEWNESS, DSC_SESKEW, DSC_RANGE, DSC_MIN,
88 DSC_MAX, DSC_SUM, DSC_N_STATS,
90 /* Only valid as sort criteria; being negative, they must never be used
   as a shift count or array index. */
91 DSC_NAME = -2, /* Sort by name. */
92 DSC_NONE = -1 /* Unsorted. */
95 /* Describes one statistic: one entry of the dsc_info[] table. */
96 struct dsc_statistic_info
98 const char *identifier; /* Identifier (keyword matched by match_statistic()). */
99 const char *name; /* Full name (column title; translated in display()). */
100 enum moment moment; /* Highest moment needed to calculate. */
103 /* Table of statistics, indexed by DSC_*.
   The standard errors of kurtosis and skewness are listed with
   MOMENT_NONE; cmd_descriptives() adds DSC_KURTOSIS / DSC_SKEWNESS to
   calc_stats whenever they are shown, which is what raises the required
   moment for them. */
104 static const struct dsc_statistic_info dsc_info[DSC_N_STATS] =
106 {"MEAN", N_("Mean"), MOMENT_MEAN},
107 {"SEMEAN", N_("S.E. Mean"), MOMENT_VARIANCE},
108 {"STDDEV", N_("Std Dev"), MOMENT_VARIANCE},
109 {"VARIANCE", N_("Variance"), MOMENT_VARIANCE},
110 {"KURTOSIS", N_("Kurtosis"), MOMENT_KURTOSIS},
111 {"SEKURTOSIS", N_("S.E. Kurt"), MOMENT_NONE},
112 {"SKEWNESS", N_("Skewness"), MOMENT_SKEWNESS},
113 {"SESKEWNESS", N_("S.E. Skew"), MOMENT_NONE},
114 {"RANGE", N_("Range"), MOMENT_NONE},
115 {"MINIMUM", N_("Minimum"), MOMENT_NONE},
116 {"MAXIMUM", N_("Maximum"), MOMENT_NONE},
117 {"SUM", N_("Sum"), MOMENT_MEAN},
/* DEFAULT_STATS is a bit mask built from (1ul << DSC_*) terms.
   cmd_descriptives() uses it as the initial value of show_stats and
   calc_stats, for STATISTICS=DEFAULT, and as the fallback when the
   STATISTICS subcommand selects nothing. */
120 /* Statistics calculated by default if none are explicitly
122 #define DEFAULT_STATS \
123 ((1ul << DSC_MEAN) | (1ul << DSC_STDDEV) | (1ul << DSC_MIN) \
126 /* A variable specified on DESCRIPTIVES, together with the results
   accumulated for it by calc_descriptives(). */
129 const struct variable *v; /* Variable to calculate on. */
130 char *z_name; /* Name for z-score variable, or NULL if none. */
131 double valid, missing; /* Valid, missing counts (sums of case weights). */
132 struct moments *moments; /* Moments, or NULL if no moment is needed. */
133 double min, max; /* Minimum and maximum values. */
134 double stats[DSC_N_STATS]; /* All the stats' values. */
/* Output formats, selected with FORMAT=LINE or FORMAT=SERIAL.  In
   display(), DSC_SERIAL produces separate "Valid N" and "Missing N"
   columns where DSC_LINE has a single "N" column. */
140 DSC_LINE, /* Abbreviated format. */
141 DSC_SERIAL /* Long format. */
144 /* A DESCRIPTIVES procedure: parsed options plus running totals. */
147 /* Per-variable info. */
148 struct dsc_var *vars; /* Variables. */
149 size_t var_cnt; /* Number of variables. */
152 enum dsc_missing_type missing_type; /* Treatment of missing values. */
153 enum mv_class exclude; /* Classes of missing values to exclude. */
154 int show_var_labels; /* Nonzero to show variable labels. */
155 int show_index; /* Nonzero to show variable index. */
156 enum dsc_format format; /* Output format. */
158 /* Accumulated results (reset for every group in calc_descriptives()). */
159 double missing_listwise; /* Sum of weights of cases missing listwise. */
160 double valid; /* Sum of weights of valid cases. */
161 bool bad_warn; /* Warn if bad weight found. */
162 enum dsc_statistic sort_by_stat; /* Statistic to sort by; DSC_NAME (-2): by name; DSC_NONE (-1): unsorted. */
163 int sort_ascending; /* !0: ascending order; 0: descending. */
164 unsigned long show_stats; /* Statistics to display (mask of 1ul << DSC_*). */
165 unsigned long calc_stats; /* Statistics to calculate (mask of 1ul << DSC_*). */
166 enum moment max_moment; /* Highest moment needed for stats. */
169 struct casewriter *z_writer; /* Count, mean and stddev per SPLIT FILE group. */
/* Forward declarations of the file-local helpers defined later in this
   file. */
173 static enum dsc_statistic match_statistic (struct lexer *);
174 static void free_dsc_proc (struct dsc_proc *);
176 /* Z-score functions: naming of the new variables, the mapping table,
   and the transformation that fills them in. */
177 static bool try_name (const struct dictionary *dict,
178 struct dsc_proc *dsc, const char *name);
179 static char *generate_z_varname (const struct dictionary *dict,
180 struct dsc_proc *dsc,
181 const char *name, int *z_cnt);
182 static void dump_z_table (struct dsc_proc *);
183 static void setup_z_trns (struct dsc_proc *, struct dataset *);
185 /* Procedure execution functions: one group's statistics, then the
   table that reports them. */
186 static void calc_descriptives (struct dsc_proc *, struct casereader *,
188 static void display (struct dsc_proc *dsc);
190 /* Parser and outline. */
192 /* Handles DESCRIPTIVES. */
194 cmd_descriptives (struct lexer *lexer, struct dataset *ds)
196 struct dictionary *dict = dataset_dict (ds);
197 struct dsc_proc *dsc;
198 const struct variable **vars = NULL;
200 int save_z_scores = 0;
205 struct casegrouper *grouper;
206 struct casereader *group;
208 /* Create and initialize dsc. */
209 dsc = xmalloc (sizeof *dsc);
212 dsc->missing_type = DSC_VARIABLE;
213 dsc->exclude = MV_ANY;
214 dsc->show_var_labels = 1;
216 dsc->format = DSC_LINE;
217 dsc->missing_listwise = 0.;
220 dsc->sort_by_stat = DSC_NONE;
221 dsc->sort_ascending = 1;
222 dsc->show_stats = dsc->calc_stats = DEFAULT_STATS;
223 dsc->z_writer = NULL;
225 /* Parse DESCRIPTIVES. */
226 while (lex_token (lexer) != T_ENDCMD)
228 if (lex_match_id (lexer, "MISSING"))
230 lex_match (lexer, T_EQUALS);
231 while (lex_token (lexer) != T_ENDCMD && lex_token (lexer) != T_SLASH)
233 if (lex_match_id (lexer, "VARIABLE"))
234 dsc->missing_type = DSC_VARIABLE;
235 else if (lex_match_id (lexer, "LISTWISE"))
236 dsc->missing_type = DSC_LISTWISE;
237 else if (lex_match_id (lexer, "INCLUDE"))
238 dsc->exclude = MV_SYSTEM;
241 lex_error (lexer, NULL);
244 lex_match (lexer, T_COMMA);
247 else if (lex_match_id (lexer, "SAVE"))
249 else if (lex_match_id (lexer, "FORMAT"))
251 lex_match (lexer, T_EQUALS);
252 while (lex_token (lexer) != T_ENDCMD && lex_token (lexer) != T_SLASH)
254 if (lex_match_id (lexer, "LABELS"))
255 dsc->show_var_labels = 1;
256 else if (lex_match_id (lexer, "NOLABELS"))
257 dsc->show_var_labels = 0;
258 else if (lex_match_id (lexer, "INDEX"))
260 else if (lex_match_id (lexer, "NOINDEX"))
262 else if (lex_match_id (lexer, "LINE"))
263 dsc->format = DSC_LINE;
264 else if (lex_match_id (lexer, "SERIAL"))
265 dsc->format = DSC_SERIAL;
268 lex_error (lexer, NULL);
271 lex_match (lexer, T_COMMA);
274 else if (lex_match_id (lexer, "STATISTICS"))
276 lex_match (lexer, T_EQUALS);
278 while (lex_token (lexer) != T_ENDCMD && lex_token (lexer) != T_SLASH)
280 if (lex_match (lexer, T_ALL))
281 dsc->show_stats |= (1ul << DSC_N_STATS) - 1;
282 else if (lex_match_id (lexer, "DEFAULT"))
283 dsc->show_stats |= DEFAULT_STATS;
285 dsc->show_stats |= 1ul << (match_statistic (lexer));
286 lex_match (lexer, T_COMMA);
288 if (dsc->show_stats == 0)
289 dsc->show_stats = DEFAULT_STATS;
291 else if (lex_match_id (lexer, "SORT"))
293 lex_match (lexer, T_EQUALS);
294 if (lex_match_id (lexer, "NAME"))
295 dsc->sort_by_stat = DSC_NAME;
298 dsc->sort_by_stat = match_statistic (lexer);
299 if (dsc->sort_by_stat == DSC_NONE )
300 dsc->sort_by_stat = DSC_MEAN;
302 if (lex_match (lexer, T_LPAREN))
304 if (lex_match_id (lexer, "A"))
305 dsc->sort_ascending = 1;
306 else if (lex_match_id (lexer, "D"))
307 dsc->sort_ascending = 0;
309 lex_error (lexer, NULL);
310 lex_force_match (lexer, T_RPAREN);
313 else if (var_cnt == 0)
315 if (lex_next_token (lexer, 1) == T_EQUALS)
317 lex_match_id (lexer, "VARIABLES");
318 lex_match (lexer, T_EQUALS);
321 while (lex_token (lexer) != T_ENDCMD && lex_token (lexer) != T_SLASH)
325 if (!parse_variables_const (lexer, dict, &vars, &var_cnt,
326 PV_APPEND | PV_NO_DUPLICATE | PV_NUMERIC))
329 dsc->vars = xnrealloc ((void *)dsc->vars, var_cnt, sizeof *dsc->vars);
330 for (i = dsc->var_cnt; i < var_cnt; i++)
332 struct dsc_var *dv = &dsc->vars[i];
337 dsc->var_cnt = var_cnt;
339 if (lex_match (lexer, T_LPAREN))
341 if (lex_token (lexer) != T_ID)
343 lex_error (lexer, NULL);
346 if (try_name (dict, dsc, lex_tokcstr (lexer)))
348 struct dsc_var *dsc_var = &dsc->vars[dsc->var_cnt - 1];
349 dsc_var->z_name = xstrdup (lex_tokcstr (lexer));
353 msg (SE, _("Z-score variable name %s would be"
354 " a duplicate variable name."), lex_tokcstr (lexer));
356 if (!lex_force_match (lexer, T_RPAREN))
363 lex_error (lexer, NULL);
367 lex_match (lexer, T_SLASH);
371 msg (SE, _("No variables specified."));
375 /* Construct z-score varnames, show translation table. */
376 if (z_cnt || save_z_scores)
378 struct caseproto *proto;
384 for (i = 0; i < dsc->var_cnt; i++)
386 struct dsc_var *dsc_var = &dsc->vars[i];
387 if (dsc_var->z_name == NULL)
389 const char *name = var_get_name (dsc_var->v);
390 dsc_var->z_name = generate_z_varname (dict, dsc, name,
392 if (dsc_var->z_name == NULL)
400 proto = caseproto_create ();
401 for (i = 0; i < 1 + 2 * z_cnt; i++)
402 proto = caseproto_add_width (proto, 0);
403 dsc->z_writer = autopaging_writer_create (proto);
404 caseproto_unref (proto);
409 /* Figure out statistics to display. */
410 if (dsc->show_stats & (1ul << DSC_SKEWNESS))
411 dsc->show_stats |= 1ul << DSC_SESKEW;
412 if (dsc->show_stats & (1ul << DSC_KURTOSIS))
413 dsc->show_stats |= 1ul << DSC_SEKURT;
415 /* Figure out which statistics to calculate. */
416 dsc->calc_stats = dsc->show_stats;
418 dsc->calc_stats |= (1ul << DSC_MEAN) | (1ul << DSC_STDDEV);
419 if (dsc->sort_by_stat >= 0)
420 dsc->calc_stats |= 1ul << dsc->sort_by_stat;
421 if (dsc->show_stats & (1ul << DSC_SESKEW))
422 dsc->calc_stats |= 1ul << DSC_SKEWNESS;
423 if (dsc->show_stats & (1ul << DSC_SEKURT))
424 dsc->calc_stats |= 1ul << DSC_KURTOSIS;
426 /* Figure out maximum moment needed and allocate moments for
428 dsc->max_moment = MOMENT_NONE;
429 for (i = 0; i < DSC_N_STATS; i++)
430 if (dsc->calc_stats & (1ul << i) && dsc_info[i].moment > dsc->max_moment)
431 dsc->max_moment = dsc_info[i].moment;
432 if (dsc->max_moment != MOMENT_NONE)
433 for (i = 0; i < dsc->var_cnt; i++)
434 dsc->vars[i].moments = moments_create (dsc->max_moment);
437 grouper = casegrouper_create_splits (proc_open (ds), dict);
438 while (casegrouper_get_next_group (grouper, &group))
439 calc_descriptives (dsc, group, ds);
440 ok = casegrouper_destroy (grouper);
441 ok = proc_commit (ds) && ok;
445 setup_z_trns (dsc, ds);
450 return ok ? CMD_SUCCESS : CMD_CASCADING_FAILURE;
/* Parses a statistic keyword.  When the current token is an identifier
   (T_ID) it is tried in turn against every dsc_info[].identifier with
   lex_match_id(); when none of them matches, the syntax error
   "expecting statistic name: reverting to default" is reported.
   NOTE(review): the return statements are not visible in this listing;
   per the comment below, the result is the matched DSC_* value or
   DSC_NONE.  Callers must not use DSC_NONE (-1) as a shift count. */
458 /* Returns the statistic named by the current token and skips past the token.
459 Returns DSC_NONE if no statistic is given (e.g., subcommand with no
460 specifiers). Emits an error if the current token ID does not name a
462 static enum dsc_statistic
463 match_statistic (struct lexer *lexer)
465 if (lex_token (lexer) == T_ID)
467 enum dsc_statistic stat;
469 for (stat = 0; stat < DSC_N_STATS; stat++)
470 if (lex_match_id (lexer, dsc_info[stat].identifier))
474 lex_error (lexer, _("expecting statistic name: reverting to default"));
/* Releases what DSC owns: for every variable its Z-score name and its
   moments, and then the Z-score writer. */
482 free_dsc_proc (struct dsc_proc *dsc)
489 for (i = 0; i < dsc->var_cnt; i++)
491 struct dsc_var *dsc_var = &dsc->vars[i];
492 free (dsc_var->z_name);
493 moments_destroy (dsc_var->moments);
/* z_writer is NULL here once setup_z_trns() has turned it into the
   transformation's reader. */
495 casewriter_destroy (dsc->z_writer);
502 /* Returns false if NAME is a duplicate of any existing variable name
   in DICT or of any z-var name already assigned to one of DSC's
   variables; otherwise returns true. */
505 try_name (const struct dictionary *dict, struct dsc_proc *dsc,
/* First check the names already present in the dictionary. */
510 if (dict_lookup_var (dict, name) != NULL)
/* Then the names chosen earlier in this command, compared without
   regard to case. */
512 for (i = 0; i < dsc->var_cnt; i++)
514 struct dsc_var *dsc_var = &dsc->vars[i];
515 if (dsc_var->z_name != NULL && !utf8_strcasecmp (dsc_var->z_name, name))
521 /* Generates a name for a Z-score variable based on a variable
522 named VAR_NAME, given that *Z_CNT generated variable names are
523 known to already exist. If successful, returns the new name
524 as a dynamically allocated string. On failure, returns NULL. */
526 generate_z_varname (const struct dictionary *dict, struct dsc_proc *dsc,
527 const char *var_name, int *z_cnt)
529 char *z_name, *trunc_name;
531 /* Try a name based on the original variable name. */
532 z_name = xasprintf ("Z%s", var_name);
533 trunc_name = utf8_encoding_trunc (z_name, dict_get_encoding (dict),
536 if (try_name (dict, dsc, trunc_name))
540 /* Generate a synthetic name. */
548 sprintf (name, "ZSC%03d", *z_cnt);
549 else if (*z_cnt <= 108)
550 sprintf (name, "STDZ%02d", *z_cnt - 99);
551 else if (*z_cnt <= 117)
552 sprintf (name, "ZZZZ%02d", *z_cnt - 108);
553 else if (*z_cnt <= 126)
554 sprintf (name, "ZQZQ%02d", *z_cnt - 117);
557 msg (SE, _("Ran out of generic names for Z-score variables. "
558 "There are only 126 generic names: ZSC001-ZSC0999, "
559 "STDZ01-STDZ09, ZZZZ01-ZZZZ09, ZQZQ01-ZQZQ09."));
563 if (try_name (dict, dsc, name))
564 return xstrdup (name);
569 /* Outputs a two-column table ("Source", "Target") describing the
   mapping between source variables and Z-score variables.  Only
   variables of DSC that have a Z-score name get a row. */
572 dump_z_table (struct dsc_proc *dsc)
/* Visit the variables that have a Z-score name; CNT is used below as
   the number of body rows. */
580 for (i = 0; i < dsc->var_cnt; i++)
581 if (dsc->vars[i].z_name != NULL)
/* One header row plus CNT body rows. */
585 t = tab_create (2, cnt + 1);
586 tab_title (t, _("Mapping of variables to corresponding Z-scores."));
587 tab_headers (t, 0, 0, 1, 0);
588 tab_box (t, TAL_1, TAL_1, TAL_0, TAL_1, 0, 0, 1, cnt);
589 tab_hline (t, TAL_2, 0, 1, 1);
590 tab_text (t, 0, 0, TAB_CENTER | TAT_TITLE, _("Source"));
591 tab_text (t, 1, 0, TAB_CENTER | TAT_TITLE, _("Target"));
/* Y is the output row; it advances only for variables that are listed. */
596 for (i = 0, y = 1; i < dsc->var_cnt; i++)
597 if (dsc->vars[i].z_name != NULL)
599 tab_text (t, 0, y, TAB_LEFT, var_to_string (dsc->vars[i].v));
600 tab_text (t, 1, y++, TAB_LEFT, dsc->vars[i].z_name);
/* Z-score transformation callback; TRNS_ is the struct dsc_trns built
   by setup_z_trns() and *C is the case to fill in.

   A case read from the Z-score reader supplies the case count and then
   one (mean, std_dev) pair per Z-score.  If no such case can be read,
   "Internal error processing Z scores" is reported and every mean and
   std_dev becomes SYSMIS.  With listwise deletion the variables of the
   original analysis are checked for missing values.  *C is then
   unshared and each Z-score variable receives
   (input - mean) / std_dev.  Returns TRNS_CONTINUE.

   NOTE(review): the conditions guarding these steps, and the branch
   that stores SYSMIS, are not visible in this listing; the comment
   below lists the intended SYSMIS cases.  A std_dev of zero is not
   screened in the visible code -- confirm whether that case needs
   handling. */
607 /* Transformation function to calculate Z-scores. Will return SYSMIS if any of
608 the following are true: 1) mean or standard deviation is SYSMIS 2) score is
609 SYSMIS 3) score is user missing and they were not included in the original
610 analyis. 4) any of the variables in the original analysis were missing
611 (either system or user-missing values that weren't included).
614 descriptives_trns_proc (void *trns_, struct ccase **c,
615 casenumber case_idx UNUSED)
617 struct dsc_trns *t = trns_;
618 struct dsc_z_score *z;
619 const struct variable **vars;
624 struct ccase *z_case;
626 z_case = casereader_read (t->z_reader);
631 t->count = case_num_idx (z_case, z_idx++);
632 for (z = t->z_scores; z < t->z_scores + t->z_score_cnt; z++)
634 z->mean = case_num_idx (z_case, z_idx++);
635 z->std_dev = case_num_idx (z_case, z_idx++);
643 msg (SE, _("Internal error processing Z scores"));
646 for (z = t->z_scores; z < t->z_scores + t->z_score_cnt; z++)
647 z->mean = z->std_dev = SYSMIS;
652 if (t->missing_type == DSC_LISTWISE)
655 for (vars = t->vars; vars < t->vars + t->var_cnt; vars++)
657 double score = case_num (*c, *vars);
658 if (var_is_num_missing (*vars, score, t->exclude))
666 *c = case_unshare (*c);
667 for (z = t->z_scores; z < t->z_scores + t->z_score_cnt; z++)
669 double input = case_num (*c, z->src_var);
670 double *output = &case_data_rw (*c, z->z_var)->f;
672 if (z->mean == SYSMIS || z->std_dev == SYSMIS || all_sysmis
673 || var_is_num_missing (z->src_var, input, t->exclude))
676 *output = (input - z->mean) / z->std_dev;
678 return TRNS_CONTINUE;
681 /* Frees a descriptives_trns struct (TRNS_ is a struct dsc_trns) and
   destroys its Z-score reader. */
683 descriptives_trns_free (void *trns_)
685 struct dsc_trns *t = trns_;
/* Success requires both the transformation's own flag and an error-free
   reader; evaluated before the reader is destroyed. */
686 bool ok = t->ok && !casereader_error (t->z_reader);
689 casereader_destroy (t->z_reader);
/* Invariant: the variable list exists exactly when listwise deletion is
   in effect. */
690 assert((t->missing_type != DSC_LISTWISE) ^ (t->vars != NULL));
697 /* Sets up a transformation to calculate Z scores: creates the new
   Z-score variables in DS's dictionary and registers
   descriptives_trns_proc() / descriptives_trns_free() on DS with a
   newly allocated struct dsc_trns. */
699 setup_z_trns (struct dsc_proc *dsc, struct dataset *ds)
/* Visit the variables that have a Z-score name; CNT sizes the z_scores
   array below. */
704 for (cnt = i = 0; i < dsc->var_cnt; i++)
705 if (dsc->vars[i].z_name != NULL)
708 t = xmalloc (sizeof *t);
709 t->z_scores = xnmalloc (cnt, sizeof *t->z_scores);
710 t->z_score_cnt = cnt;
711 t->missing_type = dsc->missing_type;
712 t->exclude = dsc->exclude;
/* Listwise deletion needs the full list of analysis variables when the
   transformation later checks each case. */
713 if ( t->missing_type == DSC_LISTWISE )
715 t->var_cnt = dsc->var_cnt;
716 t->vars = xnmalloc (t->var_cnt, sizeof *t->vars);
717 for (i = 0; i < t->var_cnt; i++)
718 t->vars[i] = dsc->vars[i].v;
/* The writer becomes the transformation's reader; clearing the pointer
   keeps free_dsc_proc() from destroying it again. */
725 t->z_reader = casewriter_make_reader (dsc->z_writer);
728 dsc->z_writer = NULL;
/* Create one numeric (width 0) variable per Z-score, labelled after its
   source variable, and record it in the next z_scores slot. */
730 for (cnt = i = 0; i < dsc->var_cnt; i++)
732 struct dsc_var *dv = &dsc->vars[i];
733 if (dv->z_name != NULL)
735 struct dsc_z_score *z;
736 struct variable *dst_var;
739 dst_var = dict_create_var_assert (dataset_dict (ds), dv->z_name, 0);
741 label = xasprintf (_("Z-score of %s"),var_to_string (dv->v));
742 var_set_label (dst_var, label, false);
745 z = &t->z_scores[cnt++];
751 add_transformation (ds,
752 descriptives_trns_proc, descriptives_trns_free, t);
755 /* Statistical calculation. */
/* Declared ahead of calc_descriptives(), which calls it before its
   definition. */
757 static bool listwise_missing (struct dsc_proc *dsc, const struct ccase *c);
/* Computes the statistics for one group of cases and updates DSC.

   DSC    procedure state; the per-variable counts, moments and stats
          and the group totals are overwritten.
   GROUP  reader for the group's cases.
   DS     dataset, used for split-file output, the weight filter and the
          per-case weight.

   The first case is peeked at to output the split-file values and the
   reader is wrapped in a weight filter.  The first pass sums weights,
   counts missing values and feeds moments_pass_one().  A second pass
   over a clone of the reader feeds moments_pass_two() when a moment
   above the mean is needed.  The per-variable statistics are then
   derived, and a summary case is written to DSC's z_writer.
   NOTE(review): several guards and the closing statements are not
   visible in this listing. */
759 /* Calculates and displays descriptive statistics for the cases
762 calc_descriptives (struct dsc_proc *dsc, struct casereader *group,
765 struct casereader *pass1, *pass2;
771 c = casereader_peek (group, 0);
774 casereader_destroy (group);
777 output_split_file_values (ds, c);
780 group = casereader_create_filter_weight (group, dataset_dict (ds),
784 pass2 = dsc->max_moment <= MOMENT_MEAN ? NULL : casereader_clone (pass1);
786 for (i = 0; i < dsc->var_cnt; i++)
788 struct dsc_var *dv = &dsc->vars[i];
790 dv->valid = dv->missing = 0.0;
791 if (dv->moments != NULL)
792 moments_clear (dv->moments);
796 dsc->missing_listwise = 0.;
799 /* First pass to handle most of the work. */
801 for (; (c = casereader_read (pass1)) != NULL; case_unref (c))
803 double weight = dict_get_case_weight (dataset_dict (ds), c, NULL);
805 /* Check for missing values. */
/* The listwise-missing total is kept for every MISSING mode. */
806 if (listwise_missing (dsc, c))
808 dsc->missing_listwise += weight;
809 if (dsc->missing_type == DSC_LISTWISE)
812 dsc->valid += weight;
814 for (i = 0; i < dsc->var_cnt; i++)
816 struct dsc_var *dv = &dsc->vars[i];
817 double x = case_num (c, dv->v);
819 if (var_is_num_missing (dv->v, x, dsc->exclude))
821 dv->missing += weight;
825 if (dv->moments != NULL)
826 moments_pass_one (dv->moments, x, weight);
/* If the first-pass reader reports failure, the second-pass clone is
   released as well. */
836 if (!casereader_destroy (pass1))
838 casereader_destroy (pass2);
842 /* Second pass for higher-order moments. */
843 if (dsc->max_moment > MOMENT_MEAN)
845 for (; (c = casereader_read (pass2)) != NULL; case_unref (c))
847 double weight = dict_get_case_weight (dataset_dict (ds), c, NULL);
849 /* Check for missing values. */
850 if (dsc->missing_type == DSC_LISTWISE && listwise_missing (dsc, c))
853 for (i = 0; i < dsc->var_cnt; i++)
855 struct dsc_var *dv = &dsc->vars[i];
856 double x = case_num (c, dv->v);
858 if (var_is_num_missing (dv->v, x, dsc->exclude))
861 if (dv->moments != NULL)
862 moments_pass_two (dv->moments, x, weight);
865 if (!casereader_destroy (pass2))
869 /* Calculate results. */
/* Summary case for the Z-score writer: the case count comes first, the
   mean and standard deviation values are appended further down. */
872 c = case_create (casewriter_get_proto (dsc->z_writer));
874 case_data_rw_idx (c, z_idx++)->f = count;
879 for (i = 0; i < dsc->var_cnt; i++)
881 struct dsc_var *dv = &dsc->vars[i];
/* Every statistic starts out as SYSMIS and is overwritten only where it
   can be computed. */
885 for (j = 0; j < DSC_N_STATS; j++)
886 dv->stats[j] = SYSMIS;
/* W: weight of the cases that are valid for this variable. */
888 dv->valid = W = dsc->valid - dv->missing;
890 if (dv->moments != NULL)
891 moments_calculate (dv->moments, NULL,
892 &dv->stats[DSC_MEAN], &dv->stats[DSC_VARIANCE],
893 &dv->stats[DSC_SKEWNESS], &dv->stats[DSC_KURTOSIS]);
/* Standard error of the mean: sqrt (variance) / sqrt (W). */
894 if (dsc->calc_stats & (1ul << DSC_SEMEAN)
895 && dv->stats[DSC_VARIANCE] != SYSMIS && W > 0.)
896 dv->stats[DSC_SEMEAN] = sqrt (dv->stats[DSC_VARIANCE]) / sqrt (W);
897 if (dsc->calc_stats & (1ul << DSC_STDDEV)
898 && dv->stats[DSC_VARIANCE] != SYSMIS)
899 dv->stats[DSC_STDDEV] = sqrt (dv->stats[DSC_VARIANCE]);
900 if (dsc->calc_stats & (1ul << DSC_SEKURT))
901 if (dv->stats[DSC_KURTOSIS] != SYSMIS)
902 dv->stats[DSC_SEKURT] = calc_sekurt (W);
903 if (dsc->calc_stats & (1ul << DSC_SESKEW)
904 && dv->stats[DSC_SKEWNESS] != SYSMIS)
905 dv->stats[DSC_SESKEW] = calc_seskew (W);
/* A minimum of DBL_MAX or a maximum of -DBL_MAX is reported as SYSMIS
   (range, minimum and maximum alike). */
906 dv->stats[DSC_RANGE] = ((dv->min == DBL_MAX || dv->max == -DBL_MAX)
907 ? SYSMIS : dv->max - dv->min);
908 dv->stats[DSC_MIN] = dv->min == DBL_MAX ? SYSMIS : dv->min;
909 dv->stats[DSC_MAX] = dv->max == -DBL_MAX ? SYSMIS : dv->max;
/* The sum is recovered from the mean and the valid weight. */
910 if (dsc->calc_stats & (1ul << DSC_SUM))
911 dv->stats[DSC_SUM] = W * dv->stats[DSC_MEAN];
/* Append this variable's (mean, stddev) pair to the summary case. */
915 case_data_rw_idx (c, z_idx++)->f = dv->stats[DSC_MEAN];
916 case_data_rw_idx (c, z_idx++)->f = dv->stats[DSC_STDDEV];
921 casewriter_write (dsc->z_writer, c);
923 /* Output results. */
927 /* Returns true if any of the descriptives variables in DSC's
   variable list have missing values in case C, false otherwise.
   "Missing" is judged per variable with var_is_num_missing() against
   the classes in DSC's exclude setting. */
930 listwise_missing (struct dsc_proc *dsc, const struct ccase *c)
934 for (i = 0; i < dsc->var_cnt; i++)
936 struct dsc_var *dv = &dsc->vars[i];
937 double x = case_num (c, dv->v);
939 if (var_is_num_missing (dv->v, x, dsc->exclude))
945 /* Statistical display. */
/* Comparison callback that display() passes to sort(), with the
   struct dsc_proc as auxiliary data. */
947 static algo_compare_func descriptives_compare_dsc_vars;
949 /* Displays a table of descriptive statistics for DSC: one header row
   and one row per variable, with a column for the variable, one or two
   count columns, and one column per statistic in DSC's show_stats. */
951 display (struct dsc_proc *dsc)
/* Column count: the variable column plus "N" (DSC_LINE) or "Valid N"
   and "Missing N" (DSC_SERIAL); the loop below visits each statistic
   that is shown. */
957 nc = 1 + (dsc->format == DSC_SERIAL ? 2 : 1);
958 for (i = 0; i < DSC_N_STATS; i++)
959 if (dsc->show_stats & (1ul << i))
/* Reorder the variables in place when a sort criterion was given. */
962 if (dsc->sort_by_stat != DSC_NONE)
963 sort (dsc->vars, dsc->var_cnt, sizeof *dsc->vars,
964 descriptives_compare_dsc_vars, dsc);
966 t = tab_create (nc, dsc->var_cnt + 1);
967 tab_headers (t, 1, 0, 1, 0);
968 tab_box (t, TAL_1, TAL_1, -1, -1, 0, 0, nc - 1, dsc->var_cnt);
969 tab_box (t, -1, -1, -1, TAL_1, 1, 0, nc - 1, dsc->var_cnt);
970 tab_hline (t, TAL_2, 0, nc - 1, 1);
971 tab_vline (t, TAL_2, 1, 0, dsc->var_cnt);
/* Header row; NC is reused as the running column index. */
974 tab_text (t, nc++, 0, TAB_LEFT | TAT_TITLE, _("Variable"));
975 if (dsc->format == DSC_SERIAL)
977 tab_text (t, nc++, 0, TAB_CENTER | TAT_TITLE, _("Valid N"));
978 tab_text (t, nc++, 0, TAB_CENTER | TAT_TITLE, _("Missing N"));
981 tab_text (t, nc++, 0, TAB_CENTER | TAT_TITLE, "N");
/* Statistic titles are stored untranslated in dsc_info and translated
   here. */
983 for (i = 0; i < DSC_N_STATS; i++)
984 if (dsc->show_stats & (1ul << i))
986 const char *title = gettext (dsc_info[i].name);
987 tab_text (t, nc++, 0, TAB_CENTER | TAT_TITLE, title);
/* Body: variable I goes into table row I + 1. */
990 for (i = 0; i < dsc->var_cnt; i++)
992 struct dsc_var *dv = &dsc->vars[i];
996 tab_text (t, nc++, i + 1, TAB_LEFT, var_to_string (dv->v));
997 tab_text_format (t, nc++, i + 1, 0, "%g", dv->valid);
998 if (dsc->format == DSC_SERIAL)
999 tab_text_format (t, nc++, i + 1, 0, "%g", dv->missing);
1001 for (j = 0; j < DSC_N_STATS; j++)
1002 if (dsc->show_stats & (1ul << j))
1003 tab_double (t, nc++, i + 1, TAB_NONE, dv->stats[j], NULL);
/* The title reports the group totals accumulated by
   calc_descriptives(). */
1006 tab_title (t, _("Valid cases = %g; cases with missing value(s) = %g."),
1007 dsc->valid, dsc->missing_listwise);
1012 /* Compares `struct dsc_var's A and B according to the ordering
1013 specified by CMD. */
1015 descriptives_compare_dsc_vars (const void *a_, const void *b_, const void *dsc_)
1017 const struct dsc_var *a = a_;
1018 const struct dsc_var *b = b_;
1019 const struct dsc_proc *dsc = dsc_;
1023 if (dsc->sort_by_stat == DSC_NAME)
1024 result = utf8_strcasecmp (var_get_name (a->v), var_get_name (b->v));
1027 double as = a->stats[dsc->sort_by_stat];
1028 double bs = b->stats[dsc->sort_by_stat];
1030 result = as < bs ? -1 : as > bs;
1033 if (!dsc->sort_ascending)