1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 1997-2000, 2009-2012 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
23 #include "data/casegrouper.h"
24 #include "data/casereader.h"
25 #include "data/casewriter.h"
26 #include "data/dataset.h"
27 #include "data/dictionary.h"
28 #include "data/transformations.h"
29 #include "data/variable.h"
30 #include "language/command.h"
31 #include "language/dictionary/split-file.h"
32 #include "language/lexer/lexer.h"
33 #include "language/lexer/variable-parser.h"
34 #include "libpspp/array.h"
35 #include "libpspp/assertion.h"
36 #include "libpspp/compiler.h"
37 #include "libpspp/i18n.h"
38 #include "libpspp/message.h"
39 #include "math/moments.h"
40 #include "output/tab.h"
42 #include "gl/xalloc.h"
45 #define _(msgid) gettext (msgid)
46 #define N_(msgid) msgid
48 /* DESCRIPTIVES private data. */
52 /* Handling of missing values, as selected by the VARIABLE and LISTWISE
   keywords of the MISSING subcommand. */
55 DSC_VARIABLE, /* Handle missing values on a per-variable basis (initial setting). */
56 DSC_LISTWISE /* Discard entire case if any variable is missing. */
59 /* Describes properties of a distribution for the purpose of
60 calculating a Z-score. */
/* setup_z_trns allocates one element per variable that has a Z-score name.
   descriptives_trns_proc reloads mean and std_dev from the Z-score reader and
   sets both to SYSMIS when it reports an internal error. */
63 const struct variable *src_var; /* Variable on which z-score is based. */
64 struct variable *z_var; /* New z-score variable. */
65 double mean; /* Distribution mean, or SYSMIS. */
66 double std_dev; /* Distribution standard deviation, or SYSMIS. */
69 /* DESCRIPTIVES transformation (for calculating Z-scores). */
/* Built by setup_z_trns and released by descriptives_trns_free. */
72 struct dsc_z_score *z_scores; /* Array of Z-scores (z_score_cnt elements). */
73 int z_score_cnt; /* Number of Z-scores. */
74 const struct variable **vars; /* Variables for listwise missing checks;
                                    allocated only for DSC_LISTWISE. */
75 size_t var_cnt; /* Number of variables. */
76 enum dsc_missing_type missing_type; /* Treatment of missing values. */
77 enum mv_class exclude; /* Classes of missing values to exclude. */
78 struct casereader *z_reader; /* Reader for count, mean, stddev: each case
                                   holds the count first, then one
                                   (mean, stddev) pair per Z-score. */
79 casenumber count; /* Number left in this SPLIT FILE group.*/
83 /* Statistics. Used as bit indexes, so must be 32 or fewer. */
/* The show_stats and calc_stats masks are built as 1ul << DSC_*, and the
   non-negative values also index dsc_info[] and dsc_var.stats[]. */
86 DSC_MEAN = 0, DSC_SEMEAN, DSC_STDDEV, DSC_VARIANCE, DSC_KURTOSIS,
87 DSC_SEKURT, DSC_SKEWNESS, DSC_SESKEW, DSC_RANGE, DSC_MIN,
88 DSC_MAX, DSC_SUM, DSC_N_STATS,
90 /* Only valid as sort criteria. */
91 DSC_NAME = -2, /* Sort by name. */
92 DSC_NONE = -1 /* Unsorted; also what match_statistic reports when no
                    statistic is named. */
95 /* Describes one statistic. */
96 struct dsc_statistic_info
98 const char *identifier; /* Identifier: the syntax keyword that
                              match_statistic compares with lex_match_id. */
99 const char *name; /* Full name; marked with N_() in the table and passed
                        through gettext when display builds the header. */
100 enum moment moment; /* Highest moment needed to calculate. */
103 /* Table of statistics, indexed by DSC_*. */
/* Entries must stay in the same order as enum dsc_statistic.  The
   standard-error rows for kurtosis and skewness carry MOMENT_NONE:
   cmd_descriptives adds DSC_KURTOSIS/DSC_SKEWNESS to calc_stats whenever
   DSC_SEKURT/DSC_SESKEW is shown, and those rows supply the moment.
   RANGE, MINIMUM and MAXIMUM are derived from dsc_var.min/max rather than
   from the moments. */
104 static const struct dsc_statistic_info dsc_info[DSC_N_STATS] =
106 {"MEAN", N_("Mean"), MOMENT_MEAN},
107 {"SEMEAN", N_("S.E. Mean"), MOMENT_VARIANCE},
108 {"STDDEV", N_("Std Dev"), MOMENT_VARIANCE},
109 {"VARIANCE", N_("Variance"), MOMENT_VARIANCE},
110 {"KURTOSIS", N_("Kurtosis"), MOMENT_KURTOSIS},
111 {"SEKURTOSIS", N_("S.E. Kurt"), MOMENT_NONE},
112 {"SKEWNESS", N_("Skewness"), MOMENT_SKEWNESS},
113 {"SESKEWNESS", N_("S.E. Skew"), MOMENT_NONE},
114 {"RANGE", N_("Range"), MOMENT_NONE},
115 {"MINIMUM", N_("Minimum"), MOMENT_NONE},
116 {"MAXIMUM", N_("Maximum"), MOMENT_NONE},
117 {"SUM", N_("Sum"), MOMENT_MEAN},
120 /* Statistics calculated by default if none are explicitly requested. */
// Bit mask of (1ul << DSC_*) values.  cmd_descriptives starts show_stats and
// calc_stats from it, and STATISTICS=DEFAULT ORs it into show_stats.
122 #define DEFAULT_STATS \
123 ((1ul << DSC_MEAN) | (1ul << DSC_STDDEV) | (1ul << DSC_MIN) \
126 /* A variable specified on DESCRIPTIVES. */
129 const struct variable *v; /* Variable to calculate on. */
130 char *z_name; /* Name for z-score variable, or NULL if none; freed by
                    free_dsc_proc. */
131 double valid, missing; /* Valid, missing counts (sums of case weights). */
132 struct moments *moments; /* Moments accumulator; may be NULL. */
133 double min, max; /* Minimum and maximum values. */
134 double stats[DSC_N_STATS]; /* All the stats' values, indexed by DSC_*. */
/* Output formats selected by the LINE and SERIAL keywords of FORMAT. */
140 DSC_LINE, /* Abbreviated format (initial setting). */
141 DSC_SERIAL /* Long format; display gives it one more column than
                 DSC_LINE. */
144 /* A DESCRIPTIVES procedure. */
/* Allocated and filled in by cmd_descriptives. */
147 /* Per-variable info. */
148 struct dsc_var *vars; /* Variables. */
149 size_t var_cnt; /* Number of variables. */
152 enum dsc_missing_type missing_type; /* Treatment of missing values. */
153 enum mv_class exclude; /* Classes of missing values to exclude. */
154 int show_var_labels; /* Nonzero to show variable labels. */
155 int show_index; /* Nonzero to show variable index. */
156 enum dsc_format format; /* Output format. */
158 /* Accumulated results. */
159 double missing_listwise; /* Sum of weights of cases missing listwise. */
160 double valid; /* Sum of weights of valid cases. */
161 bool bad_warn; /* Warn if bad weight found. */
162 enum dsc_statistic sort_by_stat; /* Statistic to sort by; DSC_NAME: sort
                                       by name; DSC_NONE: unsorted. */
163 int sort_ascending; /* !0: ascending order; 0: descending. */
164 unsigned long show_stats; /* Statistics to display (mask of 1ul << DSC_*). */
165 unsigned long calc_stats; /* Statistics to calculate (mask of 1ul << DSC_*). */
166 enum moment max_moment; /* Highest moment needed for stats. */
169 struct casewriter *z_writer; /* Mean and stddev per SPLIT FILE group;
                                   NULL until cmd_descriptives creates it
                                   for Z-score output, and NULL again after
                                   setup_z_trns converts it to a reader. */
/* Forward declarations for the static helpers defined later in this file. */
/* Parsing and cleanup helpers. */
173 static enum dsc_statistic match_statistic (struct lexer *);
174 static void free_dsc_proc (struct dsc_proc *);
176 /* Z-score functions: name validation and generation, the mapping table,
   and installation of the transformation. */
177 static bool try_name (const struct dictionary *dict,
178 struct dsc_proc *dsc, const char *name);
179 static char *generate_z_varname (const struct dictionary *dict,
180 struct dsc_proc *dsc,
181 const char *name, int *z_cnt);
182 static void dump_z_table (struct dsc_proc *);
183 static void setup_z_trns (struct dsc_proc *, struct dataset *);
185 /* Procedure execution functions: calc_descriptives is called once per
   SPLIT FILE group by cmd_descriptives. */
186 static void calc_descriptives (struct dsc_proc *, struct casereader *,
188 static void display (struct dsc_proc *dsc);
190 /* Parser and outline. */
192 /* Handles DESCRIPTIVES: parses the command from LEXER, then runs
   calc_descriptives on each SPLIT FILE group of DS.  Returns CMD_SUCCESS
   when both casegrouper_destroy and proc_commit succeed, and
   CMD_CASCADING_FAILURE otherwise. */
194 cmd_descriptives (struct lexer *lexer, struct dataset *ds)
196 struct dictionary *dict = dataset_dict (ds);
197 struct dsc_proc *dsc;
198 const struct variable **vars = NULL;
200 int save_z_scores = 0;
205 struct casegrouper *grouper;
206 struct casereader *group;
208 /* Create and initialize dsc. */
209 dsc = xmalloc (sizeof *dsc);
212 dsc->missing_type = DSC_VARIABLE;
213 dsc->exclude = MV_ANY;
214 dsc->show_var_labels = 1;
216 dsc->format = DSC_LINE;
217 dsc->missing_listwise = 0.;
220 dsc->sort_by_stat = DSC_NONE;
221 dsc->sort_ascending = 1;
222 dsc->show_stats = dsc->calc_stats = DEFAULT_STATS;
223 dsc->z_writer = NULL;
225 /* Parse DESCRIPTIVES. */
226 while (lex_token (lexer) != T_ENDCMD)
228 if (lex_match_id (lexer, "MISSING"))
230 lex_match (lexer, T_EQUALS);
231 while (lex_token (lexer) != T_ENDCMD && lex_token (lexer) != T_SLASH)
233 if (lex_match_id (lexer, "VARIABLE"))
234 dsc->missing_type = DSC_VARIABLE;
235 else if (lex_match_id (lexer, "LISTWISE"))
236 dsc->missing_type = DSC_LISTWISE;
237 else if (lex_match_id (lexer, "INCLUDE"))
/* INCLUDE narrows the excluded classes from MV_ANY to MV_SYSTEM. */
238 dsc->exclude = MV_SYSTEM;
241 lex_error (lexer, NULL);
244 lex_match (lexer, T_COMMA);
247 else if (lex_match_id (lexer, "SAVE"))
249 else if (lex_match_id (lexer, "FORMAT"))
251 lex_match (lexer, T_EQUALS);
252 while (lex_token (lexer) != T_ENDCMD && lex_token (lexer) != T_SLASH)
254 if (lex_match_id (lexer, "LABELS"))
255 dsc->show_var_labels = 1;
256 else if (lex_match_id (lexer, "NOLABELS"))
257 dsc->show_var_labels = 0;
258 else if (lex_match_id (lexer, "INDEX"))
260 else if (lex_match_id (lexer, "NOINDEX"))
262 else if (lex_match_id (lexer, "LINE"))
263 dsc->format = DSC_LINE;
264 else if (lex_match_id (lexer, "SERIAL"))
265 dsc->format = DSC_SERIAL;
268 lex_error (lexer, NULL);
271 lex_match (lexer, T_COMMA);
274 else if (lex_match_id (lexer, "STATISTICS"))
276 lex_match (lexer, T_EQUALS);
278 while (lex_token (lexer) != T_ENDCMD && lex_token (lexer) != T_SLASH)
280 if (lex_match (lexer, T_ALL))
281 dsc->show_stats |= (1ul << DSC_N_STATS) - 1;
282 else if (lex_match_id (lexer, "DEFAULT"))
283 dsc->show_stats |= DEFAULT_STATS;
/* NOTE(review): match_statistic is documented to return DSC_NONE (-1)
   when the token names no statistic.  That value would be used here as
   a shift count, which C leaves undefined -- confirm and guard. */
285 dsc->show_stats |= 1ul << (match_statistic (lexer));
286 lex_match (lexer, T_COMMA);
288 if (dsc->show_stats == 0)
289 dsc->show_stats = DEFAULT_STATS;
291 else if (lex_match_id (lexer, "SORT"))
293 lex_match (lexer, T_EQUALS);
294 if (lex_match_id (lexer, "NAME"))
295 dsc->sort_by_stat = DSC_NAME;
298 dsc->sort_by_stat = match_statistic (lexer);
/* SORT without a recognizable statistic falls back to the mean. */
299 if (dsc->sort_by_stat == DSC_NONE )
300 dsc->sort_by_stat = DSC_MEAN;
/* Optional (A) or (D) selects ascending or descending order. */
302 if (lex_match (lexer, T_LPAREN))
304 if (lex_match_id (lexer, "A"))
305 dsc->sort_ascending = 1;
306 else if (lex_match_id (lexer, "D"))
307 dsc->sort_ascending = 0;
309 lex_error (lexer, NULL);
310 lex_force_match (lexer, T_RPAREN);
313 else if (var_cnt == 0)
/* The VARIABLES keyword is only consumed when an equals sign follows it. */
315 if (lex_next_token (lexer, 1) == T_EQUALS)
317 lex_match_id (lexer, "VARIABLES");
318 lex_match (lexer, T_EQUALS);
321 while (lex_token (lexer) != T_ENDCMD && lex_token (lexer) != T_SLASH)
325 if (!parse_variables_const (lexer, dict, &vars, &var_cnt,
326 PV_APPEND | PV_NO_DUPLICATE | PV_NUMERIC))
/* Grow dsc->vars to the new total; the loop below starts at the old
   dsc->var_cnt, so it visits only the newly parsed variables. */
329 dsc->vars = xnrealloc ((void *)dsc->vars, var_cnt, sizeof *dsc->vars);
330 for (i = dsc->var_cnt; i < var_cnt; i++)
332 struct dsc_var *dv = &dsc->vars[i];
337 dsc->var_cnt = var_cnt;
/* A parenthesized identifier names the Z-score variable for the last
   variable parsed so far (dsc->vars[dsc->var_cnt - 1]). */
339 if (lex_match (lexer, T_LPAREN))
341 if (lex_token (lexer) != T_ID)
343 lex_error (lexer, NULL);
346 if (try_name (dict, dsc, lex_tokcstr (lexer)))
348 struct dsc_var *dsc_var = &dsc->vars[dsc->var_cnt - 1];
349 dsc_var->z_name = xstrdup (lex_tokcstr (lexer));
353 msg (SE, _("Z-score variable name %s would be"
354 " a duplicate variable name."), lex_tokcstr (lexer));
356 if (!lex_force_match (lexer, T_RPAREN))
363 lex_error (lexer, NULL);
367 lex_match (lexer, T_SLASH);
371 msg (SE, _("No variables specified."));
375 /* Construct z-score varnames, show translation table. */
376 if (z_cnt || save_z_scores)
378 struct caseproto *proto;
384 for (i = 0; i < dsc->var_cnt; i++)
386 struct dsc_var *dsc_var = &dsc->vars[i];
387 if (dsc_var->z_name == NULL)
389 const char *name = var_get_name (dsc_var->v);
390 dsc_var->z_name = generate_z_varname (dict, dsc, name,
392 if (dsc_var->z_name == NULL)
/* The Z-score writer stores 1 + 2 * z_cnt values per case: calc_descriptives
   writes the group count first, then a mean and a standard deviation per
   Z-score variable. */
400 proto = caseproto_create ();
401 for (i = 0; i < 1 + 2 * z_cnt; i++)
402 proto = caseproto_add_width (proto, 0);
403 dsc->z_writer = autopaging_writer_create (proto);
404 caseproto_unref (proto);
409 /* Figure out statistics to display. */
/* Showing skewness or kurtosis also shows its standard error. */
410 if (dsc->show_stats & (1ul << DSC_SKEWNESS))
411 dsc->show_stats |= 1ul << DSC_SESKEW;
412 if (dsc->show_stats & (1ul << DSC_KURTOSIS))
413 dsc->show_stats |= 1ul << DSC_SEKURT;
415 /* Figure out which statistics to calculate. */
416 dsc->calc_stats = dsc->show_stats;
/* Mean and standard deviation are the values calc_descriptives writes to
   the Z-score writer. */
418 dsc->calc_stats |= (1ul << DSC_MEAN) | (1ul << DSC_STDDEV);
/* DSC_NAME and DSC_NONE are negative and name no statistic. */
419 if (dsc->sort_by_stat >= 0)
420 dsc->calc_stats |= 1ul << dsc->sort_by_stat;
/* The standard errors are only filled in when the underlying skewness or
   kurtosis has been calculated. */
421 if (dsc->show_stats & (1ul << DSC_SESKEW))
422 dsc->calc_stats |= 1ul << DSC_SKEWNESS;
423 if (dsc->show_stats & (1ul << DSC_SEKURT))
424 dsc->calc_stats |= 1ul << DSC_KURTOSIS;
426 /* Figure out maximum moment needed and allocate moments for
428 dsc->max_moment = MOMENT_NONE;
429 for (i = 0; i < DSC_N_STATS; i++)
430 if (dsc->calc_stats & (1ul << i) && dsc_info[i].moment > dsc->max_moment)
431 dsc->max_moment = dsc_info[i].moment;
// No moments object is created when no calculated statistic needs one.
432 if (dsc->max_moment != MOMENT_NONE)
433 for (i = 0; i < dsc->var_cnt; i++)
434 dsc->vars[i].moments = moments_create (dsc->max_moment);
// Run calc_descriptives once for each SPLIT FILE group.
437 grouper = casegrouper_create_splits (proc_open (ds), dict);
438 while (casegrouper_get_next_group (grouper, &group))
439 calc_descriptives (dsc, group, ds);
440 ok = casegrouper_destroy (grouper);
441 ok = proc_commit (ds) && ok;
445 setup_z_trns (dsc, ds);
450 return ok ? CMD_SUCCESS : CMD_CASCADING_FAILURE;
458 /* Returns the statistic named by the current token and skips past the token.
459 Returns DSC_NONE if no statistic is given (e.g., subcommand with no
460 specifiers). Emits an error if the current token ID does not name a statistic. */
462 static enum dsc_statistic
463 match_statistic (struct lexer *lexer)
// Only identifier tokens are compared against the keyword table.
465 if (lex_token (lexer) == T_ID)
467 enum dsc_statistic stat;
// Linear scan of dsc_info[] in DSC_* order; lex_match_id decides whether
// the current token matches each keyword.
469 for (stat = 0; stat < DSC_N_STATS; stat++)
470 if (lex_match_id (lexer, dsc_info[stat].identifier))
// Reached for an identifier that names no statistic.
474 lex_error (lexer, _("expecting statistic name: reverting to default"));
482 free_dsc_proc (struct dsc_proc *dsc)
// Release what each variable owns: its Z-score name and its moments.
489 for (i = 0; i < dsc->var_cnt; i++)
491 struct dsc_var *dsc_var = &dsc->vars[i];
492 free (dsc_var->z_name);
493 moments_destroy (dsc_var->moments);
// z_writer is NULL here if setup_z_trns already turned it into a reader.
// NOTE(review): presumably casewriter_destroy accepts NULL -- confirm.
495 casewriter_destroy (dsc->z_writer);
502 /* Returns false if NAME is a duplicate of any existing variable name or
503 of any previously-declared z-var name; otherwise returns true. */
505 try_name (const struct dictionary *dict, struct dsc_proc *dsc,
/* First check NAME against the variables already in DICT. */
510 if (dict_lookup_var (dict, name) != NULL)
/* Then check it against the Z-score names already assigned in DSC.
   The comparison uses utf8_strcasecmp, so it ignores case. */
512 for (i = 0; i < dsc->var_cnt; i++)
514 struct dsc_var *dsc_var = &dsc->vars[i];
515 if (dsc_var->z_name != NULL && !utf8_strcasecmp (dsc_var->z_name, name))
521 /* Generates a name for a Z-score variable based on a variable
522 named VAR_NAME, given that *Z_CNT generated variable names are
523 known to already exist. If successful, returns the new name
524 as a dynamically allocated string. On failure, returns NULL. */
/* The first candidate is "Z" followed by VAR_NAME, truncated by
   utf8_encoding_trunc using the dictionary's encoding.  Only if that name
   is taken are the numbered generic names tried, with *Z_CNT selecting
   the family and the number within it. */
526 generate_z_varname (const struct dictionary *dict, struct dsc_proc *dsc,
527 const char *var_name, int *z_cnt)
529 char *z_name, *trunc_name;
531 /* Try a name based on the original variable name. */
532 z_name = xasprintf ("Z%s", var_name);
533 trunc_name = utf8_encoding_trunc (z_name, dict_get_encoding (dict),
536 if (try_name (dict, dsc, trunc_name))
540 /* Generate a synthetic name. */
/* The ZSC family is numbered by *z_cnt itself; the later families
   subtract the count used by the families before them (99, 108, 117),
   so each of them is numbered 01-09. */
548 sprintf (name, "ZSC%03d", *z_cnt);
549 else if (*z_cnt <= 108)
550 sprintf (name, "STDZ%02d", *z_cnt - 99);
551 else if (*z_cnt <= 117)
552 sprintf (name, "ZZZZ%02d", *z_cnt - 108);
553 else if (*z_cnt <= 126)
554 sprintf (name, "ZQZQ%02d", *z_cnt - 117);
/* NOTE(review): "ZSC0999" in the message below looks like a typo for
   "ZSC099": the "ZSC%03d" format and the total of 126 names
   (99 + 9 + 9 + 9) both imply the ZSC family ends at 099.  The string
   is user-visible and translated, so it is left unchanged here. */
557 msg (SE, _("Ran out of generic names for Z-score variables. "
558 "There are only 126 generic names: ZSC001-ZSC0999, "
559 "STDZ01-STDZ09, ZZZZ01-ZZZZ09, ZQZQ01-ZQZQ09."));
/* A generic name is accepted only if it is not already in use. */
563 if (try_name (dict, dsc, name))
564 return xstrdup (name);
569 /* Outputs a table describing the mapping between source
570 variables and Z-score variables. */
572 dump_z_table (struct dsc_proc *dsc)
/* First pass over the variables that have a Z-score name; the table
   below is sized from cnt. */
580 for (i = 0; i < dsc->var_cnt; i++)
581 if (dsc->vars[i].z_name != NULL)
/* Two columns (source, target) and cnt + 1 rows: a header row plus one
   row per mapping. */
585 t = tab_create (2, cnt + 1);
586 tab_title (t, _("Mapping of variables to corresponding Z-scores."));
587 tab_headers (t, 0, 0, 1, 0);
588 tab_box (t, TAL_1, TAL_1, TAL_0, TAL_1, 0, 0, 1, cnt);
589 tab_hline (t, TAL_2, 0, 1, 1);
590 tab_text (t, 0, 0, TAB_CENTER | TAT_TITLE, _("Source"));
591 tab_text (t, 1, 0, TAB_CENTER | TAT_TITLE, _("Target"));
/* Second pass: y is the next table row to fill, starting below the
   header. */
596 for (i = 0, y = 1; i < dsc->var_cnt; i++)
597 if (dsc->vars[i].z_name != NULL)
599 tab_text (t, 0, y, TAB_LEFT, var_to_string (dsc->vars[i].v));
600 tab_text (t, 1, y++, TAB_LEFT, dsc->vars[i].z_name);
607 /* Transformation function to calculate Z-scores. Will return SYSMIS if any of
608 the following are true: 1) mean or standard deviation is SYSMIS 2) score is
609 SYSMIS 3) score is user missing and they were not included in the original
610 analysis. 4) any of the variables in the original analysis were missing
611 (either system or user-missing values that weren't included). */
614 descriptives_trns_proc (void *trns_, struct ccase **c,
615 casenumber case_idx UNUSED)
617 struct dsc_trns *t = trns_;
618 struct dsc_z_score *z;
619 const struct variable **vars;
624 struct ccase *z_case;
// Fetch the next record of group statistics from the Z-score reader.
626 z_case = casereader_read (t->z_reader);
// Record layout: the group's case count, then a (mean, std_dev) pair for
// each Z-score, in z_scores[] order.
631 t->count = case_num_idx (z_case, z_idx++);
632 for (z = t->z_scores; z < t->z_scores + t->z_score_cnt; z++)
634 z->mean = case_num_idx (z_case, z_idx++);
635 z->std_dev = case_num_idx (z_case, z_idx++);
643 msg (SE, _("Internal error processing Z scores"));
// After the error, every Z-score's mean and std_dev become SYSMIS, which
// makes the output test below produce SYSMIS.
646 for (z = t->z_scores; z < t->z_scores + t->z_score_cnt; z++)
647 z->mean = z->std_dev = SYSMIS;
// Listwise handling: look for a missing value in any analysis variable.
// NOTE(review): presumably a hit sets all_sysmis, which is tested below --
// confirm.
652 if (t->missing_type == DSC_LISTWISE)
655 for (vars = t->vars; vars < t->vars + t->var_cnt; vars++)
657 double score = case_num (*c, *vars);
658 if (var_is_num_missing (*vars, score, t->exclude))
// *c is unshared before its data is written through case_data_rw.
666 *c = case_unshare (*c);
667 for (z = t->z_scores; z < t->z_scores + t->z_score_cnt; z++)
669 double input = case_num (*c, z->src_var);
670 double *output = &case_data_rw (*c, z->z_var)->f;
672 if (z->mean == SYSMIS || z->std_dev == SYSMIS || all_sysmis
673 || var_is_num_missing (z->src_var, input, t->exclude))
// NOTE(review): no check for std_dev == 0 is visible before this division --
// confirm whether a zero standard deviation can reach this point.
676 *output = (input - z->mean) / z->std_dev;
678 return TRNS_CONTINUE;
681 /* Frees a descriptives_trns struct. */
683 descriptives_trns_free (void *trns_)
685 struct dsc_trns *t = trns_;
/* The result combines the transformation's own ok flag with the error
   state of the Z-score reader; it is read before the reader is
   destroyed. */
686 bool ok = t->ok && !casereader_error (t->z_reader);
689 casereader_destroy (t->z_reader);
/* Consistency check: t->vars must be non-NULL exactly when listwise
   missing-value handling is in effect. */
690 assert((t->missing_type != DSC_LISTWISE) ^ (t->vars != NULL));
695 /* Sets up a transformation to calculate Z scores. */
/* Builds a struct dsc_trns from DSC, creates the Z-score variables in
   DS's dictionary, and registers descriptives_trns_proc and
   descriptives_trns_free as a transformation on DS. */
697 setup_z_trns (struct dsc_proc *dsc, struct dataset *ds)
/* Count the variables that have a Z-score name. */
702 for (cnt = i = 0; i < dsc->var_cnt; i++)
703 if (dsc->vars[i].z_name != NULL)
706 t = xmalloc (sizeof *t);
707 t->z_scores = xnmalloc (cnt, sizeof *t->z_scores);
708 t->z_score_cnt = cnt;
709 t->missing_type = dsc->missing_type;
710 t->exclude = dsc->exclude;
/* Listwise deletion needs every analysis variable at transformation
   time, so the list is copied into the transformation. */
711 if ( t->missing_type == DSC_LISTWISE )
713 t->var_cnt = dsc->var_cnt;
714 t->vars = xnmalloc (t->var_cnt, sizeof *t->vars);
715 for (i = 0; i < t->var_cnt; i++)
716 t->vars[i] = dsc->vars[i].v;
/* The accumulated per-group statistics become the transformation's
   reader.  dsc->z_writer is cleared afterwards, so free_dsc_proc no
   longer sees the converted writer. */
723 t->z_reader = casewriter_make_reader (dsc->z_writer);
726 dsc->z_writer = NULL;
/* Create one numeric dictionary variable per Z-score name and fill in
   the matching z_scores[] slot. */
728 for (cnt = i = 0; i < dsc->var_cnt; i++)
730 struct dsc_var *dv = &dsc->vars[i];
731 if (dv->z_name != NULL)
733 struct dsc_z_score *z;
734 struct variable *dst_var;
736 dst_var = dict_create_var_assert (dataset_dict (ds), dv->z_name, 0);
/* NOTE(review): the xasprintf result is passed straight to
   var_set_label.  If var_set_label copies its argument, this string
   leaks -- confirm. */
737 var_set_label (dst_var,
738 xasprintf (_("Z-score of %s"),var_to_string (dv->v)),
741 z = &t->z_scores[cnt++];
747 add_transformation (ds,
748 descriptives_trns_proc, descriptives_trns_free, t);
751 /* Statistical calculation. */
/* Forward declaration: calc_descriptives calls listwise_missing before
   its definition appears. */
753 static bool listwise_missing (struct dsc_proc *dsc, const struct ccase *c);
755 /* Calculates and displays descriptive statistics for the cases in GROUP. */
758 calc_descriptives (struct dsc_proc *dsc, struct casereader *group,
761 struct casereader *pass1, *pass2;
// The first case is peeked at, not consumed, and passed to
// output_split_file_values.
767 c = casereader_peek (group, 0);
770 casereader_destroy (group);
773 output_split_file_values (ds, c);
776 group = casereader_create_filter_weight (group, dataset_dict (ds),
// A second reader is cloned only when a moment above the mean is needed.
780 pass2 = dsc->max_moment <= MOMENT_MEAN ? NULL : casereader_clone (pass1);
// Reset the per-variable and procedure-wide accumulators for this group.
782 for (i = 0; i < dsc->var_cnt; i++)
784 struct dsc_var *dv = &dsc->vars[i];
786 dv->valid = dv->missing = 0.0;
787 if (dv->moments != NULL)
788 moments_clear (dv->moments);
792 dsc->missing_listwise = 0.;
795 /* First pass to handle most of the work. */
797 for (; (c = casereader_read (pass1)) != NULL; case_unref (c))
799 double weight = dict_get_case_weight (dataset_dict (ds), c, NULL);
801 /* Check for missing values. */
/* missing_listwise is accumulated for either missing-value mode. */
802 if (listwise_missing (dsc, c))
804 dsc->missing_listwise += weight;
805 if (dsc->missing_type == DSC_LISTWISE)
/* NOTE(review): presumably listwise-excluded cases never reach this
   line -- confirm. */
808 dsc->valid += weight;
810 for (i = 0; i < dsc->var_cnt; i++)
812 struct dsc_var *dv = &dsc->vars[i];
813 double x = case_num (c, dv->v);
815 if (var_is_num_missing (dv->v, x, dsc->exclude))
817 dv->missing += weight;
821 if (dv->moments != NULL)
822 moments_pass_one (dv->moments, x, weight);
/* If destroying the first reader reports failure, the second reader is
   destroyed as well. */
832 if (!casereader_destroy (pass1))
834 casereader_destroy (pass2);
838 /* Second pass for higher-order moments. */
839 if (dsc->max_moment > MOMENT_MEAN)
841 for (; (c = casereader_read (pass2)) != NULL; case_unref (c))
843 double weight = dict_get_case_weight (dataset_dict (ds), c, NULL);
845 /* Check for missing values. */
846 if (dsc->missing_type == DSC_LISTWISE && listwise_missing (dsc, c))
849 for (i = 0; i < dsc->var_cnt; i++)
851 struct dsc_var *dv = &dsc->vars[i];
852 double x = case_num (c, dv->v);
854 if (var_is_num_missing (dv->v, x, dsc->exclude))
857 if (dv->moments != NULL)
858 moments_pass_two (dv->moments, x, weight);
861 if (!casereader_destroy (pass2))
865 /* Calculate results. */
/* The Z-score record for this group starts with the case count; the
   mean and standard deviation values are appended in the loop below. */
868 c = case_create (casewriter_get_proto (dsc->z_writer));
870 case_data_rw_idx (c, z_idx++)->f = count;
875 for (i = 0; i < dsc->var_cnt; i++)
877 struct dsc_var *dv = &dsc->vars[i];
/* Every statistic starts as SYSMIS and stays so unless set below. */
881 for (j = 0; j < DSC_N_STATS; j++)
882 dv->stats[j] = SYSMIS;
/* W is this variable's valid weight: the group's valid weight minus
   the weight of the cases where this variable was missing. */
884 dv->valid = W = dsc->valid - dv->missing;
886 if (dv->moments != NULL)
887 moments_calculate (dv->moments, NULL,
888 &dv->stats[DSC_MEAN], &dv->stats[DSC_VARIANCE],
889 &dv->stats[DSC_SKEWNESS], &dv->stats[DSC_KURTOSIS]);
/* Standard error of the mean: sqrt (variance) / sqrt (W). */
890 if (dsc->calc_stats & (1ul << DSC_SEMEAN)
891 && dv->stats[DSC_VARIANCE] != SYSMIS && W > 0.)
892 dv->stats[DSC_SEMEAN] = sqrt (dv->stats[DSC_VARIANCE]) / sqrt (W);
893 if (dsc->calc_stats & (1ul << DSC_STDDEV)
894 && dv->stats[DSC_VARIANCE] != SYSMIS)
895 dv->stats[DSC_STDDEV] = sqrt (dv->stats[DSC_VARIANCE]);
/* The standard errors of kurtosis and skewness depend only on W, but
   are reported only when the statistic itself is available. */
896 if (dsc->calc_stats & (1ul << DSC_SEKURT))
897 if (dv->stats[DSC_KURTOSIS] != SYSMIS)
898 dv->stats[DSC_SEKURT] = calc_sekurt (W);
899 if (dsc->calc_stats & (1ul << DSC_SESKEW)
900 && dv->stats[DSC_SKEWNESS] != SYSMIS)
901 dv->stats[DSC_SESKEW] = calc_seskew (W);
/* DBL_MAX and -DBL_MAX in min and max are treated as "no value" and
   reported as SYSMIS. */
902 dv->stats[DSC_RANGE] = ((dv->min == DBL_MAX || dv->max == -DBL_MAX)
903 ? SYSMIS : dv->max - dv->min);
904 dv->stats[DSC_MIN] = dv->min == DBL_MAX ? SYSMIS : dv->min;
905 dv->stats[DSC_MAX] = dv->max == -DBL_MAX ? SYSMIS : dv->max;
/* NOTE(review): unlike the statistics above, the sum is not guarded
   against a SYSMIS mean -- confirm that W * SYSMIS cannot be
   reported. */
906 if (dsc->calc_stats & (1ul << DSC_SUM))
907 dv->stats[DSC_SUM] = W * dv->stats[DSC_MEAN];
/* Append this variable's mean and standard deviation to the Z-score
   record. */
911 case_data_rw_idx (c, z_idx++)->f = dv->stats[DSC_MEAN];
912 case_data_rw_idx (c, z_idx++)->f = dv->stats[DSC_STDDEV];
917 casewriter_write (dsc->z_writer, c);
919 /* Output results. */
923 /* Returns true if any of the descriptives variables in DSC's
924 variable list have missing values in case C, false otherwise. */
/* Which values count as missing is decided by var_is_num_missing with
   DSC's exclude setting. */
926 listwise_missing (struct dsc_proc *dsc, const struct ccase *c)
930 for (i = 0; i < dsc->var_cnt; i++)
932 struct dsc_var *dv = &dsc->vars[i];
933 double x = case_num (c, dv->v);
935 if (var_is_num_missing (dv->v, x, dsc->exclude))
941 /* Statistical display. */
943 static algo_compare_func descriptives_compare_dsc_vars;
945 /* Displays a table of descriptive statistics for DSC. */
947 display (struct dsc_proc *dsc)
/* Column count: the variable column, then two count columns for
   DSC_SERIAL or one otherwise; the loop below scans show_stats for the
   statistics that are shown. */
953 nc = 1 + (dsc->format == DSC_SERIAL ? 2 : 1);
954 for (i = 0; i < DSC_N_STATS; i++)
955 if (dsc->show_stats & (1ul << i))
/* Sorting operates directly on dsc->vars, with DSC passed to the
   comparison function. */
958 if (dsc->sort_by_stat != DSC_NONE)
959 sort (dsc->vars, dsc->var_cnt, sizeof *dsc->vars,
960 descriptives_compare_dsc_vars, dsc);
/* One header row plus one row per variable. */
962 t = tab_create (nc, dsc->var_cnt + 1);
963 tab_headers (t, 1, 0, 1, 0);
964 tab_box (t, TAL_1, TAL_1, -1, -1, 0, 0, nc - 1, dsc->var_cnt);
965 tab_box (t, -1, -1, -1, TAL_1, 1, 0, nc - 1, dsc->var_cnt);
966 tab_hline (t, TAL_2, 0, nc - 1, 1);
967 tab_vline (t, TAL_2, 1, 0, dsc->var_cnt);
/* Header row: nc is reused from here on as the running column index. */
970 tab_text (t, nc++, 0, TAB_LEFT | TAT_TITLE, _("Variable"));
971 if (dsc->format == DSC_SERIAL)
973 tab_text (t, nc++, 0, TAB_CENTER | TAT_TITLE, _("Valid N"));
974 tab_text (t, nc++, 0, TAB_CENTER | TAT_TITLE, _("Missing N"));
977 tab_text (t, nc++, 0, TAB_CENTER | TAT_TITLE, "N");
/* The statistic names are stored untranslated in dsc_info and
   translated here. */
979 for (i = 0; i < DSC_N_STATS; i++)
980 if (dsc->show_stats & (1ul << i))
982 const char *title = gettext (dsc_info[i].name);
983 tab_text (t, nc++, 0, TAB_CENTER | TAT_TITLE, title);
/* One row per variable, filled in the same column order as the
   header. */
986 for (i = 0; i < dsc->var_cnt; i++)
988 struct dsc_var *dv = &dsc->vars[i];
992 tab_text (t, nc++, i + 1, TAB_LEFT, var_to_string (dv->v));
993 tab_text_format (t, nc++, i + 1, 0, "%g", dv->valid);
994 if (dsc->format == DSC_SERIAL)
995 tab_text_format (t, nc++, i + 1, 0, "%g", dv->missing);
997 for (j = 0; j < DSC_N_STATS; j++)
998 if (dsc->show_stats & (1ul << j))
999 tab_double (t, nc++, i + 1, TAB_NONE, dv->stats[j], NULL);
/* The title reports the procedure-wide valid and listwise-missing
   weights. */
1002 tab_title (t, _("Valid cases = %g; cases with missing value(s) = %g."),
1003 dsc->valid, dsc->missing_listwise);
1008 /* Compares `struct dsc_var's A and B according to the ordering
1009 specified by DSC. */
1011 descriptives_compare_dsc_vars (const void *a_, const void *b_, const void *dsc_)
1013 const struct dsc_var *a = a_;
1014 const struct dsc_var *b = b_;
1015 const struct dsc_proc *dsc = dsc_;
1019 if (dsc->sort_by_stat == DSC_NAME)
1020 result = utf8_strcasecmp (var_get_name (a->v), var_get_name (b->v));
1023 double as = a->stats[dsc->sort_by_stat];
1024 double bs = b->stats[dsc->sort_by_stat];
1026 result = as < bs ? -1 : as > bs;
1029 if (!dsc->sort_ascending)