1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
3 Written by Ben Pfaff <blp@gnu.org>.
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
20 /* FIXME: Many possible optimizations. */
28 #include <data/case.h>
29 #include <data/casefile.h>
30 #include <data/dictionary.h>
31 #include <data/procedure.h>
32 #include <data/transformations.h>
33 #include <data/variable.h>
34 #include <language/command.h>
35 #include <language/dictionary/split-file.h>
36 #include <language/lexer/lexer.h>
37 #include <language/lexer/variable-parser.h>
38 #include <libpspp/alloc.h>
39 #include <libpspp/array.h>
40 #include <libpspp/compiler.h>
41 #include <libpspp/magic.h>
42 #include <libpspp/message.h>
43 #include <libpspp/assertion.h>
44 #include <math/moments.h>
45 #include <output/manager.h>
46 #include <output/table.h>
49 #define _(msgid) gettext (msgid)
50 #define N_(msgid) msgid
52 /* DESCRIPTIVES private data. */
56 /* Handling of missing values. */
59 DSC_VARIABLE, /* Handle missing values on a per-variable basis. */
60 DSC_LISTWISE /* Discard entire case if any variable is missing. */
63 /* Describes properties of a distribution for the purpose of
64 calculating a Z-score. */
67 int src_idx; /* Source index into case data. */
68 int dst_idx; /* Destination index into case data. */
69 double mean; /* Distribution mean. */
70 double std_dev; /* Distribution standard deviation. */
71 struct variable *v; /* Variable on which z-score is based. */
74 /* DESCRIPTIVES transformation (for calculating Z-scores). */
77 struct dsc_z_score *z_scores; /* Array of Z-scores. */
78 int z_score_cnt; /* Number of Z-scores. */
79 struct variable **vars; /* Variables for listwise missing checks. */
80 size_t var_cnt; /* Number of variables. */
81 enum dsc_missing_type missing_type; /* Treatment of missing values. */
82 int include_user_missing; /* Nonzero to include user-missing values. */
85 /* Statistics. Used as bit indexes, so must be 32 or fewer. */
88 DSC_MEAN = 0, DSC_SEMEAN, DSC_STDDEV, DSC_VARIANCE, DSC_KURTOSIS,
89 DSC_SEKURT, DSC_SKEWNESS, DSC_SESKEW, DSC_RANGE, DSC_MIN,
90 DSC_MAX, DSC_SUM, DSC_N_STATS,
92 /* Only valid as sort criteria. */
93 DSC_NAME = -2, /* Sort by name. */
94 DSC_NONE = -1 /* Unsorted. */
97 /* Describes one statistic. */
98 struct dsc_statistic_info
100 const char *identifier; /* Identifier. */
101 const char *name; /* Full name. */
102 enum moment moment; /* Highest moment needed to calculate. */
105 /* Table of statistics, indexed by DSC_*.
   Each entry gives the keyword that match_statistic() compares
   against the current token, the translatable title that display()
   uses as the column heading, and the highest moment needed to
   calculate the statistic.  The standard-error rows (SEKURTOSIS,
   SESKEWNESS) are MOMENT_NONE because cmd_descriptives() adds
   DSC_KURTOSIS and DSC_SKEWNESS to calc_stats whenever DSC_SEKURT
   and DSC_SESKEW are shown, and those rows carry the moment
   requirement. */
106 static const struct dsc_statistic_info dsc_info[DSC_N_STATS] =
108 {"MEAN", N_("Mean"), MOMENT_MEAN},
109 {"SEMEAN", N_("S E Mean"), MOMENT_VARIANCE},
110 {"STDDEV", N_("Std Dev"), MOMENT_VARIANCE},
111 {"VARIANCE", N_("Variance"), MOMENT_VARIANCE},
112 {"KURTOSIS", N_("Kurtosis"), MOMENT_KURTOSIS},
113 {"SEKURTOSIS", N_("S E Kurt"), MOMENT_NONE},
114 {"SKEWNESS", N_("Skewness"), MOMENT_SKEWNESS},
115 {"SESKEWNESS", N_("S E Skew"), MOMENT_NONE},
116 {"RANGE", N_("Range"), MOMENT_NONE},
117 {"MINIMUM", N_("Minimum"), MOMENT_NONE},
118 {"MAXIMUM", N_("Maximum"), MOMENT_NONE},
119 {"SUM", N_("Sum"), MOMENT_MEAN},
122 /* Statistics calculated by default if none are explicitly requested. */
124 #define DEFAULT_STATS \
125 ((1ul << DSC_MEAN) | (1ul << DSC_STDDEV) | (1ul << DSC_MIN) \
128 /* A variable specified on DESCRIPTIVES. */
131 struct variable *v; /* Variable to calculate on. */
132 char z_name[LONG_NAME_LEN + 1]; /* Name for z-score variable; empty string if none. */
133 double valid, missing; /* Sums of weights of valid and of missing cases. */
134 struct moments *moments; /* Moments accumulator (users check it for null). */
135 double min, max; /* Minimum and maximum values. */
136 double stats[DSC_N_STATS]; /* All the stats' values, indexed by DSC_*. */
142 DSC_LINE, /* Abbreviated format. */
143 DSC_SERIAL /* Long format. */
146 /* A DESCRIPTIVES procedure. */
149 /* Per-variable info. */
150 struct dsc_var *vars; /* Variables. */
151 size_t var_cnt; /* Number of variables. */
154 enum dsc_missing_type missing_type; /* Treatment of missing values. */
155 int include_user_missing; /* Nonzero to include user-missing values. */
156 int show_var_labels; /* Nonzero to show variable labels. */
157 int show_index; /* Nonzero to show variable index. */
158 enum dsc_format format; /* Output format. */
160 /* Accumulated results. */
161 double missing_listwise; /* Sum of weights of cases missing listwise. */
162 double valid; /* Sum of weights of valid cases. */
163 bool bad_warn; /* Warn if bad weight found. */
164 enum dsc_statistic sort_by_stat; /* Statistic to sort by; -1: name. */
165 int sort_ascending; /* !0: ascending order; 0: descending. */
166 unsigned long show_stats; /* Statistics to display. */
167 unsigned long calc_stats; /* Statistics to calculate. */
168 enum moment max_moment; /* Highest moment needed for stats. */
172 static enum dsc_statistic match_statistic (void);
173 static void free_dsc_proc (struct dsc_proc *);
175 /* Z-score functions. */
176 static bool try_name (const struct dictionary *dict,
177 struct dsc_proc *dsc, char *name);
178 static bool generate_z_varname (const struct dictionary *dict,
179 struct dsc_proc *dsc, char *z_name,
180 const char *name, size_t *z_cnt);
181 static void dump_z_table (struct dsc_proc *);
182 static void setup_z_trns (struct dsc_proc *, struct dataset *);
184 /* Procedure execution functions. */
185 static bool calc_descriptives (const struct ccase *first,
186 const struct casefile *, void *dsc_,
187 const struct dataset *);
188 static void display (struct dsc_proc *dsc);
190 /* Parser and outline. */
192 /* Handles DESCRIPTIVES.
   Parses the MISSING, SAVE, FORMAT, STATISTICS and SORT subcommands
   and the variable list, works out which statistics must be shown
   and which must be calculated, creates per-variable moments when
   any moment is needed, and runs calc_descriptives() through
   multipass_procedure_with_splits().  The return visible here
   yields CMD_SUCCESS when that call reports success and
   CMD_CASCADING_FAILURE otherwise. */
194 cmd_descriptives (struct dataset *ds)
196 struct dictionary *dict = dataset_dict (ds);
197 struct dsc_proc *dsc;
198 struct variable **vars = NULL;
200 int save_z_scores = 0;
205 /* Create and initialize dsc. */
206 dsc = xmalloc (sizeof *dsc);
209 dsc->missing_type = DSC_VARIABLE;
210 dsc->include_user_missing = 0;
211 dsc->show_var_labels = 1;
213 dsc->format = DSC_LINE;
214 dsc->missing_listwise = 0.;
217 dsc->sort_by_stat = DSC_NONE;
218 dsc->sort_ascending = 1;
219 dsc->show_stats = dsc->calc_stats = DEFAULT_STATS;
221 /* Parse DESCRIPTIVES. */
224 if (lex_match_id ("MISSING"))
227 while (token != '.' && token != '/')
229 if (lex_match_id ("VARIABLE"))
230 dsc->missing_type = DSC_VARIABLE;
231 else if (lex_match_id ("LISTWISE"))
232 dsc->missing_type = DSC_LISTWISE;
233 else if (lex_match_id ("INCLUDE"))
234 dsc->include_user_missing = 1;
243 else if (lex_match_id ("SAVE"))
245 else if (lex_match_id ("FORMAT"))
248 while (token != '.' && token != '/')
250 if (lex_match_id ("LABELS"))
251 dsc->show_var_labels = 1;
252 else if (lex_match_id ("NOLABELS"))
253 dsc->show_var_labels = 0;
254 else if (lex_match_id ("INDEX"))
256 else if (lex_match_id ("NOINDEX"))
258 else if (lex_match_id ("LINE"))
259 dsc->format = DSC_LINE;
260 else if (lex_match_id ("SERIAL"))
261 dsc->format = DSC_SERIAL;
270 else if (lex_match_id ("STATISTICS"))
274 while (token != '.' && token != '/')
276 if (lex_match (T_ALL))
277 dsc->show_stats |= (1ul << DSC_N_STATS) - 1;
278 else if (lex_match_id ("DEFAULT"))
279 dsc->show_stats |= DEFAULT_STATS;
/* NOTE(review): match_statistic() is documented to return DSC_NONE
   (-1) when no statistic is matched.  Shifting by a negative count
   is undefined behavior, so the result should be checked before it
   is used as a shift count. */
281 dsc->show_stats |= 1ul << (match_statistic ());
284 if (dsc->show_stats == 0)
285 dsc->show_stats = DEFAULT_STATS;
287 else if (lex_match_id ("SORT"))
290 if (lex_match_id ("NAME"))
291 dsc->sort_by_stat = DSC_NAME;
294 dsc->sort_by_stat = match_statistic ();
295 if (dsc->sort_by_stat == DSC_NONE )
296 dsc->sort_by_stat = DSC_MEAN;
300 if (lex_match_id ("A"))
301 dsc->sort_ascending = 1;
302 else if (lex_match_id ("D"))
303 dsc->sort_ascending = 0;
306 lex_force_match (')');
309 else if (var_cnt == 0)
311 if (lex_look_ahead () == '=')
313 lex_match_id ("VARIABLES");
317 while (token != '.' && token != '/')
321 if (!parse_variables (dataset_dict (ds), &vars, &var_cnt,
322 PV_APPEND | PV_NO_DUPLICATE | PV_NUMERIC))
325 dsc->vars = xnrealloc (dsc->vars, var_cnt, sizeof *dsc->vars);
326 for (i = dsc->var_cnt; i < var_cnt; i++)
328 struct dsc_var *dv = &dsc->vars[i];
330 dv->z_name[0] = '\0';
333 dsc->var_cnt = var_cnt;
342 if (try_name (dict, dsc, tokid))
344 strcpy (dsc->vars[dsc->var_cnt - 1].z_name, tokid);
348 msg (SE, _("Z-score variable name %s would be"
349 " a duplicate variable name."), tokid);
351 if (!lex_force_match (')'))
366 msg (SE, _("No variables specified."));
370 /* Construct z-score varnames, show translation table. */
371 if (z_cnt || save_z_scores)
377 for (i = 0; i < dsc->var_cnt; i++)
378 if (dsc->vars[i].z_name[0] == 0)
380 if (!generate_z_varname (dict, dsc, dsc->vars[i].z_name,
381 dsc->vars[i].v->name, &gen_cnt))
389 /* Figure out statistics to display. */
390 if (dsc->show_stats & (1ul << DSC_SKEWNESS))
391 dsc->show_stats |= 1ul << DSC_SESKEW;
392 if (dsc->show_stats & (1ul << DSC_KURTOSIS))
393 dsc->show_stats |= 1ul << DSC_SEKURT;
395 /* Figure out which statistics to calculate. */
396 dsc->calc_stats = dsc->show_stats;
398 dsc->calc_stats |= (1ul << DSC_MEAN) | (1ul << DSC_STDDEV);
/* DSC_NAME and DSC_NONE are negative, so only a real statistic used
   as the sort key is added to the set to calculate. */
399 if (dsc->sort_by_stat >= 0)
400 dsc->calc_stats |= 1ul << dsc->sort_by_stat;
401 if (dsc->show_stats & (1ul << DSC_SESKEW))
402 dsc->calc_stats |= 1ul << DSC_SKEWNESS;
403 if (dsc->show_stats & (1ul << DSC_SEKURT))
404 dsc->calc_stats |= 1ul << DSC_KURTOSIS;
406 /* Figure out maximum moment needed and allocate moments for
408 dsc->max_moment = MOMENT_NONE;
409 for (i = 0; i < DSC_N_STATS; i++)
410 if (dsc->calc_stats & (1ul << i) && dsc_info[i].moment > dsc->max_moment)
411 dsc->max_moment = dsc_info[i].moment;
412 if (dsc->max_moment != MOMENT_NONE)
413 for (i = 0; i < dsc->var_cnt; i++)
414 dsc->vars[i].moments = moments_create (dsc->max_moment);
417 ok = multipass_procedure_with_splits (ds, calc_descriptives, dsc);
421 setup_z_trns (dsc, ds);
426 return ok ? CMD_SUCCESS : CMD_CASCADING_FAILURE;
434 /* Returns the statistic named by the current token and skips past the token.
435 Returns DSC_NONE if no statistic is given (e.g., subcommand with no
436 specifiers). Emits an error if the current token ID does not name a statistic. */
438 static enum dsc_statistic
439 match_statistic (void)
443 enum dsc_statistic stat;
445 for (stat = 0; stat < DSC_N_STATS; stat++)
446 if (lex_match_id (dsc_info[stat].identifier))
450 lex_error (_("expecting statistic name: reverting to default"));
458 free_dsc_proc (struct dsc_proc *dsc)
465 for (i = 0; i < dsc->var_cnt; i++)
466 moments_destroy (dsc->vars[i].moments);
473 /* Returns false if NAME is a duplicate of any existing variable name or
474 of any previously-declared z-var name; otherwise returns true. */
476 try_name (const struct dictionary *dict, struct dsc_proc *dsc, char *name)
480 if (dict_lookup_var (dict, name) != NULL)
482 for (i = 0; i < dsc->var_cnt; i++)
483 if (!strcasecmp (dsc->vars[i].z_name, name))
488 /* Generates a name for a Z-score variable based on a variable
489 named VAR_NAME, given that *Z_CNT generated variable names are
490 known to already exist. If successful, returns true and
491 copies the new name into Z_NAME. On failure, returns false. */
493 generate_z_varname (const struct dictionary *dict, struct dsc_proc *dsc, char *z_name,
494 const char *var_name, size_t *z_cnt)
496 char name[LONG_NAME_LEN + 1];
498 /* Try a name based on the original variable name. */
500 str_copy_trunc (name + 1, sizeof name - 1, var_name);
501 if (try_name (dict, dsc, name))
503 strcpy (z_name, name);
507 /* Generate a synthetic name. */
/* NOTE(review): *z_cnt has type size_t, but the sprintf calls below
   format it (and the differences computed from it) with %d, which
   expects an int.  %zu, or a cast of the argument to int, would
   match the argument type. */
513 sprintf (name, "ZSC%03d", *z_cnt);
514 else if (*z_cnt <= 108)
515 sprintf (name, "STDZ%02d", *z_cnt - 99);
516 else if (*z_cnt <= 117)
517 sprintf (name, "ZZZZ%02d", *z_cnt - 108);
518 else if (*z_cnt <= 126)
519 sprintf (name, "ZQZQ%02d", *z_cnt - 117);
/* NOTE(review): ZSC001-ZSC0999 in the message below looks like a
   typo for ZSC001-ZSC099: the STDZ series starts after 99 names,
   and 99 + 9 + 9 + 9 gives the 126 names the message announces. */
522 msg (SE, _("Ran out of generic names for Z-score variables. "
523 "There are only 126 generic names: ZSC001-ZSC0999, "
524 "STDZ01-STDZ09, ZZZZ01-ZZZZ09, ZQZQ01-ZQZQ09."));
528 if (try_name (dict, dsc, name))
530 strcpy (z_name, name);
537 /* Outputs a table describing the mapping between source
538 variables and Z-score variables. */
540 dump_z_table (struct dsc_proc *dsc)
548 for (i = 0; i < dsc->var_cnt; i++)
549 if (dsc->vars[i].z_name[0] != '\0')
553 t = tab_create (2, cnt + 1, 0);
554 tab_title (t, _("Mapping of variables to corresponding Z-scores."));
555 tab_columns (t, SOM_COL_DOWN, 1);
556 tab_headers (t, 0, 0, 1, 0);
557 tab_box (t, TAL_1, TAL_1, TAL_0, TAL_1, 0, 0, 1, cnt);
558 tab_hline (t, TAL_2, 0, 1, 1);
559 tab_text (t, 0, 0, TAB_CENTER | TAT_TITLE, _("Source"));
560 tab_text (t, 1, 0, TAB_CENTER | TAT_TITLE, _("Target"));
561 tab_dim (t, tab_natural_dimensions);
566 for (i = 0, y = 1; i < dsc->var_cnt; i++)
567 if (dsc->vars[i].z_name[0] != '\0')
569 tab_text (t, 0, y, TAB_LEFT, dsc->vars[i].v->name);
570 tab_text (t, 1, y++, TAB_LEFT, dsc->vars[i].z_name);
577 /* Transformation function to calculate Z-scores. Will return SYSMIS if any of
578 the following are true: 1) mean or standard deviation is SYSMIS 2) score is
579 SYSMIS 3) score is user missing and they were not included in the original
580 analysis. 4) any of the variables in the original analysis were missing
581 (either system or user-missing values that weren't included). */
584 descriptives_trns_proc (void *trns_, struct ccase * c,
585 casenumber case_idx UNUSED)
587 struct dsc_trns *t = trns_;
588 struct dsc_z_score *z;
589 struct variable **vars;
592 if (t->missing_type == DSC_LISTWISE)
595 for (vars = t->vars; vars < t->vars + t->var_cnt; vars++)
597 double score = case_num (c, (*vars)->fv);
599 || (!t->include_user_missing
600 && mv_is_num_user_missing (&(*vars)->miss, score)))
608 for (z = t->z_scores; z < t->z_scores + t->z_score_cnt; z++)
610 double input = case_num (c, z->src_idx);
611 double *output = &case_data_rw (c, z->dst_idx)->f;
613 if (z->mean == SYSMIS || z->std_dev == SYSMIS
614 || all_sysmis || input == SYSMIS
615 || (!t->include_user_missing
616 && mv_is_num_user_missing (&z->v->miss, input)))
619 *output = (input - z->mean) / z->std_dev;
621 return TRNS_CONTINUE;
624 /* Frees a descriptives_trns struct. */
626 descriptives_trns_free (void *trns_)
628 struct dsc_trns *t = trns_;
/* Sanity check: t->vars must be non-null exactly when listwise
   deletion is in effect (setup_z_trns() allocates it for
   DSC_LISTWISE). */
631 assert((t->missing_type != DSC_LISTWISE) ^ (t->vars != NULL));
636 /* Sets up a transformation to calculate Z scores.
   Counts the variables in DSC that have a Z-score name and
   allocates a struct dsc_trns with one dsc_z_score per such
   variable; for listwise deletion it also copies the list of
   analysis variables.  Each destination variable is created in the
   dictionary of DS and given a label built from the source
   variable's label or name, and its source and destination indexes
   are recorded with the mean and standard deviation taken from the
   procedure's results.  Finally descriptives_trns_proc() and
   descriptives_trns_free() are registered with
   add_transformation(). */
638 setup_z_trns (struct dsc_proc *dsc, struct dataset *ds)
643 for (cnt = i = 0; i < dsc->var_cnt; i++)
644 if (dsc->vars[i].z_name[0] != '\0')
647 t = xmalloc (sizeof *t);
648 t->z_scores = xnmalloc (cnt, sizeof *t->z_scores);
649 t->z_score_cnt = cnt;
650 t->missing_type = dsc->missing_type;
651 t->include_user_missing = dsc->include_user_missing;
652 if ( t->missing_type == DSC_LISTWISE )
654 t->var_cnt = dsc->var_cnt;
655 t->vars = xnmalloc (t->var_cnt, sizeof *t->vars);
656 for (i = 0; i < t->var_cnt; i++)
657 t->vars[i] = dsc->vars[i].v;
665 for (cnt = i = 0; i < dsc->var_cnt; i++)
667 struct dsc_var *dv = &dsc->vars[i];
668 if (dv->z_name[0] != '\0')
670 struct dsc_z_score *z;
672 struct variable *dst_var;
674 dst_var = dict_create_var_assert (dataset_dict (ds), dv->z_name, 0);
/* NOTE(review): the + 12 in both allocations below budgets 11 bytes
   for the prefix plus a terminator, which fits the English text.
   The prefix actually copied is the result of _(), so a longer
   translation would overrun the buffer; sizing from the length of
   the translated prefix would be safe. */
677 dst_var->label = xmalloc (strlen (dv->v->label) + 12);
678 cp = stpcpy (dst_var->label, _("Z-score of "));
679 strcpy (cp, dv->v->label);
683 dst_var->label = xmalloc (strlen (dv->v->name) + 12);
684 cp = stpcpy (dst_var->label, _("Z-score of "));
685 strcpy (cp, dv->v->name);
688 z = &t->z_scores[cnt++];
689 z->src_idx = dv->v->fv;
690 z->dst_idx = dst_var->fv;
691 z->mean = dv->stats[DSC_MEAN];
692 z->std_dev = dv->stats[DSC_STDDEV];
697 add_transformation (ds,
698 descriptives_trns_proc, descriptives_trns_free, t);
701 /* Statistical calculation. */
703 static bool listwise_missing (struct dsc_proc *dsc, const struct ccase *c);
705 /* Calculates and displays descriptive statistics for the cases in casefile CF. */
708 calc_descriptives (const struct ccase *first,
709 const struct casefile *cf, void *dsc_,
710 const struct dataset *ds)
712 struct dsc_proc *dsc = dsc_;
713 struct casereader *reader;
717 output_split_file_values (ds, first);
719 for (i = 0; i < dsc->var_cnt; i++)
721 struct dsc_var *dv = &dsc->vars[i];
723 dv->valid = dv->missing = 0.0;
724 if (dv->moments != NULL)
725 moments_clear (dv->moments);
729 dsc->missing_listwise = 0.;
732 /* First pass to handle most of the work. */
733 for (reader = casefile_get_reader (cf);
734 casereader_read (reader, &c);
737 double weight = dict_get_case_weight (dataset_dict (ds), &c, &dsc->bad_warn);
741 /* Check for missing values. */
742 if (listwise_missing (dsc, &c))
744 dsc->missing_listwise += weight;
745 if (dsc->missing_type == DSC_LISTWISE)
748 dsc->valid += weight;
750 for (i = 0; i < dsc->var_cnt; i++)
752 struct dsc_var *dv = &dsc->vars[i];
753 double x = case_num (&c, dv->v->fv);
755 if (dsc->missing_type != DSC_LISTWISE
757 || (!dsc->include_user_missing
758 && mv_is_num_user_missing (&dv->v->miss, x))))
760 dv->missing += weight;
764 if (dv->moments != NULL)
765 moments_pass_one (dv->moments, x, weight);
773 casereader_destroy (reader);
775 /* Second pass for higher-order moments. */
776 if (dsc->max_moment > MOMENT_MEAN)
778 for (reader = casefile_get_reader (cf);
779 casereader_read (reader, &c);
782 double weight = dict_get_case_weight (dataset_dict (ds), &c,
787 /* Check for missing values. */
788 if (listwise_missing (dsc, &c)
789 && dsc->missing_type == DSC_LISTWISE)
792 for (i = 0; i < dsc->var_cnt; i++)
794 struct dsc_var *dv = &dsc->vars[i];
795 double x = case_num (&c, dv->v->fv);
797 if (dsc->missing_type != DSC_LISTWISE
799 || (!dsc->include_user_missing
800 && mv_is_num_user_missing (&dv->v->miss, x))))
803 if (dv->moments != NULL)
804 moments_pass_two (dv->moments, x, weight);
807 casereader_destroy (reader);
810 /* Calculate results. */
811 for (i = 0; i < dsc->var_cnt; i++)
813 struct dsc_var *dv = &dsc->vars[i];
817 for (j = 0; j < DSC_N_STATS; j++)
818 dv->stats[j] = SYSMIS;
820 dv->valid = W = dsc->valid - dv->missing;
822 if (dv->moments != NULL)
823 moments_calculate (dv->moments, NULL,
824 &dv->stats[DSC_MEAN], &dv->stats[DSC_VARIANCE],
825 &dv->stats[DSC_SKEWNESS], &dv->stats[DSC_KURTOSIS]);
826 if (dsc->calc_stats & (1ul << DSC_SEMEAN)
827 && dv->stats[DSC_VARIANCE] != SYSMIS && W > 0.)
828 dv->stats[DSC_SEMEAN] = sqrt (dv->stats[DSC_VARIANCE]) / sqrt (W);
829 if (dsc->calc_stats & (1ul << DSC_STDDEV)
830 && dv->stats[DSC_VARIANCE] != SYSMIS)
831 dv->stats[DSC_STDDEV] = sqrt (dv->stats[DSC_VARIANCE]);
832 if (dsc->calc_stats & (1ul << DSC_SEKURT))
833 if (dv->stats[DSC_KURTOSIS] != SYSMIS)
834 dv->stats[DSC_SEKURT] = calc_sekurt (W);
835 if (dsc->calc_stats & (1ul << DSC_SESKEW)
836 && dv->stats[DSC_SKEWNESS] != SYSMIS)
837 dv->stats[DSC_SESKEW] = calc_seskew (W);
838 dv->stats[DSC_RANGE] = ((dv->min == DBL_MAX || dv->max == -DBL_MAX)
839 ? SYSMIS : dv->max - dv->min);
840 dv->stats[DSC_MIN] = dv->min == DBL_MAX ? SYSMIS : dv->min;
841 dv->stats[DSC_MAX] = dv->max == -DBL_MAX ? SYSMIS : dv->max;
842 if (dsc->calc_stats & (1ul << DSC_SUM))
843 dv->stats[DSC_SUM] = W * dv->stats[DSC_MEAN];
846 /* Output results. */
852 /* Returns true if any of the descriptives variables in DSC's
853 variable list have missing values in case C, false otherwise. */
855 listwise_missing (struct dsc_proc *dsc, const struct ccase *c)
859 for (i = 0; i < dsc->var_cnt; i++)
861 struct dsc_var *dv = &dsc->vars[i];
862 double x = case_num (c, dv->v->fv);
865 || (!dsc->include_user_missing
866 && mv_is_num_user_missing (&dv->v->miss, x)))
872 /* Statistical display. */
874 static algo_compare_func descriptives_compare_dsc_vars;
876 /* Displays a table of descriptive statistics for DSC. */
878 display (struct dsc_proc *dsc)
884 nc = 1 + (dsc->format == DSC_SERIAL ? 2 : 1);
885 for (i = 0; i < DSC_N_STATS; i++)
886 if (dsc->show_stats & (1ul << i))
889 if (dsc->sort_by_stat != DSC_NONE)
890 sort (dsc->vars, dsc->var_cnt, sizeof *dsc->vars,
891 descriptives_compare_dsc_vars, dsc);
893 t = tab_create (nc, dsc->var_cnt + 1, 0);
894 tab_headers (t, 1, 0, 1, 0);
895 tab_box (t, TAL_1, TAL_1, -1, -1, 0, 0, nc - 1, dsc->var_cnt);
896 tab_box (t, -1, -1, -1, TAL_1, 1, 0, nc - 1, dsc->var_cnt);
897 tab_hline (t, TAL_2, 0, nc - 1, 1);
898 tab_vline (t, TAL_2, 1, 0, dsc->var_cnt);
899 tab_dim (t, tab_natural_dimensions);
902 tab_text (t, nc++, 0, TAB_LEFT | TAT_TITLE, _("Variable"));
903 if (dsc->format == DSC_SERIAL)
905 tab_text (t, nc++, 0, TAB_CENTER | TAT_TITLE, _("Valid N"));
906 tab_text (t, nc++, 0, TAB_CENTER | TAT_TITLE, _("Missing N"));
909 tab_text (t, nc++, 0, TAB_CENTER | TAT_TITLE, "N");
911 for (i = 0; i < DSC_N_STATS; i++)
912 if (dsc->show_stats & (1ul << i))
914 const char *title = gettext (dsc_info[i].name);
915 tab_text (t, nc++, 0, TAB_CENTER | TAT_TITLE, title);
918 for (i = 0; i < dsc->var_cnt; i++)
920 struct dsc_var *dv = &dsc->vars[i];
924 tab_text (t, nc++, i + 1, TAB_LEFT, dv->v->name);
925 tab_text (t, nc++, i + 1, TAT_PRINTF, "%g", dv->valid);
926 if (dsc->format == DSC_SERIAL)
927 tab_text (t, nc++, i + 1, TAT_PRINTF, "%g", dv->missing);
928 for (j = 0; j < DSC_N_STATS; j++)
929 if (dsc->show_stats & (1ul << j))
930 tab_float (t, nc++, i + 1, TAB_NONE, dv->stats[j], 10, 3);
933 tab_title (t, _("Valid cases = %g; cases with missing value(s) = %g."),
934 dsc->valid, dsc->missing_listwise);
939 /* Compares `struct dsc_var's A and B according to the ordering requested in DSC_ (sort key and direction). */
942 descriptives_compare_dsc_vars (const void *a_, const void *b_, const void *dsc_)
944 const struct dsc_var *a = a_;
945 const struct dsc_var *b = b_;
946 const struct dsc_proc *dsc = dsc_;
950 if (dsc->sort_by_stat == DSC_NAME)
951 result = strcasecmp (a->v->name, b->v->name);
954 double as = a->stats[dsc->sort_by_stat];
955 double bs = b->stats[dsc->sort_by_stat];
957 result = as < bs ? -1 : as > bs;
960 if (!dsc->sort_ascending)