1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2004, 2008, 2009 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
19 #include <gsl/gsl_cdf.h>
20 #include <libpspp/message.h>
25 #include <math/sort.h>
26 #include <math/order-stats.h>
27 #include <math/percentiles.h>
28 #include <math/tukey-hinges.h>
29 #include <math/box-whisker.h>
30 #include <math/trimmed-mean.h>
31 #include <math/extrema.h>
33 #include <data/case.h>
34 #include <data/casegrouper.h>
35 #include <data/casereader.h>
36 #include <data/casewriter.h>
37 #include <data/dictionary.h>
38 #include <data/procedure.h>
39 #include <data/subcase.h>
40 #include <data/value-labels.h>
41 #include <data/variable.h>
42 #include <language/command.h>
43 #include <language/dictionary/split-file.h>
44 #include <language/lexer/lexer.h>
45 #include <libpspp/compiler.h>
46 #include <libpspp/hash.h>
47 #include <libpspp/message.h>
48 #include <libpspp/misc.h>
49 #include <libpspp/str.h>
50 #include <math/moments.h>
51 #include <output/chart-provider.h>
52 #include <output/charts/box-whisker.h>
53 #include <output/charts/cartesian.h>
54 #include <output/manager.h>
55 #include <output/table.h>
61 #define _(msgid) gettext (msgid)
62 #define N_(msgid) msgid
65 #include <output/chart.h>
66 #include <output/charts/plot-hist.h>
67 #include <output/charts/plot-chart.h>
68 #include <math/histogram.h>
75 missing=miss:pairwise/!listwise,
77 incl:include/!exclude;
78 +compare=cmp:variables/!groups;
81 +plot[plt_]=stemleaf,boxplot,npplot,:spreadlevel(*d:n),histogram,all,none;
83 +statistics[st_]=descriptives,:extreme(*d:n),all,none.
/* File-scope state shared by the parser callbacks, the analysis and the
   output routines. */
91 static struct cmd_examine cmd;
/* The dependent variables filled in by xmn_custom_variables, and their
   count. */
93 static const struct variable **dependent_vars;
94 static size_t n_dependent_vars;
/* The requested percentiles and the algorithm selected for them; both are
   reset at the start of cmd_examine. */
98 static subc_list_double percentile_list;
99 static enum pc_alg percentile_algorithm;
/* Statistics gathered for one dependent variable within one factor result. */
101 struct factor_metrics
/* Moments accumulator; it is filled in examine_group and read back with
   moments1_calculate. */
103 struct moments1 *moments;
/* One entry per requested percentile; n_ptiles (used in factor_destroy)
   gives the length. */
105 struct percentile **ptl;
/* Statistics created in examine_group and released with statistic_destroy
   in factor_destroy. */
108 struct statistic *tukey_hinges;
109 struct statistic *box_whisker;
110 struct statistic *trimmed_mean;
111 struct statistic *histogram;
/* Created in examine_group only when a normal probability plot has been
   requested. */
112 struct order_stats *np;
114 /* Three quartiles indexing into PTL */
115 struct percentile **quartiles;
117 /* A reader sorted in ASCENDING order */
118 struct casereader *up_reader;
120 /* The minimum value of all the weights */
123 /* Sum of all weights, including those for missing values */
126 /* Sum of weights of non-missing values */
/* Extrema collections for the smallest and the largest values. */
139 struct extrema *minima;
140 struct extrema *maxima;
/* The values of the (up to two) independent variables that identify this
   result; examine_group copies them from the first case of the group. */
147 union value value[2];
149 /* An array of factor metrics, one for each dependent variable */
150 struct factor_metrics *metrics;
155 /* Instances of this structure are kept in a linked list */
158 /* The independent variables; the second entry is NULL when the factor
   has only one (see examine_parse_independent_vars).
   NOTE(review): both const qualifiers below apply to the pointed-to struct,
   not to the pointers, which are assigned during parsing -- confirm that
   the duplicated qualifier is intentional. */
159 const struct variable const* indep_var[2];
161 /* A list of results for this factor */
162 struct ll_list result_list ;
/* Releases what the results in FCTR's result list own: for every result,
   the statistics and readers of each dependent variable, the stored values
   of the independent variables, and the metrics array itself. */
167 factor_destroy (struct xfactor *fctr)
169 struct ll *ll = ll_head (&fctr->result_list);
170 while (ll != ll_null (&fctr->result_list))
173 struct factor_result *result =
174 ll_data (ll, struct factor_result, ll);
/* Release everything held in the metrics of each dependent variable. */
177 for (v = 0; v < n_dependent_vars; ++v)
180 moments1_destroy (result->metrics[v].moments);
181 extrema_destroy (result->metrics[v].minima);
182 extrema_destroy (result->metrics[v].maxima);
183 statistic_destroy (result->metrics[v].trimmed_mean);
184 statistic_destroy (result->metrics[v].tukey_hinges);
185 statistic_destroy (result->metrics[v].box_whisker);
186 statistic_destroy (result->metrics[v].histogram);
/* Every percentile is destroyed with statistic_destroy before the arrays
   that hold the pointers are freed. */
187 for (i = 0 ; i < result->metrics[v].n_ptiles; ++i)
188 statistic_destroy ((struct statistic *) result->metrics[v].ptl[i]);
189 free (result->metrics[v].ptl);
190 free (result->metrics[v].quartiles);
191 casereader_destroy (result->metrics[v].up_reader);
/* Destroy the stored value of each independent variable that is set, using
   that variable's width. */
194 for (i = 0; i < 2; i++)
195 if (fctr->indep_var[i])
196 value_destroy (&result->value[i],
197 var_get_width (fctr->indep_var[i]));
198 free (result->metrics);
/* The factor that run_examine fills from the ungrouped input; its result
   list is re-initialised on every run. */
204 static struct xfactor level0_factor;
/* The factors created by examine_parse_independent_vars, in parse order. */
205 static struct ll_list factor_list;
207 /* Parse the clause specifying the factors */
208 static int examine_parse_independent_vars (struct lexer *lexer,
209 const struct dictionary *dict,
210 struct cmd_examine *cmd);
212 /* Output functions */
213 static void show_summary (const struct variable **dependent_var, int n_dep_var,
214 const struct dictionary *dict,
215 const struct xfactor *f);
218 static void show_descriptives (const struct variable **dependent_var,
220 const struct xfactor *f);
223 static void show_percentiles (const struct variable **dependent_var,
225 const struct xfactor *f);
228 static void show_extremes (const struct variable **dependent_var,
230 const struct xfactor *f);
235 /* Per Split function */
236 static void run_examine (struct cmd_examine *, struct casereader *,
239 static void output_examine (const struct dictionary *dict);
/* NOTE(review): unlike its neighbours this prototype is not static, and no
   definition or caller of factor_calc is visible in this part of the file
   -- confirm that it is still needed. */
242 void factor_calc (const struct ccase *c, int case_no,
243 double weight, bool case_missing);
246 /* Represent a factor as a string, so it can be
247 printed in a human readable fashion */
248 static void factor_to_string (const struct xfactor *fctr,
249 const struct factor_result *result,
252 /* Represent a factor as a string, so it can be
253 printed in a human readable fashion,
254 but sacrificing some readability for the sake of brevity */
256 factor_to_string_concise (const struct xfactor *fctr,
257 const struct factor_result *result,
263 /* Categories of missing values to exclude. */
264 static enum mv_class exclude_values;
/* Command entry point: parses the command, fills in defaults, and calls
   run_examine once for every group produced by the splits grouper.
   Returns CMD_SUCCESS when both the grouper and the procedure commit
   succeed, and CMD_CASCADING_FAILURE otherwise. */
267 cmd_examine (struct lexer *lexer, struct dataset *ds)
269 struct casegrouper *grouper;
270 struct casereader *group;
/* Reset the percentile state and the factor list before parsing. */
273 subc_list_double_create (&percentile_list);
274 percentile_algorithm = PC_HAVERAGE;
276 ll_init (&factor_list);
/* A failed parse releases the percentile list. */
278 if ( !parse_examine (lexer, ds, &cmd, NULL) )
280 subc_list_double_destroy (&percentile_list);
284 /* If /MISSING=INCLUDE is set, only the MV_SYSTEM class is excluded;
   otherwise MV_ANY is */
285 exclude_values = cmd.incl == XMN_INCLUDE ? MV_SYSTEM : MV_ANY;
/* st_n supplies the number of extremes (see examine_group); SYSMIS here
   presumably means that the user gave none. */
287 if ( cmd.st_n == SYSMIS )
/* Default the confidence interval to 95 when the subcommand is absent. */
290 if ( ! cmd.sbc_cinterval)
291 cmd.n_cinterval[0] = 95.0;
293 /* If descriptives have been requested, make sure the
294 quartiles are calculated */
295 if ( cmd.a_statistics[XMN_ST_DESCRIPTIVES] )
297 subc_list_double_push (&percentile_list, 25);
298 subc_list_double_push (&percentile_list, 50);
299 subc_list_double_push (&percentile_list, 75);
/* Analyse each group delivered by the splits grouper separately. */
302 grouper = casegrouper_create_splits (proc_open (ds), dataset_dict (ds));
304 while (casegrouper_get_next_group (grouper, &group))
/* Wrap the group in an arithmetic sequence (first 1, step 1);
   examine_group later reads the last value of each case as its location. */
306 struct casereader *reader =
307 casereader_create_arithmetic_sequence (group, 1, 1);
309 run_examine (&cmd, reader, ds);
312 ok = casegrouper_destroy (grouper);
313 ok = proc_commit (ds) && ok;
/* free (NULL) is a no-op, so this guard is redundant but harmless. */
315 if ( dependent_vars )
316 free (dependent_vars);
318 subc_list_double_destroy (&percentile_list);
320 return ok ? CMD_SUCCESS : CMD_CASCADING_FAILURE;
/* Reader over the cases to plot; each draw callback iterates over a clone
   of it. */
328 struct casereader *data;
330 /* Copied directly from struct np. */
332 double dns_min, dns_max;
/* Computed in np_plot: the ideal line, the range of expected normal scores,
   and the bounds of the x axis. */
335 double slope, intercept;
336 double y_first, y_last;
337 double x_lower, x_upper;
/* Forward declarations: np_plot uses both classes before their definitions,
   which follow the draw and destroy callbacks. */
341 static const struct chart_class np_plot_chart_class;
342 static const struct chart_class dnp_plot_chart_class;
344 /* Plot the normal and detrended normal plots for NP.
345 Label the plots with LABEL.
The detrended chart starts as a byte copy of the normal chart and is then
given its own chart class, its own copy of the label, and its own clone of
the data reader before both charts are submitted. */
347 np_plot (struct np *np, const char *label)
349 struct np_plot_chart *np_plot, *dnp_plot;
353 msg (MW, _("Not creating plot because data set is empty."));
/* Build the normal plot from the figures already stored in NP; NP's writer
   is turned into the reader that the chart draws from. */
357 np_plot = xmalloc (sizeof *np_plot);
358 chart_init (&np_plot->chart, &np_plot_chart_class);
359 np_plot->label = xstrdup (label);
360 np_plot->data = casewriter_make_reader (np->writer);
361 np_plot->y_min = np->y_min;
362 np_plot->y_max = np->y_max;
363 np_plot->dns_min = np->dns_min;
364 np_plot->dns_max = np->dns_max;
366 /* Slope and intercept of the ideal normal probability line. */
367 np_plot->slope = 1.0 / np->stddev;
368 np_plot->intercept = -np->mean / np->stddev;
/* Inverse unit-Gaussian CDF at probabilities 1/(n+1) and n/(n+1). */
370 np_plot->y_first = gsl_cdf_ugaussian_Pinv (1 / (np->n + 1));
371 np_plot->y_last = gsl_cdf_ugaussian_Pinv (np->n / (np->n + 1));
373 /* Need to make sure that both the scatter plot and the ideal fit into the
375 np_plot->x_lower = MIN (
376 np->y_min, (np_plot->y_first - np_plot->intercept) / np_plot->slope);
377 np_plot->x_upper = MAX (
378 np->y_max, (np_plot->y_last - np_plot->intercept) / np_plot->slope) ;
379 np_plot->slack = (np_plot->x_upper - np_plot->x_lower) * 0.05 ;
381 dnp_plot = xmemdup (np_plot, sizeof *np_plot);
382 chart_init (&dnp_plot->chart, &dnp_plot_chart_class);
383 dnp_plot->label = xstrdup (dnp_plot->label);
384 dnp_plot->data = casereader_clone (dnp_plot->data);
386 chart_submit (&np_plot->chart);
387 chart_submit (&dnp_plot->chart);
/* Draw callback for the normal Q-Q plot: plots each case's NP_IDX_Y value
   against its NP_IDX_NS value and then adds the ideal line. */
391 np_plot_chart_draw (const struct chart *chart, plPlotter *lp)
393 const struct np_plot_chart *plot = (struct np_plot_chart *) chart;
394 struct chart_geometry geom;
395 struct casereader *data;
398 chart_geometry_init (lp, &geom);
399 chart_write_title (lp, &geom, _("Normal Q-Q Plot of %s"), plot->label);
400 chart_write_xlabel (lp, &geom, _("Observed Value"));
401 chart_write_ylabel (lp, &geom, _("Expected Normal"));
/* The x scale is widened by the slack computed in np_plot on both sides. */
402 chart_write_xscale (lp, &geom,
403 plot->x_lower - plot->slack,
404 plot->x_upper + plot->slack, 5);
405 chart_write_yscale (lp, &geom, plot->y_first, plot->y_last, 5);
/* Read from a clone of the chart's reader; the clone is destroyed once the
   loop has finished. */
407 data = casereader_clone (plot->data);
408 for (; (c = casereader_read (data)) != NULL; case_unref (c))
409 chart_datum (lp, &geom, 0,
410 case_data_idx (c, NP_IDX_Y)->f,
411 case_data_idx (c, NP_IDX_NS)->f);
412 casereader_destroy (data);
/* The ideal line, limited to [y_first, y_last] in the y dimension. */
414 chart_line (lp, &geom, plot->slope, plot->intercept,
415 plot->y_first, plot->y_last, CHART_DIM_Y);
417 chart_geometry_free (lp);
/* Draw callback for the detrended normal Q-Q plot: plots each case's
   NP_IDX_Y value against its NP_IDX_DNS value and adds a reference line. */
421 dnp_plot_chart_draw (const struct chart *chart, plPlotter *lp)
423 const struct np_plot_chart *plot = (struct np_plot_chart *) chart;
424 struct chart_geometry geom;
425 struct casereader *data;
428 chart_geometry_init (lp, &geom);
429 chart_write_title (lp, &geom, _("Detrended Normal Q-Q Plot of %s"),
431 chart_write_xlabel (lp, &geom, _("Observed Value"));
432 chart_write_ylabel (lp, &geom, _("Dev from Normal"));
433 chart_write_xscale (lp, &geom, plot->y_min, plot->y_max, 5);
434 chart_write_yscale (lp, &geom, plot->dns_min, plot->dns_max, 5);
/* Read from a clone of the chart's reader; the clone is destroyed once the
   loop has finished. */
436 data = casereader_clone (plot->data);
437 for (; (c = casereader_read (data)) != NULL; case_unref (c))
438 chart_datum (lp, &geom, 0, case_data_idx (c, NP_IDX_Y)->f,
439 case_data_idx (c, NP_IDX_DNS)->f);
440 casereader_destroy (data);
/* Reference line with slope 0 and intercept 0, limited to [y_min, y_max]
   in the x dimension. */
442 chart_line (lp, &geom, 0, 0, plot->y_min, plot->y_max, CHART_DIM_X);
444 chart_geometry_free (lp);
/* Destroy callback used by both chart classes; releases the chart's data
   reader. */
448 np_plot_chart_destroy (struct chart *chart)
450 struct np_plot_chart *plot = (struct np_plot_chart *) chart;
452 casereader_destroy (plot->data);
/* Chart class of the normal plot. */
457 static const struct chart_class np_plot_chart_class =
460 np_plot_chart_destroy
/* Chart class of the detrended plot; it uses the same destroy callback as
   the normal plot. */
463 static const struct chart_class dnp_plot_chart_class =
466 np_plot_chart_destroy
/* For every dependent variable and every result of FCTR, builds a label
   from the variable name and the factor description, plots the normal
   probability charts, and then destroys the result's np statistic. */
471 show_npplot (const struct variable **dependent_var,
473 const struct xfactor *fctr)
477 for (v = 0; v < n_dep_var; ++v)
480 for (ll = ll_head (&fctr->result_list);
481 ll != ll_null (&fctr->result_list);
485 const struct factor_result *result =
486 ll_data (ll, struct factor_result, ll);
/* The label is the variable name followed by the factor description. */
488 ds_init_empty (&str);
489 ds_put_format (&str, "%s ", var_get_name (dependent_var[v]));
491 factor_to_string (fctr, result, &str);
493 np_plot ((struct np*) result->metrics[v].np, ds_cstr(&str));
/* The np statistic is released here, straight after it has been plotted. */
495 statistic_destroy ((struct statistic *)result->metrics[v].np);
/* For every dependent variable and every result of FCTR that has a
   histogram, submits a histogram chart labelled with the variable name and
   the factor description. */
504 show_histogram (const struct variable **dependent_var,
506 const struct xfactor *fctr)
510 for (v = 0; v < n_dep_var; ++v)
513 for (ll = ll_head (&fctr->result_list);
514 ll != ll_null (&fctr->result_list);
518 const struct factor_result *result =
519 ll_data (ll, struct factor_result, ll);
520 struct histogram *histogram;
523 histogram = (struct histogram *) result->metrics[v].histogram;
524 if (histogram == NULL)
526 /* Probably all values are SYSMIS. */
530 ds_init_empty (&str);
531 ds_put_format (&str, "%s ", var_get_name (dependent_var[v]));
533 factor_to_string (fctr, result, &str);
/* Recover N, the mean and the variance from the moments; the chart is
   given the square root of the variance. */
535 moments1_calculate ((struct moments1 *) result->metrics[v].moments,
536 &n, &mean, &var, NULL, NULL);
537 chart_submit (histogram_chart_create (histogram, ds_cstr (&str),
538 n, mean, sqrt (var), false));
/* Creates one chart per dependent variable and draws in it one box plot
   for every result of FCTR. */
548 show_boxplot_groups (const struct variable **dependent_var,
550 const struct xfactor *fctr)
555 for (v = 0; v < n_dep_var; ++v)
559 struct chart *ch = chart_create ();
560 double y_min = DBL_MAX;
561 double y_max = -DBL_MAX;
/* First pass over the results: find the overall range of the data, so that
   every box in the chart shares one y scale. */
563 for (ll = ll_head (&fctr->result_list);
564 ll != ll_null (&fctr->result_list);
567 const struct extremum *max, *min;
568 const struct factor_result *result =
569 ll_data (ll, struct factor_result, ll);
571 const struct ll_list *max_list =
572 extrema_list (result->metrics[v].maxima);
574 const struct ll_list *min_list =
575 extrema_list (result->metrics[v].minima);
577 if ( ll_is_empty (max_list))
579 msg (MW, _("Not creating plot because data set is empty."));
/* The head of each extrema list supplies the value used for the range. */
583 max = (const struct extremum *)
584 ll_data (ll_head(max_list), struct extremum, ll);
586 min = (const struct extremum *)
587 ll_data (ll_head (min_list), struct extremum, ll);
589 y_max = MAX (y_max, max->value);
590 y_min = MIN (y_min, min->value);
593 boxplot_draw_yscale (ch, y_max, y_min);
/* The title names the factor's first independent variable when it is set. */
595 if ( fctr->indep_var[0])
596 chart_write_title (ch, _("Boxplot of %s vs. %s"),
597 var_to_string (dependent_var[v]),
598 var_to_string (fctr->indep_var[0]) );
600 chart_write_title (ch, _("Boxplot of %s"),
601 var_to_string (dependent_var[v]));
/* Second pass: draw the boxes side by side.  The data area is divided into
   twice as many slots as there are results, and each box is centred on an
   odd-numbered slot boundary. */
603 for (ll = ll_head (&fctr->result_list);
604 ll != ll_null (&fctr->result_list);
607 const struct factor_result *result =
608 ll_data (ll, struct factor_result, ll);
611 const double box_width = (ch->data_right - ch->data_left)
612 / (ll_count (&fctr->result_list) * 2.0 ) ;
614 const double box_centre = (f++ * 2 + 1) * box_width + ch->data_left;
616 ds_init_empty (&str);
617 factor_to_string_concise (fctr, result, &str);
619 boxplot_draw_boxplot (ch,
620 box_centre, box_width,
621 (const struct box_whisker *)
622 result->metrics[v].box_whisker,
/* Creates one chart per result of FCTR and draws in it one box plot for
   every dependent variable. */
636 show_boxplot_variables (const struct variable **dependent_var,
638 const struct xfactor *fctr
645 const struct ll_list *result_list = &fctr->result_list;
647 for (ll = ll_head (result_list);
648 ll != ll_null (result_list);
653 struct chart *ch = chart_create ();
654 double y_min = DBL_MAX;
655 double y_max = -DBL_MAX;
657 const struct factor_result *result =
658 ll_data (ll, struct factor_result, ll);
/* The data area is divided into twice as many slots as there are dependent
   variables. */
660 const double box_width = (ch->data_right - ch->data_left)
661 / (n_dep_var * 2.0 ) ;
/* Find the overall range across all dependent variables.
   NOTE(review): unlike show_boxplot_groups, no empty-list check is visible
   here before the heads of the extrema lists are used -- confirm. */
663 for (v = 0; v < n_dep_var; ++v)
665 const struct ll *max_ll =
666 ll_head (extrema_list (result->metrics[v].maxima));
667 const struct ll *min_ll =
668 ll_head (extrema_list (result->metrics[v].minima));
670 const struct extremum *max =
671 (const struct extremum *) ll_data (max_ll, struct extremum, ll);
673 const struct extremum *min =
674 (const struct extremum *) ll_data (min_ll, struct extremum, ll);
676 y_max = MAX (y_max, max->value);
677 y_min = MIN (y_min, min->value);
681 boxplot_draw_yscale (ch, y_max, y_min);
683 ds_init_empty (&title);
684 factor_to_string (fctr, result, &title);
687 ds_put_format (&title, "%s = ", var_get_name (fctr->indep_var[0]));
688 var_append_value_name (fctr->indep_var[0], &result->value[0], &title);
691 chart_write_title (ch, "%s", ds_cstr (&title));
/* Draw one box per dependent variable, each labelled with the variable
   name. */
694 for (v = 0; v < n_dep_var; ++v)
697 const double box_centre = (v * 2 + 1) * box_width + ch->data_left;
/* NOTE(review): str is initialised twice in a row below; the first
   initialisation looks redundant -- confirm. */
699 ds_init_empty (&str);
700 ds_init_cstr (&str, var_get_name (dependent_var[v]));
702 boxplot_draw_boxplot (ch,
703 box_centre, box_width,
704 (const struct box_whisker *) result->metrics[v].box_whisker,
716 /* Show all the appropriate tables */
718 output_examine (const struct dictionary *dict)
/* Output for the level-0 factor comes first: the summary is always shown,
   the other tables and the plots only when requested. */
722 show_summary (dependent_vars, n_dependent_vars, dict, &level0_factor);
724 if ( cmd.a_statistics[XMN_ST_EXTREME] )
725 show_extremes (dependent_vars, n_dependent_vars, &level0_factor);
727 if ( cmd.a_statistics[XMN_ST_DESCRIPTIVES] )
728 show_descriptives (dependent_vars, n_dependent_vars, &level0_factor);
730 if ( cmd.sbc_percentiles)
731 show_percentiles (dependent_vars, n_dependent_vars, &level0_factor);
735 if (cmd.a_plot[XMN_PLT_BOXPLOT])
736 show_boxplot_groups (dependent_vars, n_dependent_vars, &level0_factor);
738 if (cmd.a_plot[XMN_PLT_HISTOGRAM])
739 show_histogram (dependent_vars, n_dependent_vars, &level0_factor);
741 if (cmd.a_plot[XMN_PLT_NPPLOT])
742 show_npplot (dependent_vars, n_dependent_vars, &level0_factor);
/* Then the same output for every factor in factor_list.  For these the box
   plot layout is selected by cmd.cmp. */
745 for (ll = ll_head (&factor_list);
746 ll != ll_null (&factor_list); ll = ll_next (ll))
748 struct xfactor *factor = ll_data (ll, struct xfactor, ll);
749 show_summary (dependent_vars, n_dependent_vars, dict, factor);
751 if ( cmd.a_statistics[XMN_ST_EXTREME] )
752 show_extremes (dependent_vars, n_dependent_vars, factor);
754 if ( cmd.a_statistics[XMN_ST_DESCRIPTIVES] )
755 show_descriptives (dependent_vars, n_dependent_vars, factor);
757 if ( cmd.sbc_percentiles)
758 show_percentiles (dependent_vars, n_dependent_vars, factor);
760 if (cmd.a_plot[XMN_PLT_BOXPLOT] &&
761 cmd.cmp == XMN_GROUPS)
762 show_boxplot_groups (dependent_vars, n_dependent_vars, factor);
765 if (cmd.a_plot[XMN_PLT_BOXPLOT] &&
766 cmd.cmp == XMN_VARIABLES)
767 show_boxplot_variables (dependent_vars, n_dependent_vars,
770 if (cmd.a_plot[XMN_PLT_HISTOGRAM])
771 show_histogram (dependent_vars, n_dependent_vars, factor);
773 if (cmd.a_plot[XMN_PLT_NPPLOT])
774 show_npplot (dependent_vars, n_dependent_vars, factor);
778 /* Parse the PERCENTILES subcommand */
780 xmn_custom_percentiles (struct lexer *lexer, struct dataset *ds UNUSED,
781 struct cmd_examine *p UNUSED, void *aux UNUSED)
/* The results of these lex_match calls are not checked, so the '=' and the
   parentheses are accepted but not required. */
783 lex_match (lexer, '=');
785 lex_match (lexer, '(');
/* Collect every number that follows; a comma after a number is accepted. */
787 while ( lex_is_number (lexer) )
789 subc_list_double_push (&percentile_list, lex_number (lexer));
793 lex_match (lexer, ',') ;
795 lex_match (lexer, ')');
797 lex_match (lexer, '=');
/* An algorithm keyword, if present, replaces the algorithm that
   cmd_examine set before parsing. */
799 if ( lex_match_id (lexer, "HAVERAGE"))
800 percentile_algorithm = PC_HAVERAGE;
802 else if ( lex_match_id (lexer, "WAVERAGE"))
803 percentile_algorithm = PC_WAVERAGE;
805 else if ( lex_match_id (lexer, "ROUND"))
806 percentile_algorithm = PC_ROUND;
808 else if ( lex_match_id (lexer, "EMPIRICAL"))
809 percentile_algorithm = PC_EMPIRICAL;
811 else if ( lex_match_id (lexer, "AEMPIRICAL"))
812 percentile_algorithm = PC_AEMPIRICAL;
814 else if ( lex_match_id (lexer, "NONE"))
815 percentile_algorithm = PC_NONE;
/* When the list is still empty, fall back to this default set. */
818 if ( 0 == subc_list_double_count (&percentile_list))
820 subc_list_double_push (&percentile_list, 5);
821 subc_list_double_push (&percentile_list, 10);
822 subc_list_double_push (&percentile_list, 25);
823 subc_list_double_push (&percentile_list, 50);
824 subc_list_double_push (&percentile_list, 75);
825 subc_list_double_push (&percentile_list, 90);
826 subc_list_double_push (&percentile_list, 95);
832 /* TOTAL and NOTOTAL are simple, mutually exclusive flags */
834 xmn_custom_total (struct lexer *lexer UNUSED, struct dataset *ds UNUSED,
835 struct cmd_examine *p, void *aux UNUSED)
/* TOTAL is rejected with an error when NOTOTAL has already been given. */
837 if ( p->sbc_nototal )
839 msg (SE, _("%s and %s are mutually exclusive"),"TOTAL","NOTOTAL");
/* Counterpart of xmn_custom_total for NOTOTAL; it reports the same
   mutual-exclusion error. */
847 xmn_custom_nototal (struct lexer *lexer UNUSED, struct dataset *ds UNUSED,
848 struct cmd_examine *p, void *aux UNUSED)
852 msg (SE, _("%s and %s are mutually exclusive"), "TOTAL", "NOTOTAL");
861 /* Parser for the variables sub command
862 Returns 1 on success */
864 xmn_custom_variables (struct lexer *lexer, struct dataset *ds,
865 struct cmd_examine *cmd,
868 const struct dictionary *dict = dataset_dict (ds);
869 lex_match (lexer, '=');
/* True when the next token is neither the name of a variable in DICT nor
   ALL. */
871 if ( (lex_token (lexer) != T_ID || dict_lookup_var (dict, lex_tokid (lexer)) == NULL)
872 && lex_token (lexer) != T_ALL)
/* The dependent variables are parsed with the no-duplicate, numeric and
   no-scratch flags.
   NOTE(review): dependent_vars is freed on the failure paths below without
   being reset to NULL, and cmd_examine frees it as well -- confirm that
   those paths can never reach the second free. */
877 if (!parse_variables_const (lexer, dict, &dependent_vars, &n_dependent_vars,
878 PV_NO_DUPLICATE | PV_NUMERIC | PV_NO_SCRATCH) )
880 free (dependent_vars);
884 assert (n_dependent_vars);
/* A BY keyword introduces the independent variables. */
887 if ( lex_match (lexer, T_BY))
890 success = examine_parse_independent_vars (lexer, dict, cmd);
893 free (dependent_vars);
903 /* Parse the clause specifying the factors */
905 examine_parse_independent_vars (struct lexer *lexer,
906 const struct dictionary *dict,
907 struct cmd_examine *cmd)
/* Every call allocates one factor with an empty result list. */
910 struct xfactor *sf = xmalloc (sizeof *sf);
912 ll_init (&sf->result_list);
/* True when the next token is neither the name of a variable in DICT nor
   ALL. */
914 if ( (lex_token (lexer) != T_ID ||
915 dict_lookup_var (dict, lex_tokid (lexer)) == NULL)
916 && lex_token (lexer) != T_ALL)
/* The first independent variable; the second stays NULL unless a BY
   follows. */
922 sf->indep_var[0] = parse_variable (lexer, dict);
923 sf->indep_var[1] = NULL;
925 if ( lex_token (lexer) == T_BY )
927 lex_match (lexer, T_BY);
929 if ( (lex_token (lexer) != T_ID ||
930 dict_lookup_var (dict, lex_tokid (lexer)) == NULL)
931 && lex_token (lexer) != T_ALL)
937 sf->indep_var[1] = parse_variable (lexer, dict);
/* NOTE(review): the two ll_push_tail calls below presumably sit on separate
   branches (with and without a second variable) -- confirm. */
939 ll_push_tail (&factor_list, &sf->ll);
942 ll_push_tail (&factor_list, &sf->ll);
/* A comma after a factor is accepted. */
944 lex_match (lexer, ',');
/* '.' or '/' presumably ends the clause; otherwise the next factor is
   parsed by a recursive call. */
946 if ( lex_token (lexer) == '.' || lex_token (lexer) == '/' )
949 success = examine_parse_independent_vars (lexer, dict, cmd);
/* Computes the statistics for the cases in READER and appends the resulting
   factor_result to FACTOR's result list.  READER is destroyed before the
   function returns. */
958 examine_group (struct cmd_examine *cmd, struct casereader *reader, int level,
959 const struct dictionary *dict, struct xfactor *factor)
962 const struct variable *wv = dict_get_weight (dict);
965 struct factor_result *result = xzalloc (sizeof (*result));
/* Prepare storage for the value of each independent variable that is set. */
968 for (i = 0; i < 2; i++)
969 if (factor->indep_var[i])
970 value_init (&result->value[i], var_get_width (factor->indep_var[i]));
/* One factor_metrics entry per dependent variable. */
972 result->metrics = xcalloc (n_dependent_vars, sizeof (*result->metrics));
/* When EXTREME statistics were requested, the command supplies how many
   extremes to keep. */
974 if ( cmd->a_statistics[XMN_ST_EXTREME] )
975 n_extrema = cmd->st_n;
/* The first case of the group supplies the values of the independent
   variables that are recorded in the result. */
978 c = casereader_peek (reader, 0);
982 for (i = 0; i < 2; i++)
983 if (factor->indep_var[i])
984 value_copy (&result->value[i], case_data (c, factor->indep_var[i]),
985 var_get_width (factor->indep_var[i]));
/* First pass, once per dependent variable, over a clone of READER: the
   cases are written to a writer while the moments, the weight sums and the
   extrema are accumulated. */
989 for (v = 0; v < n_dependent_vars; ++v)
991 struct casewriter *writer;
992 struct casereader *input = casereader_clone (reader);
994 result->metrics[v].moments = moments1_create (MOMENT_KURTOSIS);
995 result->metrics[v].minima = extrema_create (n_extrema, EXTREME_MINIMA);
996 result->metrics[v].maxima = extrema_create (n_extrema, EXTREME_MAXIMA);
997 result->metrics[v].cmin = DBL_MAX;
/* Descriptives, box plots, normal plots and percentiles select the sorting
   writer. */
999 if (cmd->a_statistics[XMN_ST_DESCRIPTIVES] ||
1000 cmd->a_plot[XMN_PLT_BOXPLOT] ||
1001 cmd->a_plot[XMN_PLT_NPPLOT] ||
1002 cmd->sbc_percentiles)
1004 /* In this case, we need to sort the data, so we create a sorting
1006 struct subcase up_ordering;
1007 subcase_init_var (&up_ordering, dependent_vars[v], SC_ASCEND);
1008 writer = sort_create_writer (&up_ordering,
1009 casereader_get_proto (reader));
1010 subcase_destroy (&up_ordering);
1014 /* but in this case, sorting is unnecessary, so an ordinary
1015 casewriter is sufficient */
1017 autopaging_writer_create (casereader_get_proto (reader));
1021 /* Sort or just iterate, whilst calculating moments etc */
1022 while ((c = casereader_read (input)) != NULL)
/* The location of the case is read from its last value. */
1024 int n_vals = caseproto_get_n_widths (casereader_get_proto (reader));
1025 const casenumber loc = case_data_idx (c, n_vals - 1)->f;
/* Without a weight variable every case has weight 1. */
1027 const double weight = wv ? case_data (c, wv)->f : 1.0;
1028 const union value *value = case_data (c, dependent_vars[v]);
/* Track the smallest weight, leaving out SYSMIS weights. */
1030 if (weight != SYSMIS)
1031 minimize (&result->metrics[v].cmin, weight);
1033 moments1_add (result->metrics[v].moments,
/* n sums the weight of every case; n_valid only of the cases whose value
   is not missing. */
1037 result->metrics[v].n += weight;
1039 if ( ! var_is_value_missing (dependent_vars[v], value, MV_ANY) )
1040 result->metrics[v].n_valid += weight;
1042 extrema_add (result->metrics[v].maxima,
1047 extrema_add (result->metrics[v].minima,
1052 casewriter_write (writer, c);
/* The contents of the writer become the reader used by the later passes. */
1054 casereader_destroy (input);
1055 result->metrics[v].up_reader = casewriter_make_reader (writer);
1058 /* If percentiles or descriptives have been requested, then a
1059 second pass through the data (which has now been sorted)
1061 if ( cmd->a_statistics[XMN_ST_DESCRIPTIVES] ||
1062 cmd->a_plot[XMN_PLT_BOXPLOT] ||
1063 cmd->a_plot[XMN_PLT_NPPLOT] ||
1064 cmd->sbc_percentiles)
1066 for (v = 0; v < n_dependent_vars; ++v)
1070 struct order_stats **os ;
1071 struct factor_metrics *metric = &result->metrics[v];
1073 metric->n_ptiles = percentile_list.n_data;
1075 metric->ptl = xcalloc (metric->n_ptiles,
1076 sizeof (struct percentile *));
1078 metric->quartiles = xcalloc (3, sizeof (*metric->quartiles));
1080 for (i = 0 ; i < metric->n_ptiles; ++i)
1082 metric->ptl[i] = (struct percentile *)
1083 percentile_create (percentile_list.data[i] / 100.0, metric->n_valid);
1085 if ( percentile_list.data[i] == 25)
1086 metric->quartiles[0] = metric->ptl[i];
1087 else if ( percentile_list.data[i] == 50)
1088 metric->quartiles[1] = metric->ptl[i];
1089 else if ( percentile_list.data[i] == 75)
1090 metric->quartiles[2] = metric->ptl[i];
1093 metric->tukey_hinges = tukey_hinges_create (metric->n_valid, metric->cmin);
1094 metric->trimmed_mean = trimmed_mean_create (metric->n_valid, 0.05);
1096 n_os = metric->n_ptiles + 2;
1098 if ( cmd->a_plot[XMN_PLT_NPPLOT] )
1100 metric->np = np_create (metric->moments);
1104 os = xcalloc (sizeof (struct order_stats *), n_os);
1106 for (i = 0 ; i < metric->n_ptiles ; ++i )
1108 os[i] = (struct order_stats *) metric->ptl[i];
1111 os[i] = (struct order_stats *) metric->tukey_hinges;
1112 os[i+1] = (struct order_stats *) metric->trimmed_mean;
1114 if (cmd->a_plot[XMN_PLT_NPPLOT])
1115 os[i+2] = metric->np;
1117 order_stats_accumulate (os, n_os,
1118 casereader_clone (metric->up_reader),
1119 wv, dependent_vars[v], MV_ANY);
1124 /* FIXME: Do this in the above loop */
1125 if ( cmd->a_plot[XMN_PLT_HISTOGRAM] )
/* The histogram pass reads from its own clone of READER. */
1128 struct casereader *input = casereader_clone (reader);
/* Create a histogram for each dependent variable from the heads of its
   extrema lists; histogram_create is called with 10, presumably the number
   of bins. */
1130 for (v = 0; v < n_dependent_vars; ++v)
1132 const struct extremum *max, *min;
1133 struct factor_metrics *metric = &result->metrics[v];
1135 const struct ll_list *max_list =
1136 extrema_list (result->metrics[v].maxima);
1138 const struct ll_list *min_list =
1139 extrema_list (result->metrics[v].minima);
1141 if ( ll_is_empty (max_list))
1143 msg (MW, _("Not creating plot because data set is empty."));
1147 assert (! ll_is_empty (min_list));
1149 max = (const struct extremum *)
1150 ll_data (ll_head(max_list), struct extremum, ll);
1152 min = (const struct extremum *)
1153 ll_data (ll_head (min_list), struct extremum, ll);
1155 metric->histogram = histogram_create (10, min->value, max->value);
/* Add every case, with its weight, to each histogram that was created. */
1158 while ((c = casereader_read (input)) != NULL)
1160 const double weight = wv ? case_data (c, wv)->f : 1.0;
1162 for (v = 0; v < n_dependent_vars; ++v)
1164 struct factor_metrics *metric = &result->metrics[v];
1165 if ( metric->histogram)
1166 histogram_add ((struct histogram *) metric->histogram,
1167 case_data (c, dependent_vars[v])->f, weight);
1171 casereader_destroy (input);
1174 /* In this case, a third iteration is required */
1175 if (cmd->a_plot[XMN_PLT_BOXPLOT])
1177 for (v = 0; v < n_dependent_vars; ++v)
1179 struct factor_metrics *metric = &result->metrics[v];
1180 int n_vals = caseproto_get_n_widths (casereader_get_proto (
1181 metric->up_reader));
/* The box-whisker statistic is built on the Tukey hinges created earlier
   and is given the index of the last value of each case. */
1183 metric->box_whisker =
1184 box_whisker_create ((struct tukey_hinges *) metric->tukey_hinges,
1185 cmd->v_id, n_vals - 1);
1187 order_stats_accumulate ((struct order_stats **) &metric->box_whisker,
1189 casereader_clone (metric->up_reader),
1190 wv, dependent_vars[v], MV_ANY);
/* Record the result and release the group's reader. */
1194 ll_push_tail (&factor->result_list, &result->ll);
1195 casereader_destroy (reader);
/* Runs the analysis for the cases in INPUT: first the level-0 factor over a
   clone of the whole input, then every factor in factor_list, and finally
   the output and the cleanup. */
1200 run_examine (struct cmd_examine *cmd, struct casereader *input,
1204 const struct dictionary *dict = dataset_dict (ds);
1206 struct casereader *level0 = casereader_clone (input);
/* NOTE(review): level0 has already been cloned at this point.  If the
   empty-input path that follows only destroys INPUT before returning, the
   clone would leak -- confirm. */
1208 c = casereader_peek (input, 0);
1211 casereader_destroy (input);
1215 output_split_file_values (ds, c);
/* The level-0 result list starts empty on every run. */
1218 ll_init (&level0_factor.result_list);
1220 examine_group (cmd, level0, 0, dict, &level0_factor);
/* For each factor, sort a clone of INPUT by the first independent variable
   and group the cases on it. */
1222 for (ll = ll_head (&factor_list);
1223 ll != ll_null (&factor_list);
1226 struct xfactor *factor = ll_data (ll, struct xfactor, ll);
1228 struct casereader *group = NULL;
1229 struct casereader *level1;
1230 struct casegrouper *grouper1 = NULL;
1232 level1 = casereader_clone (input);
1233 level1 = sort_execute_1var (level1, factor->indep_var[0]);
1234 grouper1 = casegrouper_create_vars (level1, &factor->indep_var[0], 1);
1236 while (casegrouper_get_next_group (grouper1, &group))
1238 struct casereader *group_copy = casereader_clone (group);
/* With a single independent variable the group is examined directly;
   otherwise it is sorted and grouped again on the second variable. */
1240 if ( !factor->indep_var[1])
1241 examine_group (cmd, group_copy, 1, dict, factor);
1245 struct casereader *group2 = NULL;
1246 struct casegrouper *grouper2 = NULL;
1248 group_copy = sort_execute_1var (group_copy,
1249 factor->indep_var[1]);
1251 grouper2 = casegrouper_create_vars (group_copy,
1252 &factor->indep_var[1], 1);
1254 while (casegrouper_get_next_group (grouper2, &group2))
1256 examine_group (cmd, group2, 2, dict, factor);
1259 casegrouper_destroy (grouper2);
/* Only clones of the group were handed on, so the group itself is
   destroyed here. */
1262 casereader_destroy (group);
1264 casegrouper_destroy (grouper1);
1267 casereader_destroy (input);
1269 output_examine (dict);
1271 factor_destroy (&level0_factor);
/* Walk the factor list once more, presumably to release the results of each
   factor. */
1275 for (ll = ll_head (&factor_list);
1276 ll != ll_null (&factor_list);
1279 struct xfactor *f = ll_data (ll, struct xfactor, ll);
/* Builds the "Case Processing Summary" table for FCTR.  For every dependent
   variable and every result it shows three pairs of columns, each with a
   weight and a percentage; the first pair uses the N from the moments, the
   second the remainder of the total weight, the third the total weight. */
1288 show_summary (const struct variable **dependent_var, int n_dep_var,
1289 const struct dictionary *dict,
1290 const struct xfactor *fctr)
1292 const struct variable *wv = dict_get_weight (dict);
/* Without a weight variable the F_8_0 format is used. */
1293 const struct fmt_spec *wfmt = wv ? var_get_print_format (wv) : & F_8_0;
1295 static const char *subtitle[]=
1303 int heading_columns = 1;
1305 const int heading_rows = 3;
1306 struct tab_table *tbl;
/* Each independent variable that is set adds one heading column. */
1313 if ( fctr->indep_var[0] )
1315 heading_columns = 2;
1317 if ( fctr->indep_var[1] )
1319 heading_columns = 3;
/* Scale the row count by the number of results, then add the heading
   rows. */
1323 n_rows *= ll_count (&fctr->result_list);
1324 n_rows += heading_rows;
/* Six data columns: three pairs of one weight and one percentage. */
1326 n_cols = heading_columns + 6;
1328 tbl = tab_create (n_cols, n_rows, 0);
1329 tab_headers (tbl, heading_columns, 0, heading_rows, 0);
1331 tab_dim (tbl, tab_natural_dimensions, NULL, NULL);
1333 /* Outline the box */
1338 n_cols - 1, n_rows - 1);
1340 /* Vertical lines for the data only */
1345 n_cols - 1, n_rows - 1);
1348 tab_hline (tbl, TAL_2, 0, n_cols - 1, heading_rows );
1349 tab_hline (tbl, TAL_1, heading_columns, n_cols - 1, 1 );
1350 tab_hline (tbl, TAL_1, heading_columns, n_cols - 1, heading_rows -1 );
1352 tab_vline (tbl, TAL_2, heading_columns, 0, n_rows - 1);
1355 tab_title (tbl, _("Case Processing Summary"));
1357 tab_joint_text (tbl, heading_columns, 0,
1359 TAB_CENTER | TAT_TITLE,
1362 /* Remove lines ... */
/* Headings for the three column pairs. */
1369 for (j = 0 ; j < 3 ; ++j)
1371 tab_text (tbl, heading_columns + j * 2 , 2, TAB_CENTER | TAT_TITLE,
1374 tab_text (tbl, heading_columns + j * 2 + 1, 2, TAB_CENTER | TAT_TITLE,
1377 tab_joint_text (tbl, heading_columns + j * 2 , 1,
1378 heading_columns + j * 2 + 1, 1,
1379 TAB_CENTER | TAT_TITLE,
1382 tab_box (tbl, -1, -1,
1384 heading_columns + j * 2, 1,
1385 heading_columns + j * 2 + 1, 1);
1389 /* Titles for the independent variables */
1390 if ( fctr->indep_var[0] )
1392 tab_text (tbl, 1, heading_rows - 1, TAB_CENTER | TAT_TITLE,
1393 var_to_string (fctr->indep_var[0]));
1395 if ( fctr->indep_var[1] )
1397 tab_text (tbl, 2, heading_rows - 1, TAB_CENTER | TAT_TITLE,
1398 var_to_string (fctr->indep_var[1]));
/* One band of rows per dependent variable, one row per result. */
1402 for (v = 0 ; v < n_dep_var ; ++v)
1406 const union value *last_value = NULL;
1409 tab_hline (tbl, TAL_1, 0, n_cols -1 ,
1410 v * ll_count (&fctr->result_list)
1415 v * ll_count (&fctr->result_list) + heading_rows,
1416 TAB_LEFT | TAT_TITLE,
1417 var_to_string (dependent_var[v])
1421 for (ll = ll_head (&fctr->result_list);
1422 ll != ll_null (&fctr->result_list); ll = ll_next (ll))
1425 const struct factor_result *result =
1426 ll_data (ll, struct factor_result, ll);
1428 if ( fctr->indep_var[0] )
/* The value of the first independent variable is handled only when it
   differs from that of the previous result. */
1431 if ( last_value == NULL ||
1432 !value_equal (last_value, &result->value[0],
1433 var_get_width (fctr->indep_var[0])))
1437 last_value = &result->value[0];
1438 ds_init_empty (&str);
1440 var_append_value_name (fctr->indep_var[0], &result->value[0],
1445 v * ll_count (&fctr->result_list),
1446 TAB_LEFT | TAT_TITLE,
1451 if ( fctr->indep_var[1] && j > 0)
1452 tab_hline (tbl, TAL_1, 1, n_cols - 1,
1454 v * ll_count (&fctr->result_list));
1457 if ( fctr->indep_var[1])
1461 ds_init_empty (&str);
1463 var_append_value_name (fctr->indep_var[1],
1464 &result->value[1], &str);
1468 v * ll_count (&fctr->result_list),
1469 TAB_LEFT | TAT_TITLE,
/* Besides N, this call stores the mean, variance, skewness and kurtosis in
   the result's metrics.  The result pointer is const-qualified, but the
   metrics array is reached through a pointer member, so the stores are
   permitted. */
1477 moments1_calculate (result->metrics[v].moments,
1478 &n, &result->metrics[v].mean,
1479 &result->metrics[v].variance,
1480 &result->metrics[v].skewness,
1481 &result->metrics[v].kurtosis);
1483 result->metrics[v].se_mean = sqrt (result->metrics[v].variance / n) ;
/* First pair of columns: figures based on the N from the moments. */
1486 tab_double (tbl, heading_columns,
1487 heading_rows + j + v * ll_count (&fctr->result_list),
1491 tab_text (tbl, heading_columns + 1,
1492 heading_rows + j + v * ll_count (&fctr->result_list),
1493 TAB_RIGHT | TAT_PRINTF,
1494 "%g%%", n * 100.0 / result->metrics[v].n);
/* Second pair of columns: the total weight minus that N. */
1497 tab_double (tbl, heading_columns + 2,
1498 heading_rows + j + v * ll_count (&fctr->result_list),
1500 result->metrics[v].n - n,
1503 tab_text (tbl, heading_columns + 3,
1504 heading_rows + j + v * ll_count (&fctr->result_list),
1505 TAB_RIGHT | TAT_PRINTF,
1507 (result->metrics[v].n - n) * 100.0 / result->metrics[v].n
1510 /* Total Valid + Missing */
1511 tab_double (tbl, heading_columns + 4,
1512 heading_rows + j + v * ll_count (&fctr->result_list),
1514 result->metrics[v].n,
1517 tab_text (tbl, heading_columns + 5,
1518 heading_rows + j + v * ll_count (&fctr->result_list),
1519 TAB_RIGHT | TAT_PRINTF,
1521 (result->metrics[v].n) * 100.0 / result->metrics[v].n
/* Number of table rows written for each factor result in the
   "Descriptives" table (row offsets 0 through 12 below). */
1532 #define DESCRIPTIVE_ROWS 13
/* Builds the table titled "Descriptives".

   For every dependent variable (loop bound n_dep_var) and every
   factor_result on FCTR->result_list, a group of DESCRIPTIVE_ROWS rows
   is filled in: labels in the columns left of the data, the statistic
   in column n_cols - 2 and, for the rows that have one, a standard
   error in column n_cols - 1.

   heading_columns starts at 3 and is set to 4 when FCTR->indep_var[0]
   is set and to 5 when FCTR->indep_var[1] is set. */
1535 show_descriptives (const struct variable **dependent_var,
1537 const struct xfactor *fctr)
1540 int heading_columns = 3;
1542 const int heading_rows = 1;
1543 struct tab_table *tbl;
1550 if ( fctr->indep_var[0] )
1552 heading_columns = 4;
1554 if ( fctr->indep_var[1] )
1556 heading_columns = 5;
/* DESCRIPTIVE_ROWS rows per factor result, plus the heading rows. */
1560 n_rows *= ll_count (&fctr->result_list) * DESCRIPTIVE_ROWS;
1561 n_rows += heading_rows;
/* Two data columns follow the heading columns. */
1563 n_cols = heading_columns + 2;
1565 tbl = tab_create (n_cols, n_rows, 0);
1566 tab_headers (tbl, heading_columns, 0, heading_rows, 0);
1568 tab_dim (tbl, tab_natural_dimensions, NULL, NULL);
1570 /* Outline the box */
1575 n_cols - 1, n_rows - 1);
1578 tab_hline (tbl, TAL_2, 0, n_cols - 1, heading_rows );
1579 tab_hline (tbl, TAL_2, 1, n_cols - 1, heading_rows );
1581 tab_vline (tbl, TAL_1, n_cols - 1, 0, n_rows - 1);
/* Column titles for the factors, when present. */
1584 if ( fctr->indep_var[0])
1585 tab_text (tbl, 1, 0, TAT_TITLE, var_to_string (fctr->indep_var[0]));
1587 if ( fctr->indep_var[1])
1588 tab_text (tbl, 2, 0, TAT_TITLE, var_to_string (fctr->indep_var[1]));
1590 for (v = 0 ; v < n_dep_var ; ++v )
/* Offset (below the heading rows) of the first row belonging to
   dependent variable V. */
1595 const int row_var_start =
1596 v * DESCRIPTIVE_ROWS * ll_count(&fctr->result_list);
1600 heading_rows + row_var_start,
1601 TAB_LEFT | TAT_TITLE,
1602 var_to_string (dependent_var[v])
/* One group of DESCRIPTIVE_ROWS rows per factor result; I counts the
   results visited so far for this variable. */
1605 for (ll = ll_head (&fctr->result_list);
1606 ll != ll_null (&fctr->result_list); i++, ll = ll_next (ll))
1608 const struct factor_result *result =
1609 ll_data (ll, struct factor_result, ll);
/* Inverse upper-tail t distribution at (1 - level / 100) / 2 with
   n - 1 degrees of freedom, where level is cmd.n_cinterval[0].
   NOTE(review): presumably this is the `t' multiplier used for the
   confidence bounds below -- confirm.  No guard for n <= 1 is
   apparent here. */
1612 gsl_cdf_tdist_Qinv ((1 - cmd.n_cinterval[0] / 100.0) / 2.0,
1613 result->metrics[v].n - 1);
/* Rule above every group except the very first one; it starts in
   column 0 only for the first result of a variable. */
1615 if ( i > 0 || v > 0 )
1617 const int left_col = (i == 0) ? 0 : 1;
1618 tab_hline (tbl, TAL_1, left_col, n_cols - 1,
1619 heading_rows + row_var_start + i * DESCRIPTIVE_ROWS);
/* Render the first factor's value for this result as text. */
1622 if ( fctr->indep_var[0])
1625 ds_init_empty (&vstr);
1626 var_append_value_name (fctr->indep_var[0],
1627 &result->value[0], &vstr);
1630 heading_rows + row_var_start + i * DESCRIPTIVE_ROWS,
/* Row labels: column n_cols - 4 gets one label for each of the row
   offsets 0 and 1 and 3 through 12; column n_cols - 3 gets labels on
   offsets 1 and 2 (the two confidence-interval rows). */
1639 tab_text (tbl, n_cols - 4,
1640 heading_rows + row_var_start + i * DESCRIPTIVE_ROWS,
1644 tab_text (tbl, n_cols - 4,
1645 heading_rows + row_var_start + 1 + i * DESCRIPTIVE_ROWS,
1646 TAB_LEFT | TAT_PRINTF,
1647 _("%g%% Confidence Interval for Mean"),
1648 cmd.n_cinterval[0]);
1650 tab_text (tbl, n_cols - 3,
1651 heading_rows + row_var_start + 1 + i * DESCRIPTIVE_ROWS,
1655 tab_text (tbl, n_cols - 3,
1656 heading_rows + row_var_start + 2 + i * DESCRIPTIVE_ROWS,
1660 tab_text (tbl, n_cols - 4,
1661 heading_rows + row_var_start + 3 + i * DESCRIPTIVE_ROWS,
1662 TAB_LEFT | TAT_PRINTF,
1663 _("5%% Trimmed Mean"));
1665 tab_text (tbl, n_cols - 4,
1666 heading_rows + row_var_start + 4 + i * DESCRIPTIVE_ROWS,
1670 tab_text (tbl, n_cols - 4,
1671 heading_rows + row_var_start + 5 + i * DESCRIPTIVE_ROWS,
1675 tab_text (tbl, n_cols - 4,
1676 heading_rows + row_var_start + 6 + i * DESCRIPTIVE_ROWS,
1678 _("Std. Deviation"));
1680 tab_text (tbl, n_cols - 4,
1681 heading_rows + row_var_start + 7 + i * DESCRIPTIVE_ROWS,
1685 tab_text (tbl, n_cols - 4,
1686 heading_rows + row_var_start + 8 + i * DESCRIPTIVE_ROWS,
1690 tab_text (tbl, n_cols - 4,
1691 heading_rows + row_var_start + 9 + i * DESCRIPTIVE_ROWS,
1695 tab_text (tbl, n_cols - 4,
1696 heading_rows + row_var_start + 10 + i * DESCRIPTIVE_ROWS,
1698 _("Interquartile Range"));
1701 tab_text (tbl, n_cols - 4,
1702 heading_rows + row_var_start + 11 + i * DESCRIPTIVE_ROWS,
1706 tab_text (tbl, n_cols - 4,
1707 heading_rows + row_var_start + 12 + i * DESCRIPTIVE_ROWS,
1712 /* Now the statistics ... */
/* Offset 0: mean, with its standard error in the last column. */
1714 tab_double (tbl, n_cols - 2,
1715 heading_rows + row_var_start + i * DESCRIPTIVE_ROWS,
1717 result->metrics[v].mean,
1720 tab_double (tbl, n_cols - 1,
1721 heading_rows + row_var_start + i * DESCRIPTIVE_ROWS,
1723 result->metrics[v].se_mean,
/* Offsets 1 and 2: mean minus / plus t times the standard error. */
1727 tab_double (tbl, n_cols - 2,
1728 heading_rows + row_var_start + 1 + i * DESCRIPTIVE_ROWS,
1730 result->metrics[v].mean - t *
1731 result->metrics[v].se_mean,
1734 tab_double (tbl, n_cols - 2,
1735 heading_rows + row_var_start + 2 + i * DESCRIPTIVE_ROWS,
1737 result->metrics[v].mean + t *
1738 result->metrics[v].se_mean,
/* Offset 3: trimmed mean. */
1742 tab_double (tbl, n_cols - 2,
1743 heading_rows + row_var_start + 3 + i * DESCRIPTIVE_ROWS,
1745 trimmed_mean_calculate ((struct trimmed_mean *) result->metrics[v].trimmed_mean),
/* Offset 4: percentile computed from quartiles[1]. */
1749 tab_double (tbl, n_cols - 2,
1750 heading_rows + row_var_start + 4 + i * DESCRIPTIVE_ROWS,
1752 percentile_calculate (result->metrics[v].quartiles[1], percentile_algorithm),
/* Offsets 5 and 6: variance and its square root. */
1756 tab_double (tbl, n_cols - 2,
1757 heading_rows + row_var_start + 5 + i * DESCRIPTIVE_ROWS,
1759 result->metrics[v].variance,
1762 tab_double (tbl, n_cols - 2,
1763 heading_rows + row_var_start + 6 + i * DESCRIPTIVE_ROWS,
1765 sqrt (result->metrics[v].variance),
/* Offset 10: percentile from quartiles[2] minus the one from
   quartiles[0]. */
1768 tab_double (tbl, n_cols - 2,
1769 heading_rows + row_var_start + 10 + i * DESCRIPTIVE_ROWS,
1771 percentile_calculate (result->metrics[v].quartiles[2],
1772 percentile_algorithm) -
1773 percentile_calculate (result->metrics[v].quartiles[0],
1774 percentile_algorithm),
/* Offsets 11 and 12: skewness and kurtosis, each followed in the last
   column by the value of calc_seskew / calc_sekurt for this n. */
1778 tab_double (tbl, n_cols - 2,
1779 heading_rows + row_var_start + 11 + i * DESCRIPTIVE_ROWS,
1781 result->metrics[v].skewness,
1784 tab_double (tbl, n_cols - 2,
1785 heading_rows + row_var_start + 12 + i * DESCRIPTIVE_ROWS,
1787 result->metrics[v].kurtosis,
1790 tab_double (tbl, n_cols - 1,
1791 heading_rows + row_var_start + 11 + i * DESCRIPTIVE_ROWS,
1793 calc_seskew (result->metrics[v].n),
1796 tab_double (tbl, n_cols - 1,
1797 heading_rows + row_var_start + 12 + i * DESCRIPTIVE_ROWS,
1799 calc_sekurt (result->metrics[v].n),
/* Offsets 7 through 9 are filled from the head entries of the minima
   and maxima lists; offset 9 is the difference of their values. */
1803 struct extremum *minimum, *maximum ;
1805 struct ll *max_ll = ll_head (extrema_list (result->metrics[v].maxima));
1806 struct ll *min_ll = ll_head (extrema_list (result->metrics[v].minima));
1808 maximum = ll_data (max_ll, struct extremum, ll);
1809 minimum = ll_data (min_ll, struct extremum, ll);
1811 tab_double (tbl, n_cols - 2,
1812 heading_rows + row_var_start + 7 + i * DESCRIPTIVE_ROWS,
1817 tab_double (tbl, n_cols - 2,
1818 heading_rows + row_var_start + 8 + i * DESCRIPTIVE_ROWS,
1823 tab_double (tbl, n_cols - 2,
1824 heading_rows + row_var_start + 9 + i * DESCRIPTIVE_ROWS,
1826 maximum->value - minimum->value,
/* Heavy rule between the heading columns and the data, then the table
   title and the titles of the two data columns. */
1832 tab_vline (tbl, TAL_2, heading_columns, 0, n_rows - 1);
1834 tab_title (tbl, _("Descriptives"));
1836 tab_text (tbl, n_cols - 2, 0, TAB_CENTER | TAT_TITLE,
1839 tab_text (tbl, n_cols - 1, 0, TAB_CENTER | TAT_TITLE,
/* Builds the table titled "Extreme Values".

   Every factor_result on FCTR->result_list gets 2 * cmd.st_n rows for
   each dependent variable (loop bound n_dep_var): the first cmd.st_n
   rows are filled from the maxima list and the following cmd.st_n
   rows from the minima list.

   heading_columns starts at 3 and is set to 4 when FCTR->indep_var[0]
   is set and to 5 when FCTR->indep_var[1] is set. */
1848 show_extremes (const struct variable **dependent_var,
1850 const struct xfactor *fctr)
1853 int heading_columns = 3;
1855 const int heading_rows = 1;
1856 struct tab_table *tbl;
1863 if ( fctr->indep_var[0] )
1865 heading_columns = 4;
1867 if ( fctr->indep_var[1] )
1869 heading_columns = 5;
/* 2 * cmd.st_n rows per factor result, plus the heading rows. */
1873 n_rows *= ll_count (&fctr->result_list) * cmd.st_n * 2;
1874 n_rows += heading_rows;
/* Two data columns follow the heading columns. */
1876 n_cols = heading_columns + 2;
1878 tbl = tab_create (n_cols, n_rows, 0);
1879 tab_headers (tbl, heading_columns, 0, heading_rows, 0);
1881 tab_dim (tbl, tab_natural_dimensions, NULL, NULL);
1883 /* Outline the box */
1888 n_cols - 1, n_rows - 1);
1891 tab_hline (tbl, TAL_2, 0, n_cols - 1, heading_rows );
1892 tab_hline (tbl, TAL_2, 1, n_cols - 1, heading_rows );
1893 tab_vline (tbl, TAL_1, n_cols - 1, 0, n_rows - 1);
/* Column titles for the factors, when present. */
1895 if ( fctr->indep_var[0])
1896 tab_text (tbl, 1, 0, TAT_TITLE, var_to_string (fctr->indep_var[0]));
1898 if ( fctr->indep_var[1])
1899 tab_text (tbl, 2, 0, TAT_TITLE, var_to_string (fctr->indep_var[1]));
1901 for (v = 0 ; v < n_dep_var ; ++v )
/* Offset (below the heading rows) of the first row belonging to
   dependent variable V. */
1905 const int row_var_start = v * cmd.st_n * 2 * ll_count(&fctr->result_list);
1909 heading_rows + row_var_start,
1910 TAB_LEFT | TAT_TITLE,
1911 var_to_string (dependent_var[v])
1914 for (ll = ll_head (&fctr->result_list);
1915 ll != ll_null (&fctr->result_list); i++, ll = ll_next (ll))
/* Offset of this result's rows within the current variable's rows. */
1920 const int row_result_start = i * cmd.st_n * 2;
1922 const struct factor_result *result =
1923 ll_data (ll, struct factor_result, ll);
/* One rule above the group and one between its two halves. */
1926 tab_hline (tbl, TAL_1, 1, n_cols - 1,
1927 heading_rows + row_var_start + row_result_start);
1929 tab_hline (tbl, TAL_1, heading_columns - 2, n_cols - 1,
1930 heading_rows + row_var_start + row_result_start + cmd.st_n);
/* Column n_cols - 3 gets one formatted entry per position
   1..cmd.st_n in each half (presumably the position number E). */
1932 for ( e = 1; e <= cmd.st_n; ++e )
1934 tab_text (tbl, n_cols - 3,
1935 heading_rows + row_var_start + row_result_start + e - 1,
1936 TAB_RIGHT | TAT_PRINTF,
1939 tab_text (tbl, n_cols - 3,
1940 heading_rows + row_var_start + row_result_start + cmd.st_n + e - 1,
1941 TAB_RIGHT | TAT_PRINTF,
/* Second half: walk the minima list.  Each extremum is written once
   per unit of its weight, stopping when cmd.st_n rows are filled.
   NOTE(review): no end-of-list test is apparent in this walk; confirm
   the list always supplies enough weighted entries for cmd.st_n
   rows. */
1946 min_ll = ll_head (extrema_list (result->metrics[v].minima));
1947 for (e = 0; e < cmd.st_n;)
1949 struct extremum *minimum = ll_data (min_ll, struct extremum, ll);
1950 double weight = minimum->weight;
1952 while (weight-- > 0 && e < cmd.st_n)
1954 tab_double (tbl, n_cols - 1,
1955 heading_rows + row_var_start + row_result_start + cmd.st_n + e,
1961 tab_fixed (tbl, n_cols - 2,
1962 heading_rows + row_var_start +
1963 row_result_start + cmd.st_n + e,
1970 min_ll = ll_next (min_ll);
/* First half: the same walk over the maxima list.  The same
   NOTE(review) about the missing end-of-list test applies. */
1974 max_ll = ll_head (extrema_list (result->metrics[v].maxima));
1975 for (e = 0; e < cmd.st_n;)
1977 struct extremum *maximum = ll_data (max_ll, struct extremum, ll);
1978 double weight = maximum->weight;
1980 while (weight-- > 0 && e < cmd.st_n)
1982 tab_double (tbl, n_cols - 1,
1983 heading_rows + row_var_start +
1984 row_result_start + e,
1990 tab_fixed (tbl, n_cols - 2,
1991 heading_rows + row_var_start +
1992 row_result_start + e,
1999 max_ll = ll_next (max_ll);
/* Render the first factor's value for this result as text. */
2003 if ( fctr->indep_var[0])
2006 ds_init_empty (&vstr);
2007 var_append_value_name (fctr->indep_var[0],
2008 &result->value[0], &vstr);
2011 heading_rows + row_var_start + row_result_start,
/* Labels for the two halves, in column n_cols - 4. */
2020 tab_text (tbl, n_cols - 4,
2021 heading_rows + row_var_start + row_result_start,
2025 tab_text (tbl, n_cols - 4,
2026 heading_rows + row_var_start + row_result_start + cmd.st_n,
/* Heavy rule between the heading columns and the data, then the table
   title and the titles of the two data columns. */
2032 tab_vline (tbl, TAL_2, heading_columns, 0, n_rows - 1);
2035 tab_title (tbl, _("Extreme Values"));
2038 tab_text (tbl, n_cols - 2, 0, TAB_CENTER | TAT_TITLE,
2042 tab_text (tbl, n_cols - 1, 0, TAB_CENTER | TAT_TITLE,
/* Number of table rows per factor result in the "Percentiles" table:
   one labelled with the selected percentile algorithm and one labelled
   "Tukey's Hinges". */
2048 #define PERCENTILE_ROWS 2
/* Builds the table titled "Percentiles".

   There is one data column per entry of percentile_list.  Every
   factor_result on FCTR->result_list gets PERCENTILE_ROWS rows for
   each dependent variable (loop bound n_dep_var).

   heading_columns starts at 2 and is set to 3 when FCTR->indep_var[0]
   is set and to 4 when FCTR->indep_var[1] is set. */
2051 show_percentiles (const struct variable **dependent_var,
2053 const struct xfactor *fctr)
2057 int heading_columns = 2;
2059 const int n_percentiles = subc_list_double_count (&percentile_list);
2060 const int heading_rows = 2;
2061 struct tab_table *tbl;
2068 if ( fctr->indep_var[0] )
2070 heading_columns = 3;
2072 if ( fctr->indep_var[1] )
2074 heading_columns = 4;
/* PERCENTILE_ROWS rows per factor result, plus the heading rows. */
2078 n_rows *= ll_count (&fctr->result_list) * PERCENTILE_ROWS;
2079 n_rows += heading_rows;
/* One data column per requested percentile. */
2081 n_cols = heading_columns + n_percentiles;
2083 tbl = tab_create (n_cols, n_rows, 0);
2084 tab_headers (tbl, heading_columns, 0, heading_rows, 0);
2086 tab_dim (tbl, tab_natural_dimensions, NULL, NULL);
2088 /* Outline the box */
2093 n_cols - 1, n_rows - 1);
2096 tab_hline (tbl, TAL_2, 0, n_cols - 1, heading_rows );
2097 tab_hline (tbl, TAL_2, 1, n_cols - 1, heading_rows );
/* Column titles for the factors go on the second heading row. */
2099 if ( fctr->indep_var[0])
2100 tab_text (tbl, 1, 1, TAT_TITLE, var_to_string (fctr->indep_var[0]));
2102 if ( fctr->indep_var[1])
2103 tab_text (tbl, 2, 1, TAT_TITLE, var_to_string (fctr->indep_var[1]));
2105 for (v = 0 ; v < n_dep_var ; ++v )
/* Offset (below the heading rows) of the first row belonging to
   dependent variable V. */
2111 const int row_var_start =
2112 v * PERCENTILE_ROWS * ll_count(&fctr->result_list);
2116 heading_rows + row_var_start,
2117 TAB_LEFT | TAT_TITLE,
2118 var_to_string (dependent_var[v])
2121 for (ll = ll_head (&fctr->result_list);
2122 ll != ll_null (&fctr->result_list); i++, ll = ll_next (ll))
2125 const struct factor_result *result =
2126 ll_data (ll, struct factor_result, ll);
/* Rule above every group except the very first one; it starts in
   column 0 only for the first result of a variable. */
2128 if ( i > 0 || v > 0 )
2130 const int left_col = (i == 0) ? 0 : 1;
2131 tab_hline (tbl, TAL_1, left_col, n_cols - 1,
2132 heading_rows + row_var_start + i * PERCENTILE_ROWS);
/* Render the first factor's value for this result as text. */
2135 if ( fctr->indep_var[0])
2138 ds_init_empty (&vstr);
2139 var_append_value_name (fctr->indep_var[0],
2140 &result->value[0], &vstr);
2143 heading_rows + row_var_start + i * PERCENTILE_ROWS,
/* Row labels in the column just left of the data: the description of
   the selected percentile algorithm, then "Tukey's Hinges". */
2152 tab_text (tbl, n_cols - n_percentiles - 1,
2153 heading_rows + row_var_start + i * PERCENTILE_ROWS,
2155 ptile_alg_desc [percentile_algorithm]);
2158 tab_text (tbl, n_cols - n_percentiles - 1,
2159 heading_rows + row_var_start + 1 + i * PERCENTILE_ROWS,
2161 _("Tukey's Hinges"));
2164 tab_vline (tbl, TAL_1, n_cols - n_percentiles -1, heading_rows, n_rows - 1);
/* Compute the hinges for this result; presumably they feed HINGE in
   the loop below -- confirm. */
2166 tukey_hinges_calculate ((struct tukey_hinges *) result->metrics[v].tukey_hinges,
/* The first row of the group receives every requested percentile.
   The second row receives a value only when HINGE has been changed
   from SYSMIS, which can happen only for the 0.25, 0.5 and 0.75
   percentiles. */
2169 for (j = 0; j < n_percentiles; ++j)
2171 double hinge = SYSMIS;
2172 tab_double (tbl, n_cols - n_percentiles + j,
2173 heading_rows + row_var_start + i * PERCENTILE_ROWS,
2175 percentile_calculate (result->metrics[v].ptl[j],
2176 percentile_algorithm),
2180 if ( result->metrics[v].ptl[j]->ptile == 0.5)
2182 else if ( result->metrics[v].ptl[j]->ptile == 0.25)
2184 else if ( result->metrics[v].ptl[j]->ptile == 0.75)
2187 if ( hinge != SYSMIS)
2188 tab_double (tbl, n_cols - n_percentiles + j,
2189 heading_rows + row_var_start + 1 + i * PERCENTILE_ROWS,
2199 tab_vline (tbl, TAL_2, heading_columns, 0, n_rows - 1);
2201 tab_title (tbl, _("Percentiles"));
/* Second heading row: one title per percentile column, formatted from
   the corresponding percentile_list entry. */
2204 for (i = 0 ; i < n_percentiles; ++i )
2206 tab_text (tbl, n_cols - n_percentiles + i, 1,
2207 TAB_CENTER | TAT_TITLE | TAT_PRINTF,
2209 subc_list_double_at (&percentile_list, i)
/* First heading row: a joined title cell starting at the first
   percentile column. */
2215 tab_joint_text (tbl,
2216 n_cols - n_percentiles, 0,
2218 TAB_CENTER | TAT_TITLE,
2221 /* Vertical lines for the data only */
2225 n_cols - n_percentiles, 1,
2226 n_cols - 1, n_rows - 1);
2228 tab_hline (tbl, TAL_1, n_cols - n_percentiles, n_cols - 1, 1);
/* Appends a short description of RESULT's factor values to STR: the
   value name for FCTR->indep_var[0] and, when FCTR->indep_var[1] is
   set, "," followed by the value name for FCTR->indep_var[1].  Unlike
   factor_to_string, no variable names are written.

   NOTE(review): a ")" is appended here although this function never
   writes a matching "(" -- confirm whether that is intended. */
2236 factor_to_string_concise (const struct xfactor *fctr,
2237 const struct factor_result *result,
2241 if (fctr->indep_var[0])
2243 var_append_value_name (fctr->indep_var[0], &result->value[0], str);
2245 if ( fctr->indep_var[1] )
2247 ds_put_cstr (str, ",");
2249 var_append_value_name (fctr->indep_var[1], &result->value[1], str);
2251 ds_put_cstr (str, ")");
/* Appends a description of RESULT's factor values to STR.

   When FCTR->indep_var[0] is set the text begins "(NAME = " followed
   by that factor's value name; when FCTR->indep_var[1] is also set,
   "," and "NAME = " plus the second factor's value name follow.  A
   closing ")" is appended.  Names come from var_get_name and value
   text from var_append_value_name. */
2258 factor_to_string (const struct xfactor *fctr,
2259 const struct factor_result *result,
2263 if (fctr->indep_var[0])
2265 ds_put_format (str, "(%s = ", var_get_name (fctr->indep_var[0]));
2267 var_append_value_name (fctr->indep_var[0], &result->value[0], str);
2269 if ( fctr->indep_var[1] )
2271 ds_put_cstr (str, ",");
2272 ds_put_format (str, "%s = ", var_get_name (fctr->indep_var[1]));
2274 var_append_value_name (fctr->indep_var[1], &result->value[1], str);
2276 ds_put_cstr (str, ")");