1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2004, 2008 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
19 #include <gsl/gsl_cdf.h>
20 #include <libpspp/message.h>
25 #include <math/sort.h>
26 #include <math/order-stats.h>
27 #include <math/percentiles.h>
28 #include <math/tukey-hinges.h>
29 #include <math/box-whisker.h>
30 #include <math/trimmed-mean.h>
31 #include <math/extrema.h>
33 #include <data/case.h>
34 #include <data/casegrouper.h>
35 #include <data/casereader.h>
36 #include <data/casewriter.h>
37 #include <data/dictionary.h>
38 #include <data/procedure.h>
39 #include <data/subcase.h>
40 #include <data/value-labels.h>
41 #include <data/variable.h>
42 #include <language/command.h>
43 #include <language/dictionary/split-file.h>
44 #include <language/lexer/lexer.h>
45 #include <libpspp/compiler.h>
46 #include <libpspp/hash.h>
47 #include <libpspp/message.h>
48 #include <libpspp/misc.h>
49 #include <libpspp/str.h>
50 #include <math/moments.h>
51 #include <output/charts/box-whisker.h>
52 #include <output/charts/cartesian.h>
53 #include <output/manager.h>
54 #include <output/table.h>
60 #define _(msgid) gettext (msgid)
61 #define N_(msgid) msgid
64 #include <output/chart.h>
65 #include <output/charts/plot-hist.h>
66 #include <output/charts/plot-chart.h>
67 #include <math/histogram.h>
74 missing=miss:pairwise/!listwise,
76 incl:include/!exclude;
77 +compare=cmp:variables/!groups;
80 +plot[plt_]=stemleaf,boxplot,npplot,:spreadlevel(*d:n),histogram,all,none;
82 +statistics[st_]=descriptives,:extreme(*d:n),all,none.
/* File-scope state shared by the parser callbacks and the output routines
   in this file. */
90 static struct cmd_examine cmd;
/* The dependent variables named on the command, and how many there are. */
92 static const struct variable **dependent_vars;
93 static size_t n_dependent_vars;
/* The percentiles to compute and the algorithm selected for them. */
97 static subc_list_double percentile_list;
98 static enum pc_alg percentile_algorithm;
/* Per-variable statistics gathered for one result of a factor.
   NOTE(review): code elsewhere in this file reads members (for example n,
   cmin, n_ptiles, mean, variance) whose declarations are not visible in this
   view of the structure; the opening lines of the two structures that
   follow it are not visible either. */
100 struct factor_metrics
102 struct moments1 *moments;
/* Array of percentile statistics; factor_destroy destroys each element and
   then frees the array. */
104 struct percentile **ptl;
107 struct statistic *tukey_hinges;
108 struct statistic *box_whisker;
109 struct statistic *trimmed_mean;
110 struct statistic *histogram;
/* Order statistic for the normal probability plot; examine_group creates it
   only when that plot has been requested. */
111 struct order_stats *np;
113 /* Three quartiles indexing into PTL */
114 struct percentile **quartiles;
116 /* A reader sorted in ASCENDING order */
117 struct casereader *up_reader;
119 /* The minimum value of all the weights */
122 /* Sum of all weights, including those for missing values */
135 struct extrema *minima;
136 struct extrema *maxima;
/* Copies of the values of the (up to two) independent variables for this
   result; filled in by examine_group and freed by factor_destroy. */
143 union value *value[2];
145 /* An array of factor metrics, one for each variable */
146 struct factor_metrics *metrics;
151 /* We need to make a list of this structure */
154 /* The independent variable */
/* NOTE(review): the const qualifier appears twice before the asterisk in
   the declaration below. */
155 const struct variable const* indep_var[2];
157 /* A list of results for this factor */
158 struct ll_list result_list ;
/* Releases what is held by the results attached to FCTR.  For every entry of
   FCTR->result_list it destroys the metrics of each dependent variable, then
   frees the two factor values and the metrics array.
   NOTE(review): the np member is not destroyed here; show_npplot destroys it
   after plotting.  The statements that advance LL and dispose of each list
   node are not visible in this view. */
163 factor_destroy (struct xfactor *fctr)
165 struct ll *ll = ll_head (&fctr->result_list);
166 while (ll != ll_null (&fctr->result_list))
169 struct factor_result *result =
170 ll_data (ll, struct factor_result, ll);
172 for (v = 0; v < n_dependent_vars; ++v)
175 moments1_destroy (result->metrics[v].moments);
176 extrema_destroy (result->metrics[v].minima);
177 extrema_destroy (result->metrics[v].maxima);
178 statistic_destroy (result->metrics[v].trimmed_mean);
179 statistic_destroy (result->metrics[v].tukey_hinges);
180 statistic_destroy (result->metrics[v].box_whisker);
181 statistic_destroy (result->metrics[v].histogram);
/* Each percentile is destroyed through the generic statistic interface
   before the arrays that refer to them are freed. */
182 for (i = 0 ; i < result->metrics[v].n_ptiles; ++i)
183 statistic_destroy ((struct statistic *) result->metrics[v].ptl[i]);
184 free (result->metrics[v].ptl);
185 free (result->metrics[v].quartiles);
186 casereader_destroy (result->metrics[v].up_reader);
189 free (result->value[0]);
190 free (result->value[1]);
191 free (result->metrics);
/* level0_factor receives the results computed over all cases of a split
   group (see run_examine); factor_list holds the factors added by
   examine_parse_independent_vars. */
197 static struct xfactor level0_factor;
198 static struct ll_list factor_list = LL_INITIALIZER (factor_list);
200 /* Parse the clause specifying the factors */
201 static int examine_parse_independent_vars (struct lexer *lexer,
202 const struct dictionary *dict,
203 struct cmd_examine *cmd);
205 /* Output functions */
206 static void show_summary (const struct variable **dependent_var, int n_dep_var,
207 const struct xfactor *f);
210 static void show_descriptives (const struct variable **dependent_var,
212 const struct xfactor *f);
215 static void show_percentiles (const struct variable **dependent_var,
217 const struct xfactor *f);
220 static void show_extremes (const struct variable **dependent_var,
222 const struct xfactor *f);
227 /* Per Split function */
228 static void run_examine (struct cmd_examine *, struct casereader *,
231 static void output_examine (void);
234 void factor_calc (const struct ccase *c, int case_no,
235 double weight, bool case_missing);
238 /* Represent a factor as a string, so it can be
239 printed in a human readable fashion */
240 static void factor_to_string (const struct xfactor *fctr,
241 const struct factor_result *result,
244 /* Represent a factor as a string, so it can be
245 printed in a human readable fashion,
246 but sacrificing some readability for the sake of brevity */
248 factor_to_string_concise (const struct xfactor *fctr,
249 const struct factor_result *result,
255 /* Categories of missing values to exclude. */
256 static enum mv_class exclude_values;
/* Entry point for the command.  Initialises the percentile list and the
   default percentile algorithm, parses the command into the file-scope cmd,
   and then calls run_examine once for every group produced by
   casegrouper_create_splits.  Returns CMD_SUCCESS when both
   casegrouper_destroy and proc_commit report success, and
   CMD_CASCADING_FAILURE otherwise.
   NOTE(review): the return statement of the parse-failure path and the
   statement guarded by the SYSMIS test are not visible in this view. */
259 cmd_examine (struct lexer *lexer, struct dataset *ds)
261 struct casegrouper *grouper;
262 struct casereader *group;
265 subc_list_double_create (&percentile_list);
266 percentile_algorithm = PC_HAVERAGE;
268 if ( !parse_examine (lexer, ds, &cmd, NULL) )
270 subc_list_double_destroy (&percentile_list);
274 /* If /MISSING=INCLUDE is set, then user missing values are ignored */
275 exclude_values = cmd.incl == XMN_INCLUDE ? MV_SYSTEM : MV_ANY;
277 if ( cmd.st_n == SYSMIS )
/* Without an explicit confidence interval setting, 95 is used. */
280 if ( ! cmd.sbc_cinterval)
281 cmd.n_cinterval[0] = 95.0;
283 /* If descriptives have been requested, make sure the
284 quartiles are calculated */
285 if ( cmd.a_statistics[XMN_ST_DESCRIPTIVES] )
287 subc_list_double_push (&percentile_list, 25);
288 subc_list_double_push (&percentile_list, 50);
289 subc_list_double_push (&percentile_list, 75);
292 grouper = casegrouper_create_splits (proc_open (ds), dataset_dict (ds));
294 while (casegrouper_get_next_group (grouper, &group))
/* Each group is wrapped with casereader_create_arithmetic_sequence before
   being handed on; examine_group reads the last value of every case as a
   case number, presumably the value added here (confirm). */
296 struct casereader *reader =
297 casereader_create_arithmetic_sequence (group, 1, 1);
299 run_examine (&cmd, reader, ds);
302 ok = casegrouper_destroy (grouper);
303 ok = proc_commit (ds) && ok;
/* NOTE(review): free accepts a null pointer, so the test below is not
   required. */
305 if ( dependent_vars )
306 free (dependent_vars);
308 subc_list_double_destroy (&percentile_list);
310 return ok ? CMD_SUCCESS : CMD_CASCADING_FAILURE;
/* NOTE(review): in this view the comment that begins "Need to make sure"
   further down has lost its closing line, and the body of the emptiness
   test that precedes the warning message is not visible. */
314 /* Plot the normal and detrended normal Q-Q plots for NP.
315 Label the plots with LABEL */
317 np_plot (struct np *np, const char *label)
319 double yfirst = 0, ylast = 0;
326 struct chart *np_chart;
328 /* Detrended Normal Plot */
329 struct chart *dnp_chart;
331 /* The slope and intercept of the ideal normal probability line */
332 const double slope = 1.0 / np->stddev;
333 const double intercept = -np->mean / np->stddev;
337 msg (MW, _("Not creating plot because data set is empty."));
341 np_chart = chart_create ();
342 dnp_chart = chart_create ();
344 if ( !np_chart || ! dnp_chart )
347 chart_write_title (np_chart, _("Normal Q-Q Plot of %s"), label);
348 chart_write_xlabel (np_chart, _("Observed Value"));
349 chart_write_ylabel (np_chart, _("Expected Normal"));
351 chart_write_title (dnp_chart, _("Detrended Normal Q-Q Plot of %s"),
353 chart_write_xlabel (dnp_chart, _("Observed Value"));
354 chart_write_ylabel (dnp_chart, _("Dev from Normal"));
/* Expected normal scores at the two ends of the sample.
   NOTE(review): if np->n has an integer type both quotients below truncate;
   confirm that it is a floating point member. */
356 yfirst = gsl_cdf_ugaussian_Pinv (1 / (np->n + 1));
357 ylast = gsl_cdf_ugaussian_Pinv (np->n / (np->n + 1));
359 /* Need to make sure that both the scatter plot and the ideal fit into the
361 x_lower = MIN (np->y_min, (yfirst - intercept) / slope) ;
362 x_upper = MAX (np->y_max, (ylast - intercept) / slope) ;
363 slack = (x_upper - x_lower) * 0.05 ;
365 chart_write_xscale (np_chart, x_lower - slack, x_upper + slack, 5);
366 chart_write_xscale (dnp_chart, np->y_min, np->y_max, 5);
368 chart_write_yscale (np_chart, yfirst, ylast, 5);
369 chart_write_yscale (dnp_chart, np->dns_min, np->dns_max, 5);
373 struct casereader *reader = casewriter_make_reader (np->writer);
374 while (casereader_read (reader, &c))
376 chart_datum (np_chart, 0, case_data_idx (&c, NP_IDX_Y)->f, case_data_idx (&c, NP_IDX_NS)->f);
377 chart_datum (dnp_chart, 0, case_data_idx (&c, NP_IDX_Y)->f, case_data_idx (&c, NP_IDX_DNS)->f);
381 casereader_destroy (reader);
384 chart_line (dnp_chart, 0, 0, np->y_min, np->y_max , CHART_DIM_X);
385 chart_line (np_chart, slope, intercept, yfirst, ylast , CHART_DIM_Y);
387 chart_submit (np_chart);
388 chart_submit (dnp_chart);
/* For every dependent variable and every result of FCTR, builds a label
   from the variable name followed by the factor description, draws the
   normal plots with np_plot, and then destroys the np statistic.
   NOTE(review): the step expression of the inner loop and any release of
   STR are not visible in this view. */
393 show_npplot (const struct variable **dependent_var,
395 const struct xfactor *fctr)
399 for (v = 0; v < n_dep_var; ++v)
402 for (ll = ll_head (&fctr->result_list);
403 ll != ll_null (&fctr->result_list);
407 const struct factor_result *result =
408 ll_data (ll, struct factor_result, ll);
410 ds_init_empty (&str);
411 ds_put_format (&str, "%s ", var_get_name (dependent_var[v]));
413 factor_to_string (fctr, result, &str);
415 np_plot ((struct np*) result->metrics[v].np, ds_cstr(&str));
/* The np statistic is released here rather than in factor_destroy. */
417 statistic_destroy ((struct statistic *)result->metrics[v].np);
/* For every dependent variable and every result of FCTR, builds a label
   from the variable name followed by the factor description and passes the
   histogram and the moments of that result to histogram_plot.
   NOTE(review): the step expression of the inner loop, the middle argument
   of the histogram_plot call and any release of STR are not visible in this
   view. */
426 show_histogram (const struct variable **dependent_var,
428 const struct xfactor *fctr)
432 for (v = 0; v < n_dep_var; ++v)
435 for (ll = ll_head (&fctr->result_list);
436 ll != ll_null (&fctr->result_list);
440 const struct factor_result *result =
441 ll_data (ll, struct factor_result, ll);
443 ds_init_empty (&str);
444 ds_put_format (&str, "%s ", var_get_name (dependent_var[v]));
446 factor_to_string (fctr, result, &str);
448 histogram_plot ((struct histogram *) result->metrics[v].histogram,
450 (struct moments1 *) result->metrics[v].moments);
/* Draws one chart per dependent variable, holding one box for each result
   of FCTR.  A first pass over the results finds the overall minimum and
   maximum, which are given to boxplot_draw_yscale; a second pass draws the
   boxes.
   NOTE(review): the loop step expressions, the statements that follow the
   empty-data warning, the remaining arguments of boxplot_draw_boxplot and
   the submission of the chart are not visible in this view. */
460 show_boxplot_groups (const struct variable **dependent_var,
462 const struct xfactor *fctr)
466 for (v = 0; v < n_dep_var; ++v)
470 struct chart *ch = chart_create ();
471 double y_min = DBL_MAX;
472 double y_max = -DBL_MAX;
474 for (ll = ll_head (&fctr->result_list);
475 ll != ll_null (&fctr->result_list);
478 const struct extremum *max, *min;
479 const struct factor_result *result =
480 ll_data (ll, struct factor_result, ll);
482 const struct ll_list *max_list =
483 extrema_list (result->metrics[v].maxima);
485 const struct ll_list *min_list =
486 extrema_list (result->metrics[v].minima);
488 if ( ll_is_empty (max_list))
490 msg (MW, _("Not creating plot because data set is empty."));
/* The head of each extrema list is taken as the extreme value of the
   result. */
494 max = (const struct extremum *)
495 ll_data (ll_head(max_list), struct extremum, ll);
497 min = (const struct extremum *)
498 ll_data (ll_head (min_list), struct extremum, ll);
500 y_max = MAX (y_max, max->value);
501 y_min = MIN (y_min, min->value);
504 boxplot_draw_yscale (ch, y_max, y_min);
506 if ( fctr->indep_var[0])
507 chart_write_title (ch, _("Boxplot of %s vs. %s"),
508 var_to_string (dependent_var[v]),
509 var_to_string (fctr->indep_var[0]) );
511 chart_write_title (ch, _("Boxplot of %s"),
512 var_to_string (dependent_var[v]));
514 for (ll = ll_head (&fctr->result_list);
515 ll != ll_null (&fctr->result_list);
518 const struct factor_result *result =
519 ll_data (ll, struct factor_result, ll);
/* The data area is divided into slots that are two box widths wide, one per
   result, and each box is centred in its slot. */
522 const double box_width = (ch->data_right - ch->data_left)
523 / (ll_count (&fctr->result_list) * 2.0 ) ;
525 const double box_centre = (f++ * 2 + 1) * box_width + ch->data_left;
527 ds_init_empty (&str);
528 factor_to_string_concise (fctr, result, &str);
530 boxplot_draw_boxplot (ch,
531 box_centre, box_width,
532 (const struct box_whisker *)
533 result->metrics[v].box_whisker,
/* Draws one chart per result of FCTR, holding one box for each dependent
   variable.  The y axis is scaled to the overall minimum and maximum of the
   dependent variables within that result.
   NOTE(review): unlike show_boxplot_groups, no test for empty extrema lists
   is visible before their heads are dereferenced.  The loop step
   expressions, any lines between the statements that build TITLE, the
   remaining arguments of boxplot_draw_boxplot and the submission of the
   chart are not visible in this view. */
546 show_boxplot_variables (const struct variable **dependent_var,
548 const struct xfactor *fctr
554 const struct ll_list *result_list = &fctr->result_list;
556 for (ll = ll_head (result_list);
557 ll != ll_null (result_list);
562 struct chart *ch = chart_create ();
563 double y_min = DBL_MAX;
564 double y_max = -DBL_MAX;
566 const struct factor_result *result =
567 ll_data (ll, struct factor_result, ll);
/* Each dependent variable gets a slot that is two box widths wide. */
569 const double box_width = (ch->data_right - ch->data_left)
570 / (n_dep_var * 2.0 ) ;
572 for (v = 0; v < n_dep_var; ++v)
574 const struct ll *max_ll =
575 ll_head (extrema_list (result->metrics[v].maxima));
576 const struct ll *min_ll =
577 ll_head (extrema_list (result->metrics[v].minima));
579 const struct extremum *max =
580 (const struct extremum *) ll_data (max_ll, struct extremum, ll);
582 const struct extremum *min =
583 (const struct extremum *) ll_data (min_ll, struct extremum, ll);
585 y_max = MAX (y_max, max->value);
586 y_min = MIN (y_min, min->value);
590 boxplot_draw_yscale (ch, y_max, y_min);
592 ds_init_empty (&title);
593 factor_to_string (fctr, result, &title);
596 ds_put_format (&title, "%s = ", var_get_name (fctr->indep_var[0]));
597 var_append_value_name (fctr->indep_var[0], result->value[0], &title);
600 chart_write_title (ch, ds_cstr (&title));
603 for (v = 0; v < n_dep_var; ++v)
606 const double box_centre = (v * 2 + 1) * box_width + ch->data_left;
/* NOTE(review): STR is initialised twice in succession below; the first
   call looks redundant. */
608 ds_init_empty (&str);
609 ds_init_cstr (&str, var_get_name (dependent_var[v]));
611 boxplot_draw_boxplot (ch,
612 box_centre, box_width,
613 (const struct box_whisker *) result->metrics[v].box_whisker,
/* The output is produced first for level0_factor and then, in list order,
   for every factor in factor_list.  The summary table is always shown; the
   other tables and the plots depend on the settings held in cmd. */
624 /* Show all the appropriate tables */
626 output_examine (void)
630 show_summary (dependent_vars, n_dependent_vars, &level0_factor);
632 if ( cmd.a_statistics[XMN_ST_EXTREME] )
633 show_extremes (dependent_vars, n_dependent_vars, &level0_factor);
635 if ( cmd.a_statistics[XMN_ST_DESCRIPTIVES] )
636 show_descriptives (dependent_vars, n_dependent_vars, &level0_factor);
638 if ( cmd.sbc_percentiles)
639 show_percentiles (dependent_vars, n_dependent_vars, &level0_factor);
/* For level0_factor the box plot is always drawn by show_boxplot_groups,
   whatever the value of cmd.cmp. */
643 if (cmd.a_plot[XMN_PLT_BOXPLOT])
644 show_boxplot_groups (dependent_vars, n_dependent_vars, &level0_factor);
646 if (cmd.a_plot[XMN_PLT_HISTOGRAM])
647 show_histogram (dependent_vars, n_dependent_vars, &level0_factor);
649 if (cmd.a_plot[XMN_PLT_NPPLOT])
650 show_npplot (dependent_vars, n_dependent_vars, &level0_factor);
653 for (ll = ll_head (&factor_list);
654 ll != ll_null (&factor_list); ll = ll_next (ll))
656 struct xfactor *factor = ll_data (ll, struct xfactor, ll);
657 show_summary (dependent_vars, n_dependent_vars, factor);
659 if ( cmd.a_statistics[XMN_ST_EXTREME] )
660 show_extremes (dependent_vars, n_dependent_vars, factor);
662 if ( cmd.a_statistics[XMN_ST_DESCRIPTIVES] )
663 show_descriptives (dependent_vars, n_dependent_vars, factor);
665 if ( cmd.sbc_percentiles)
666 show_percentiles (dependent_vars, n_dependent_vars, factor);
/* For the listed factors cmd.cmp selects which of the two box plot routines
   is used. */
668 if (cmd.a_plot[XMN_PLT_BOXPLOT] &&
669 cmd.cmp == XMN_GROUPS)
670 show_boxplot_groups (dependent_vars, n_dependent_vars, factor);
673 if (cmd.a_plot[XMN_PLT_BOXPLOT] &&
674 cmd.cmp == XMN_VARIABLES)
675 show_boxplot_variables (dependent_vars, n_dependent_vars,
678 if (cmd.a_plot[XMN_PLT_HISTOGRAM])
679 show_histogram (dependent_vars, n_dependent_vars, factor);
681 if (cmd.a_plot[XMN_PLT_NPPLOT])
682 show_npplot (dependent_vars, n_dependent_vars, factor);
/* Pushes each number found onto percentile_list, then recognises an
   optional algorithm keyword and stores the choice in percentile_algorithm.
   When no numbers were given, the list is filled with 5, 10, 25, 50, 75, 90
   and 95.
   NOTE(review): the results of the lex_match calls for punctuation are not
   used, so as far as this view shows the punctuation is accepted but not
   required.  The statement that advances past each number and the return
   statement are not visible in this view. */
686 /* Parse the PERCENTILES subcommand */
688 xmn_custom_percentiles (struct lexer *lexer, struct dataset *ds UNUSED,
689 struct cmd_examine *p UNUSED, void *aux UNUSED)
691 lex_match (lexer, '=');
693 lex_match (lexer, '(');
695 while ( lex_is_number (lexer) )
697 subc_list_double_push (&percentile_list, lex_number (lexer));
701 lex_match (lexer, ',') ;
703 lex_match (lexer, ')');
705 lex_match (lexer, '=');
707 if ( lex_match_id (lexer, "HAVERAGE"))
708 percentile_algorithm = PC_HAVERAGE;
710 else if ( lex_match_id (lexer, "WAVERAGE"))
711 percentile_algorithm = PC_WAVERAGE;
713 else if ( lex_match_id (lexer, "ROUND"))
714 percentile_algorithm = PC_ROUND;
716 else if ( lex_match_id (lexer, "EMPIRICAL"))
717 percentile_algorithm = PC_EMPIRICAL;
719 else if ( lex_match_id (lexer, "AEMPIRICAL"))
720 percentile_algorithm = PC_AEMPIRICAL;
722 else if ( lex_match_id (lexer, "NONE"))
723 percentile_algorithm = PC_NONE;
/* Default set of percentiles, used when the user listed none. */
726 if ( 0 == subc_list_double_count (&percentile_list))
728 subc_list_double_push (&percentile_list, 5);
729 subc_list_double_push (&percentile_list, 10);
730 subc_list_double_push (&percentile_list, 25);
731 subc_list_double_push (&percentile_list, 50);
732 subc_list_double_push (&percentile_list, 75);
733 subc_list_double_push (&percentile_list, 90);
734 subc_list_double_push (&percentile_list, 95);
740 /* TOTAL and NOTOTAL are simple, mutually exclusive flags */
/* Parser callback for TOTAL: reports a syntax error when NOTOTAL has already
   been given on the same command.
   NOTE(review): the return statements are not visible in this view. */
742 xmn_custom_total (struct lexer *lexer UNUSED, struct dataset *ds UNUSED,
743 struct cmd_examine *p, void *aux UNUSED)
745 if ( p->sbc_nototal )
747 msg (SE, _("%s and %s are mutually exclusive"),"TOTAL","NOTOTAL");
/* Parser callback for NOTOTAL; the counterpart of xmn_custom_total.  It
   emits the same "mutually exclusive" syntax error.
   NOTE(review): the condition guarding the message and the return
   statements are not visible in this view; presumably the test mirrors the
   one in xmn_custom_total (confirm). */
755 xmn_custom_nototal (struct lexer *lexer UNUSED, struct dataset *ds UNUSED,
756 struct cmd_examine *p, void *aux UNUSED)
760 msg (SE, _("%s and %s are mutually exclusive"), "TOTAL", "NOTOTAL");
769 /* Parser for the variables sub command
770 Returns 1 on success */
/* Parses the list of dependent variables into the file-scope dependent_vars
   and n_dependent_vars (numeric, no duplicates, no scratch variables).  When
   a BY keyword follows, the factors are parsed by
   examine_parse_independent_vars.
   NOTE(review): dependent_vars is freed on the failure paths below but no
   reset of the file-scope pointer to NULL is visible, while cmd_examine
   frees it again when it is non-null; confirm that the stale value cannot
   be reached.  The return statements are not visible in this view. */
772 xmn_custom_variables (struct lexer *lexer, struct dataset *ds,
773 struct cmd_examine *cmd,
776 const struct dictionary *dict = dataset_dict (ds);
777 lex_match (lexer, '=');
/* The next token must name a variable in the dictionary or be ALL. */
779 if ( (lex_token (lexer) != T_ID || dict_lookup_var (dict, lex_tokid (lexer)) == NULL)
780 && lex_token (lexer) != T_ALL)
785 if (!parse_variables_const (lexer, dict, &dependent_vars, &n_dependent_vars,
786 PV_NO_DUPLICATE | PV_NUMERIC | PV_NO_SCRATCH) )
788 free (dependent_vars);
792 assert (n_dependent_vars);
795 if ( lex_match (lexer, T_BY))
798 success = examine_parse_independent_vars (lexer, dict, cmd);
801 free (dependent_vars);
811 /* Parse the clause specifying the factors */
/* Allocates a new factor, reads its first independent variable and, after a
   BY keyword, a second one, and appends the factor to factor_list.  The
   function then calls itself to parse any further factors until the token
   is a full stop or a slash.
   NOTE(review): the factor SF is allocated before the first token is
   checked; the bodies of the failure branches are not visible in this view,
   so confirm that SF is released there.  The two ll_push_tail calls below
   are presumably in alternative branches (confirm). */
813 examine_parse_independent_vars (struct lexer *lexer,
814 const struct dictionary *dict,
815 struct cmd_examine *cmd)
818 struct xfactor *sf = xmalloc (sizeof *sf);
820 ll_init (&sf->result_list);
822 if ( (lex_token (lexer) != T_ID ||
823 dict_lookup_var (dict, lex_tokid (lexer)) == NULL)
824 && lex_token (lexer) != T_ALL)
830 sf->indep_var[0] = parse_variable (lexer, dict);
831 sf->indep_var[1] = NULL;
833 if ( lex_token (lexer) == T_BY )
835 lex_match (lexer, T_BY);
837 if ( (lex_token (lexer) != T_ID ||
838 dict_lookup_var (dict, lex_tokid (lexer)) == NULL)
839 && lex_token (lexer) != T_ALL)
845 sf->indep_var[1] = parse_variable (lexer, dict);
847 ll_push_tail (&factor_list, &sf->ll);
850 ll_push_tail (&factor_list, &sf->ll);
852 lex_match (lexer, ',');
/* A full stop or a slash ends the list of factors. */
854 if ( lex_token (lexer) == '.' || lex_token (lexer) == '/' )
857 success = examine_parse_independent_vars (lexer, dict, cmd);
/* Computes the statistics for the cases in READER and appends them, as a new
   factor_result, to FACTOR->result_list.  READER is destroyed before the
   function ends.  The work is done in up to three passes per dependent
   variable: moments and extrema (writing the cases to a sorting or plain
   writer), then the order statistics, then the box and whisker statistic.
   NOTE(review), all to be confirmed against the complete file:
   - two comments in this function ("In this case, we need to sort" and "If
     percentiles or descriptives") have lost their closing lines in this
     view;
   - the xcalloc call that allocates OS passes the element size first and
     the count second, the opposite order to the call that allocates
     result->metrics; no release of OS is visible;
   - both order_stats_accumulate calls pass MV_ANY rather than the
     file-scope exclude_values;
   - LEVEL presumably selects how many factor values are recorded, but the
     tests that use it are not visible. */
866 examine_group (struct cmd_examine *cmd, struct casereader *reader, int level,
867 const struct dictionary *dict, struct xfactor *factor)
870 const struct variable *wv = dict_get_weight (dict);
873 struct factor_result *result = xzalloc (sizeof (*result));
875 result->metrics = xcalloc (n_dependent_vars, sizeof (*result->metrics));
877 if ( cmd->a_statistics[XMN_ST_EXTREME] )
878 n_extrema = cmd->st_n;
/* The factor values are copied from the first case of the group. */
881 if (casereader_peek (reader, 0, &c))
886 value_dup (case_data (&c, factor->indep_var[0]),
887 var_get_width (factor->indep_var[0]));
891 value_dup (case_data (&c, factor->indep_var[1]),
892 var_get_width (factor->indep_var[1]));
897 for (v = 0; v < n_dependent_vars; ++v)
899 struct casewriter *writer;
900 struct casereader *input = casereader_clone (reader);
902 result->metrics[v].moments = moments1_create (MOMENT_KURTOSIS);
903 result->metrics[v].minima = extrema_create (n_extrema, EXTREME_MINIMA);
904 result->metrics[v].maxima = extrema_create (n_extrema, EXTREME_MAXIMA);
905 result->metrics[v].cmin = DBL_MAX;
907 if (cmd->a_statistics[XMN_ST_DESCRIPTIVES] ||
908 cmd->a_plot[XMN_PLT_BOXPLOT] ||
909 cmd->a_plot[XMN_PLT_NPPLOT] ||
910 cmd->sbc_percentiles)
912 /* In this case, we need to sort the data, so we create a sorting
914 struct subcase up_ordering;
915 subcase_init_var (&up_ordering, dependent_vars[v], SC_ASCEND);
916 writer = sort_create_writer (&up_ordering,
917 casereader_get_value_cnt (reader));
918 subcase_destroy (&up_ordering);
922 /* but in this case, sorting is unnecessary, so an ordinary
923 casewriter is sufficient */
925 autopaging_writer_create (casereader_get_value_cnt (reader));
929 /* Sort or just iterate, whilst calculating moments etc */
930 while (casereader_read (input, &c))
/* The last value of each case is read as its case number. */
932 const casenumber loc =
933 case_data_idx (&c, casereader_get_value_cnt (reader) - 1)->f;
935 const double weight = wv ? case_data (&c, wv)->f : 1.0;
/* NOTE(review): a SYSMIS weight is kept out of cmin here, but the weight is
   added to n below without a corresponding test being visible. */
937 if (weight != SYSMIS)
938 minimize (&result->metrics[v].cmin, weight);
940 moments1_add (result->metrics[v].moments,
941 case_data (&c, dependent_vars[v])->f,
944 result->metrics[v].n += weight;
946 extrema_add (result->metrics[v].maxima,
947 case_data (&c, dependent_vars[v])->f,
951 extrema_add (result->metrics[v].minima,
952 case_data (&c, dependent_vars[v])->f,
956 casewriter_write (writer, &c);
958 casereader_destroy (input);
/* Every case was written to WRITER above; the reader made from it is kept
   for the later passes. */
959 result->metrics[v].up_reader = casewriter_make_reader (writer);
962 /* If percentiles or descriptives have been requested, then a
963 second pass through the data (which has now been sorted)
965 if ( cmd->a_statistics[XMN_ST_DESCRIPTIVES] ||
966 cmd->a_plot[XMN_PLT_BOXPLOT] ||
967 cmd->a_plot[XMN_PLT_NPPLOT] ||
968 cmd->sbc_percentiles)
970 for (v = 0; v < n_dependent_vars; ++v)
974 struct order_stats **os ;
975 struct factor_metrics *metric = &result->metrics[v];
977 metric->n_ptiles = percentile_list.n_data;
979 metric->ptl = xcalloc (metric->n_ptiles,
980 sizeof (struct percentile *));
982 metric->quartiles = xcalloc (3, sizeof (*metric->quartiles));
984 for (i = 0 ; i < metric->n_ptiles; ++i)
986 metric->ptl[i] = (struct percentile *)
987 percentile_create (percentile_list.data[i] / 100.0, metric->n);
989 if ( percentile_list.data[i] == 25)
990 metric->quartiles[0] = metric->ptl[i];
991 else if ( percentile_list.data[i] == 50)
992 metric->quartiles[1] = metric->ptl[i];
993 else if ( percentile_list.data[i] == 75)
994 metric->quartiles[2] = metric->ptl[i];
997 metric->tukey_hinges = tukey_hinges_create (metric->n, metric->cmin);
998 metric->trimmed_mean = trimmed_mean_create (metric->n, 0.05);
1000 n_os = metric->n_ptiles + 2;
1002 if ( cmd->a_plot[XMN_PLT_NPPLOT] )
1004 metric->np = np_create (metric->moments);
1008 os = xcalloc (sizeof (struct order_stats *), n_os);
1010 for (i = 0 ; i < metric->n_ptiles ; ++i )
1012 os[i] = (struct order_stats *) metric->ptl[i];
1015 os[i] = (struct order_stats *) metric->tukey_hinges;
1016 os[i+1] = (struct order_stats *) metric->trimmed_mean;
1018 if (cmd->a_plot[XMN_PLT_NPPLOT])
1019 os[i+2] = metric->np;
1021 order_stats_accumulate (os, n_os,
1022 casereader_clone (metric->up_reader),
1023 wv, dependent_vars[v], MV_ANY);
1028 /* FIXME: Do this in the above loop */
1029 if ( cmd->a_plot[XMN_PLT_HISTOGRAM] )
1032 struct casereader *input = casereader_clone (reader);
/* A histogram with 10 bins spanning the observed minimum and maximum is
   created for every dependent variable that has data. */
1034 for (v = 0; v < n_dependent_vars; ++v)
1036 const struct extremum *max, *min;
1037 struct factor_metrics *metric = &result->metrics[v];
1039 const struct ll_list *max_list =
1040 extrema_list (result->metrics[v].maxima);
1042 const struct ll_list *min_list =
1043 extrema_list (result->metrics[v].minima);
1045 if ( ll_is_empty (max_list))
1047 msg (MW, _("Not creating plot because data set is empty."));
1051 assert (! ll_is_empty (min_list));
1053 max = (const struct extremum *)
1054 ll_data (ll_head(max_list), struct extremum, ll);
1056 min = (const struct extremum *)
1057 ll_data (ll_head (min_list), struct extremum, ll);
1059 metric->histogram = histogram_create (10, min->value, max->value);
/* One further pass over the cases fills every histogram that was created. */
1062 while (casereader_read (input, &c))
1064 const double weight = wv ? case_data (&c, wv)->f : 1.0;
1066 for (v = 0; v < n_dependent_vars; ++v)
1068 struct factor_metrics *metric = &result->metrics[v];
1069 if ( metric->histogram)
1070 histogram_add ((struct histogram *) metric->histogram,
1071 case_data (&c, dependent_vars[v])->f, weight);
1075 casereader_destroy (input);
1078 /* In this case, a third iteration is required */
1079 if (cmd->a_plot[XMN_PLT_BOXPLOT])
1081 for (v = 0; v < n_dependent_vars; ++v)
1083 struct factor_metrics *metric = &result->metrics[v];
/* The box and whisker statistic is built from the Tukey hinges computed in
   the previous pass. */
1085 metric->box_whisker =
1086 box_whisker_create ((struct tukey_hinges *) metric->tukey_hinges,
1088 casereader_get_value_cnt (metric->up_reader)
1091 order_stats_accumulate ((struct order_stats **) &metric->box_whisker,
1093 casereader_clone (metric->up_reader),
1094 wv, dependent_vars[v], MV_ANY);
1098 ll_push_tail (&factor->result_list, &result->ll);
1099 casereader_destroy (reader);
/* Processes one split group.  The statistics are computed first over all
   cases of INPUT (stored in level0_factor) and then, for each factor in
   factor_list, over the groups formed by sorting on the factor's first
   variable and, when there is one, on its second.  INPUT is destroyed
   before the function ends.
   NOTE(review): LEVEL0 is cloned from INPUT before the test for empty
   input, and the early path visibly destroys INPUT only; confirm that
   LEVEL0 is released there too.  The call that produces the output, the
   loop step expressions and the body of the final loop are not visible in
   this view. */
1104 run_examine (struct cmd_examine *cmd, struct casereader *input,
1108 const struct dictionary *dict = dataset_dict (ds);
1110 struct casereader *level0 = casereader_clone (input);
1112 if (!casereader_peek (input, 0, &c))
1114 casereader_destroy (input);
1118 output_split_file_values (ds, &c);
1121 ll_init (&level0_factor.result_list);
1123 examine_group (cmd, level0, 0, dict, &level0_factor);
1125 for (ll = ll_head (&factor_list);
1126 ll != ll_null (&factor_list);
1129 struct xfactor *factor = ll_data (ll, struct xfactor, ll);
1131 struct casereader *group = NULL;
1132 struct casereader *level1;
1133 struct casegrouper *grouper1 = NULL;
/* A private clone of the input is sorted and grouped on the first
   independent variable. */
1135 level1 = casereader_clone (input);
1136 level1 = sort_execute_1var (level1, factor->indep_var[0]);
1137 grouper1 = casegrouper_create_vars (level1, &factor->indep_var[0], 1);
1139 while (casegrouper_get_next_group (grouper1, &group))
1141 struct casereader *group_copy = casereader_clone (group);
1143 if ( !factor->indep_var[1])
1144 examine_group (cmd, group_copy, 1, dict, factor);
1148 struct casereader *group2 = NULL;
1149 struct casegrouper *grouper2 = NULL;
/* With a second independent variable, each first-level group is sorted and
   grouped again before being examined. */
1151 group_copy = sort_execute_1var (group_copy,
1152 factor->indep_var[1]);
1154 grouper2 = casegrouper_create_vars (group_copy,
1155 &factor->indep_var[1], 1);
1157 while (casegrouper_get_next_group (grouper2, &group2))
1159 examine_group (cmd, group2, 2, dict, factor);
1162 casegrouper_destroy (grouper2);
1165 casereader_destroy (group);
1167 casegrouper_destroy (grouper1);
1170 casereader_destroy (input);
1174 factor_destroy (&level0_factor);
1178 for (ll = ll_head (&factor_list);
1179 ll != ll_null (&factor_list);
1182 struct xfactor *f = ll_data (ll, struct xfactor, ll);
/* Builds the "Case Processing Summary" table for FCTR: one row for every
   combination of dependent variable and result, with a count and a
   percentage for each of three column pairs.  The number of heading columns
   grows with the number of independent variables of FCTR.
   NOTE(review): many lines of this function are not visible in this view,
   among them the initial value of n_rows, the subtitle strings, the first
   lines of several tab_ calls, the update of the row counter J and the
   submission of the table. */
1191 show_summary (const struct variable **dependent_var, int n_dep_var,
1192 const struct xfactor *fctr)
1194 static const char *subtitle[]=
1202 int heading_columns = 1;
1204 const int heading_rows = 3;
1205 struct tab_table *tbl;
1212 if ( fctr->indep_var[0] )
1214 heading_columns = 2;
1216 if ( fctr->indep_var[1] )
1218 heading_columns = 3;
1222 n_rows *= ll_count (&fctr->result_list);
1223 n_rows += heading_rows;
/* Six data columns: a count and a percentage for each of the three pairs. */
1225 n_cols = heading_columns + 6;
1227 tbl = tab_create (n_cols, n_rows, 0);
1228 tab_headers (tbl, heading_columns, 0, heading_rows, 0);
1230 tab_dim (tbl, tab_natural_dimensions);
1232 /* Outline the box */
1237 n_cols - 1, n_rows - 1);
1239 /* Vertical lines for the data only */
1244 n_cols - 1, n_rows - 1);
1247 tab_hline (tbl, TAL_2, 0, n_cols - 1, heading_rows );
1248 tab_hline (tbl, TAL_1, heading_columns, n_cols - 1, 1 );
1249 tab_hline (tbl, TAL_1, heading_columns, n_cols - 1, heading_rows -1 );
1251 tab_vline (tbl, TAL_2, heading_columns, 0, n_rows - 1);
1254 tab_title (tbl, _("Case Processing Summary"));
1256 tab_joint_text (tbl, heading_columns, 0,
1258 TAB_CENTER | TAT_TITLE,
1261 /* Remove lines ... */
/* Headings for the three pairs of columns. */
1268 for (j = 0 ; j < 3 ; ++j)
1270 tab_text (tbl, heading_columns + j * 2 , 2, TAB_CENTER | TAT_TITLE,
1273 tab_text (tbl, heading_columns + j * 2 + 1, 2, TAB_CENTER | TAT_TITLE,
1276 tab_joint_text (tbl, heading_columns + j * 2 , 1,
1277 heading_columns + j * 2 + 1, 1,
1278 TAB_CENTER | TAT_TITLE,
1281 tab_box (tbl, -1, -1,
1283 heading_columns + j * 2, 1,
1284 heading_columns + j * 2 + 1, 1);
1288 /* Titles for the independent variables */
1289 if ( fctr->indep_var[0] )
1291 tab_text (tbl, 1, heading_rows - 1, TAB_CENTER | TAT_TITLE,
1292 var_to_string (fctr->indep_var[0]));
1294 if ( fctr->indep_var[1] )
1296 tab_text (tbl, 2, heading_rows - 1, TAB_CENTER | TAT_TITLE,
1297 var_to_string (fctr->indep_var[1]));
1301 for (v = 0 ; v < n_dep_var ; ++v)
1305 union value *last_value = NULL;
1308 tab_hline (tbl, TAL_1, 0, n_cols -1 ,
1309 v * ll_count (&fctr->result_list)
1314 v * ll_count (&fctr->result_list) + heading_rows,
1315 TAB_LEFT | TAT_TITLE,
1316 var_to_string (dependent_var[v])
1320 for (ll = ll_head (&fctr->result_list);
1321 ll != ll_null (&fctr->result_list); ll = ll_next (ll))
1324 const struct factor_result *result =
1325 ll_data (ll, struct factor_result, ll);
1327 if ( fctr->indep_var[0] )
/* The value of the first independent variable is written only when it
   differs from the one in the previous row. */
1330 if ( last_value == NULL ||
1331 compare_values_short (last_value, result->value[0],
1332 fctr->indep_var[0]))
1336 last_value = result->value[0];
1337 ds_init_empty (&str);
1339 var_append_value_name (fctr->indep_var[0], result->value[0],
1344 v * ll_count (&fctr->result_list),
1345 TAB_LEFT | TAT_TITLE,
1350 if ( fctr->indep_var[1] && j > 0)
1351 tab_hline (tbl, TAL_1, 1, n_cols - 1,
1353 v * ll_count (&fctr->result_list));
1356 if ( fctr->indep_var[1])
1360 ds_init_empty (&str);
1362 var_append_value_name (fctr->indep_var[1],
1363 result->value[1], &str);
1367 v * ll_count (&fctr->result_list),
1368 TAB_LEFT | TAT_TITLE,
/* The moments are evaluated here and stored back into the metrics, so this
   output routine also updates the result.  N receives the first output of
   moments1_calculate and is used below as the count of valid cases. */
1376 moments1_calculate (result->metrics[v].moments,
1377 &n, &result->metrics[v].mean,
1378 &result->metrics[v].variance,
1379 &result->metrics[v].skewness,
1380 &result->metrics[v].kurtosis);
1382 result->metrics[v].se_mean = sqrt (result->metrics[v].variance / n) ;
1385 tab_float (tbl, heading_columns,
1386 heading_rows + j + v * ll_count (&fctr->result_list),
1390 tab_text (tbl, heading_columns + 1,
1391 heading_rows + j + v * ll_count (&fctr->result_list),
1392 TAB_RIGHT | TAT_PRINTF,
1393 "%g%%", n * 100.0 / result->metrics[v].n);
/* The second pair of columns shows the total weight minus the valid count. */
1396 tab_float (tbl, heading_columns + 2,
1397 heading_rows + j + v * ll_count (&fctr->result_list),
1399 result->metrics[v].n - n,
1402 tab_text (tbl, heading_columns + 3,
1403 heading_rows + j + v * ll_count (&fctr->result_list),
1404 TAB_RIGHT | TAT_PRINTF,
1406 (result->metrics[v].n - n) * 100.0 / result->metrics[v].n
1409 /* Total Valid + Missing */
1410 tab_float (tbl, heading_columns + 4,
1411 heading_rows + j + v * ll_count (&fctr->result_list),
1413 result->metrics[v].n,
1416 tab_text (tbl, heading_columns + 5,
1417 heading_rows + j + v * ll_count (&fctr->result_list),
1418 TAB_RIGHT | TAT_PRINTF,
1420 (result->metrics[v].n) * 100.0 / result->metrics[v].n
/* Number of table rows that show_descriptives lays out for each
   factor result of each dependent variable (row offsets 0 to 12). */
1431 #define DESCRIPTIVE_ROWS 13
/* Builds the table titled "Descriptives".

   DEPENDENT_VAR[v], for v below n_dep_var, supplies the title of each
   group of rows and selects result->metrics[v].  FCTR supplies the
   optional factor variables (indep_var[0] and indep_var[1]) and the
   list of factor results (result_list), which is walked once per
   dependent variable.

   Every factor result occupies DESCRIPTIVE_ROWS consecutive rows.
   Column n_cols - 2 receives the statistic values; column n_cols - 1
   receives se_mean and the calc_seskew and calc_sekurt values. */
1434 show_descriptives (const struct variable **dependent_var,
1436 const struct xfactor *fctr)
/* Three heading columns by default; the factor checks below raise
   this to 4 or 5. */
1439 int heading_columns = 3;
1441 const int heading_rows = 1;
1442 struct tab_table *tbl;
1449 if ( fctr->indep_var[0] )
1451 heading_columns = 4;
1453 if ( fctr->indep_var[1] )
1455 heading_columns = 5;
/* Scale the row count by the number of factor results times
   DESCRIPTIVE_ROWS, then add the heading rows. */
1459 n_rows *= ll_count (&fctr->result_list) * DESCRIPTIVE_ROWS;
1460 n_rows += heading_rows;
/* Two columns follow the heading columns. */
1462 n_cols = heading_columns + 2;
1464 tbl = tab_create (n_cols, n_rows, 0);
1465 tab_headers (tbl, heading_columns, 0, heading_rows, 0);
1467 tab_dim (tbl, tab_natural_dimensions);
1469 /* Outline the box */
1474 n_cols - 1, n_rows - 1);
1477 tab_hline (tbl, TAL_2, 0, n_cols - 1, heading_rows );
1478 tab_hline (tbl, TAL_2, 1, n_cols - 1, heading_rows );
1480 tab_vline (tbl, TAL_1, n_cols - 1, 0, n_rows - 1);
/* Title columns 1 and 2 of the heading row with the factor
   variables, when present. */
1483 if ( fctr->indep_var[0])
1484 tab_text (tbl, 1, 0, TAT_TITLE, var_to_string (fctr->indep_var[0]));
1486 if ( fctr->indep_var[1])
1487 tab_text (tbl, 2, 0, TAT_TITLE, var_to_string (fctr->indep_var[1]));
/* One group of rows per dependent variable. */
1489 for (v = 0 ; v < n_dep_var ; ++v )
/* Offset, relative to heading_rows, of the first row that belongs
   to dependent variable V. */
1494 const int row_var_start =
1495 v * DESCRIPTIVE_ROWS * ll_count(&fctr->result_list);
1499 heading_rows + row_var_start,
1500 TAB_LEFT | TAT_TITLE,
1501 var_to_string (dependent_var[v])
/* Walk the factor results; I is advanced together with LL and
   selects the block of DESCRIPTIVE_ROWS rows being filled. */
1504 for (ll = ll_head (&fctr->result_list);
1505 ll != ll_null (&fctr->result_list); i++, ll = ll_next (ll))
/* The Student-t quantile requested below is evaluated at
   (1 - cmd.n_cinterval[0] / 100) / 2 with metrics[v].n - 1 degrees
   of freedom; T multiplies se_mean in the interval rows further down.
   NOTE(review): n - 1 is not positive when metrics[v].n <= 1;
   confirm that such results cannot reach this point. */
1507 const struct factor_result *result =
1508 ll_data (ll, struct factor_result, ll);
1511 gsl_cdf_tdist_Qinv ((1 - cmd.n_cinterval[0] / 100.0) / 2.0,
1512 result->metrics[v].n - 1);
/* Rule above every block except the very first one (i == 0 and
   v == 0).  It starts at column 0 for the first result of a variable
   and at column 1 otherwise. */
1514 if ( i > 0 || v > 0 )
1516 const int left_col = (i == 0) ? 0 : 1;
1517 tab_hline (tbl, TAL_1, left_col, n_cols - 1,
1518 heading_rows + row_var_start + i * DESCRIPTIVE_ROWS);
/* With a first factor, build the value name of result->value[0]
   into VSTR for this block's first row. */
1521 if ( fctr->indep_var[0])
1524 ds_init_empty (&vstr);
1525 var_append_value_name (fctr->indep_var[0],
1526 result->value[0], &vstr);
1529 heading_rows + row_var_start + i * DESCRIPTIVE_ROWS,
/* Row labels: column n_cols - 4 for offsets 0, 1 and 3 to 12, and
   column n_cols - 3 for the sub-labels at offsets 1 and 2. */
1538 tab_text (tbl, n_cols - 4,
1539 heading_rows + row_var_start + i * DESCRIPTIVE_ROWS,
1543 tab_text (tbl, n_cols - 4,
1544 heading_rows + row_var_start + 1 + i * DESCRIPTIVE_ROWS,
1545 TAB_LEFT | TAT_PRINTF,
1546 _("%g%% Confidence Interval for Mean"),
1547 cmd.n_cinterval[0]);
1549 tab_text (tbl, n_cols - 3,
1550 heading_rows + row_var_start + 1 + i * DESCRIPTIVE_ROWS,
1554 tab_text (tbl, n_cols - 3,
1555 heading_rows + row_var_start + 2 + i * DESCRIPTIVE_ROWS,
1559 tab_text (tbl, n_cols - 4,
1560 heading_rows + row_var_start + 3 + i * DESCRIPTIVE_ROWS,
1561 TAB_LEFT | TAT_PRINTF,
1562 _("5%% Trimmed Mean"));
1564 tab_text (tbl, n_cols - 4,
1565 heading_rows + row_var_start + 4 + i * DESCRIPTIVE_ROWS,
1569 tab_text (tbl, n_cols - 4,
1570 heading_rows + row_var_start + 5 + i * DESCRIPTIVE_ROWS,
1574 tab_text (tbl, n_cols - 4,
1575 heading_rows + row_var_start + 6 + i * DESCRIPTIVE_ROWS,
1577 _("Std. Deviation"));
1579 tab_text (tbl, n_cols - 4,
1580 heading_rows + row_var_start + 7 + i * DESCRIPTIVE_ROWS,
1584 tab_text (tbl, n_cols - 4,
1585 heading_rows + row_var_start + 8 + i * DESCRIPTIVE_ROWS,
1589 tab_text (tbl, n_cols - 4,
1590 heading_rows + row_var_start + 9 + i * DESCRIPTIVE_ROWS,
1594 tab_text (tbl, n_cols - 4,
1595 heading_rows + row_var_start + 10 + i * DESCRIPTIVE_ROWS,
1597 _("Interquartile Range"));
1600 tab_text (tbl, n_cols - 4,
1601 heading_rows + row_var_start + 11 + i * DESCRIPTIVE_ROWS,
1605 tab_text (tbl, n_cols - 4,
1606 heading_rows + row_var_start + 12 + i * DESCRIPTIVE_ROWS,
1611 /* Now the statistics ... */
/* Offset 0: mean, with se_mean in the last column. */
1613 tab_float (tbl, n_cols - 2,
1614 heading_rows + row_var_start + i * DESCRIPTIVE_ROWS,
1616 result->metrics[v].mean,
1619 tab_float (tbl, n_cols - 1,
1620 heading_rows + row_var_start + i * DESCRIPTIVE_ROWS,
1622 result->metrics[v].se_mean,
/* Offsets 1 and 2: mean minus and plus t * se_mean. */
1626 tab_float (tbl, n_cols - 2,
1627 heading_rows + row_var_start + 1 + i * DESCRIPTIVE_ROWS,
1629 result->metrics[v].mean - t *
1630 result->metrics[v].se_mean,
1633 tab_float (tbl, n_cols - 2,
1634 heading_rows + row_var_start + 2 + i * DESCRIPTIVE_ROWS,
1636 result->metrics[v].mean + t *
1637 result->metrics[v].se_mean,
/* Offset 3: result of trimmed_mean_calculate. */
1641 tab_float (tbl, n_cols - 2,
1642 heading_rows + row_var_start + 3 + i * DESCRIPTIVE_ROWS,
1644 trimmed_mean_calculate ((struct trimmed_mean *) result->metrics[v].trimmed_mean),
/* Offset 4: percentile computed from quartiles[1]. */
1648 tab_float (tbl, n_cols - 2,
1649 heading_rows + row_var_start + 4 + i * DESCRIPTIVE_ROWS,
1651 percentile_calculate (result->metrics[v].quartiles[1], percentile_algorithm),
/* Offsets 5 and 6: variance and its square root. */
1655 tab_float (tbl, n_cols - 2,
1656 heading_rows + row_var_start + 5 + i * DESCRIPTIVE_ROWS,
1658 result->metrics[v].variance,
1661 tab_float (tbl, n_cols - 2,
1662 heading_rows + row_var_start + 6 + i * DESCRIPTIVE_ROWS,
1664 sqrt (result->metrics[v].variance),
/* Offset 10: percentile of quartiles[2] minus that of quartiles[0]. */
1667 tab_float (tbl, n_cols - 2,
1668 heading_rows + row_var_start + 10 + i * DESCRIPTIVE_ROWS,
1670 percentile_calculate (result->metrics[v].quartiles[2],
1671 percentile_algorithm) -
1672 percentile_calculate (result->metrics[v].quartiles[0],
1673 percentile_algorithm),
/* Offsets 11 and 12: skewness and kurtosis ... */
1677 tab_float (tbl, n_cols - 2,
1678 heading_rows + row_var_start + 11 + i * DESCRIPTIVE_ROWS,
1680 result->metrics[v].skewness,
1683 tab_float (tbl, n_cols - 2,
1684 heading_rows + row_var_start + 12 + i * DESCRIPTIVE_ROWS,
1686 result->metrics[v].kurtosis,
/* ... with calc_seskew and calc_sekurt of n beside them in the
   last column. */
1689 tab_float (tbl, n_cols - 1,
1690 heading_rows + row_var_start + 11 + i * DESCRIPTIVE_ROWS,
1692 calc_seskew (result->metrics[v].n),
1695 tab_float (tbl, n_cols - 1,
1696 heading_rows + row_var_start + 12 + i * DESCRIPTIVE_ROWS,
1698 calc_sekurt (result->metrics[v].n),
/* Offsets 7 to 9 use the head entries of the maxima and minima
   lists; offset 9 is the difference of the two head values. */
1702 struct extremum *minimum, *maximum ;
1704 struct ll *max_ll = ll_head (extrema_list (result->metrics[v].maxima));
1705 struct ll *min_ll = ll_head (extrema_list (result->metrics[v].minima));
1707 maximum = ll_data (max_ll, struct extremum, ll);
1708 minimum = ll_data (min_ll, struct extremum, ll);
1710 tab_float (tbl, n_cols - 2,
1711 heading_rows + row_var_start + 7 + i * DESCRIPTIVE_ROWS,
1716 tab_float (tbl, n_cols - 2,
1717 heading_rows + row_var_start + 8 + i * DESCRIPTIVE_ROWS,
1722 tab_float (tbl, n_cols - 2,
1723 heading_rows + row_var_start + 9 + i * DESCRIPTIVE_ROWS,
1725 maximum->value - minimum->value,
/* TAL_2 vertical rule at x = heading_columns, over the full height
   of the table. */
1731 tab_vline (tbl, TAL_2, heading_columns, 0, n_rows - 1);
1733 tab_title (tbl, _("Descriptives"));
/* Heading-row titles for the two columns that follow the heading
   columns. */
1735 tab_text (tbl, n_cols - 2, 0, TAB_CENTER | TAT_TITLE,
1738 tab_text (tbl, n_cols - 1, 0, TAB_CENTER | TAT_TITLE,
/* Builds the table titled "Extreme Values".

   DEPENDENT_VAR[v], for v below n_dep_var, supplies the title of each
   group of rows and selects result->metrics[v].  FCTR supplies the
   optional factor variables and the list of factor results.

   Every factor result occupies 2 * cmd.st_n rows: the first cmd.st_n
   rows are filled from the maxima list and the following cmd.st_n
   rows from the minima list. */
1747 show_extremes (const struct variable **dependent_var,
1749 const struct xfactor *fctr)
/* Three heading columns by default; the factor checks below raise
   this to 4 or 5. */
1752 int heading_columns = 3;
1754 const int heading_rows = 1;
1755 struct tab_table *tbl;
1762 if ( fctr->indep_var[0] )
1764 heading_columns = 4;
1766 if ( fctr->indep_var[1] )
1768 heading_columns = 5;
/* Scale the row count by the number of factor results times
   2 * cmd.st_n, then add the heading rows. */
1772 n_rows *= ll_count (&fctr->result_list) * cmd.st_n * 2;
1773 n_rows += heading_rows;
/* Two columns follow the heading columns. */
1775 n_cols = heading_columns + 2;
1777 tbl = tab_create (n_cols, n_rows, 0);
1778 tab_headers (tbl, heading_columns, 0, heading_rows, 0);
1780 tab_dim (tbl, tab_natural_dimensions);
1782 /* Outline the box */
1787 n_cols - 1, n_rows - 1);
1790 tab_hline (tbl, TAL_2, 0, n_cols - 1, heading_rows );
1791 tab_hline (tbl, TAL_2, 1, n_cols - 1, heading_rows );
1792 tab_vline (tbl, TAL_1, n_cols - 1, 0, n_rows - 1);
/* Title columns 1 and 2 of the heading row with the factor
   variables, when present. */
1794 if ( fctr->indep_var[0])
1795 tab_text (tbl, 1, 0, TAT_TITLE, var_to_string (fctr->indep_var[0]));
1797 if ( fctr->indep_var[1])
1798 tab_text (tbl, 2, 0, TAT_TITLE, var_to_string (fctr->indep_var[1]));
/* One group of rows per dependent variable. */
1800 for (v = 0 ; v < n_dep_var ; ++v )
/* Offset, relative to heading_rows, of the first row that belongs
   to dependent variable V. */
1804 const int row_var_start = v * cmd.st_n * 2 * ll_count(&fctr->result_list);
1808 heading_rows + row_var_start,
1809 TAB_LEFT | TAT_TITLE,
1810 var_to_string (dependent_var[v])
/* Walk the factor results; I is advanced together with LL. */
1813 for (ll = ll_head (&fctr->result_list);
1814 ll != ll_null (&fctr->result_list); i++, ll = ll_next (ll))
/* Offset of this result's block within the variable's rows. */
1819 const int row_result_start = i * cmd.st_n * 2;
1821 const struct factor_result *result =
1822 ll_data (ll, struct factor_result, ll);
/* Rules at the first row of the block (from column 1) and at its
   midpoint, cmd.st_n rows further down (from column
   heading_columns - 2). */
1825 tab_hline (tbl, TAL_1, 1, n_cols - 1,
1826 heading_rows + row_var_start + row_result_start);
1828 tab_hline (tbl, TAL_1, heading_columns - 2, n_cols - 1,
1829 heading_rows + row_var_start + row_result_start + cmd.st_n);
/* Per-row labels in column n_cols - 3 for both halves of the block;
   E counts from 1, so the row offset is e - 1. */
1831 for ( e = 1; e <= cmd.st_n; ++e )
1833 tab_text (tbl, n_cols - 3,
1834 heading_rows + row_var_start + row_result_start + e - 1,
1835 TAB_RIGHT | TAT_PRINTF,
1838 tab_text (tbl, n_cols - 3,
1839 heading_rows + row_var_start + row_result_start + cmd.st_n + e - 1,
1840 TAB_RIGHT | TAT_PRINTF,
/* Second half of the block (offsets cmd.st_n and up): walk the
   minima list.  The inner loop runs once per unit of the entry's
   weight, stopping early when E reaches cmd.st_n.
   NOTE(review): the for header does not advance E, and there is no
   check of MIN_LL against the end of the list in the header or the
   while condition; confirm that E is advanced inside the inner loop
   and that the list always covers cmd.st_n rows. */
1845 min_ll = ll_head (extrema_list (result->metrics[v].minima));
1846 for (e = 0; e < cmd.st_n;)
1848 struct extremum *minimum = ll_data (min_ll, struct extremum, ll);
1849 double weight = minimum->weight;
1851 while (weight-- > 0 && e < cmd.st_n)
1853 tab_float (tbl, n_cols - 1,
1854 heading_rows + row_var_start + row_result_start + cmd.st_n + e,
1860 tab_float (tbl, n_cols - 2,
1861 heading_rows + row_var_start + row_result_start + cmd.st_n + e,
1868 min_ll = ll_next (min_ll);
/* First half of the block (offsets 0 to cmd.st_n - 1): the same walk
   over the maxima list.
   NOTE(review): the same questions about advancing E and reaching
   the end of the list apply here. */
1872 max_ll = ll_head (extrema_list (result->metrics[v].maxima));
1873 for (e = 0; e < cmd.st_n;)
1875 struct extremum *maximum = ll_data (max_ll, struct extremum, ll);
1876 double weight = maximum->weight;
1878 while (weight-- > 0 && e < cmd.st_n)
1880 tab_float (tbl, n_cols - 1,
1881 heading_rows + row_var_start + row_result_start + e,
1887 tab_float (tbl, n_cols - 2,
1888 heading_rows + row_var_start + row_result_start + e,
1895 max_ll = ll_next (max_ll);
/* With a first factor, build the value name of result->value[0]
   into VSTR for this block's first row. */
1899 if ( fctr->indep_var[0])
1902 ds_init_empty (&vstr);
1903 var_append_value_name (fctr->indep_var[0],
1904 result->value[0], &vstr);
1907 heading_rows + row_var_start + row_result_start,
/* Labels for the two halves of the block, in column n_cols - 4. */
1916 tab_text (tbl, n_cols - 4,
1917 heading_rows + row_var_start + row_result_start,
1921 tab_text (tbl, n_cols - 4,
1922 heading_rows + row_var_start + row_result_start + cmd.st_n,
/* TAL_2 vertical rule at x = heading_columns, over the full height
   of the table. */
1928 tab_vline (tbl, TAL_2, heading_columns, 0, n_rows - 1);
1931 tab_title (tbl, _("Extreme Values"));
/* Heading-row titles for the two columns that follow the heading
   columns. */
1934 tab_text (tbl, n_cols - 2, 0, TAB_CENTER | TAT_TITLE,
1938 tab_text (tbl, n_cols - 1, 0, TAB_CENTER | TAT_TITLE,
/* Number of table rows that show_percentiles lays out for each factor
   result: offset 0 is labelled ptile_alg_desc[percentile_algorithm]
   and offset 1 is labelled "Tukey's Hinges". */
1944 #define PERCENTILE_ROWS 2
/* Builds the table titled "Percentiles".

   DEPENDENT_VAR[v], for v below n_dep_var, supplies the title of each
   group of rows and selects result->metrics[v].  FCTR supplies the
   optional factor variables and the list of factor results.

   The table has one data column for every entry of percentile_list.
   In each result's first row the cells hold percentile_calculate of
   ptl[j]; in the second row a cell is written only when HINGE is not
   SYSMIS. */
1947 show_percentiles (const struct variable **dependent_var,
1949 const struct xfactor *fctr)
/* Two heading columns by default; the factor checks below raise this
   to 3 or 4. */
1953 int heading_columns = 2;
1955 const int n_percentiles = subc_list_double_count (&percentile_list);
1956 const int heading_rows = 2;
1957 struct tab_table *tbl;
1964 if ( fctr->indep_var[0] )
1966 heading_columns = 3;
1968 if ( fctr->indep_var[1] )
1970 heading_columns = 4;
/* Scale the row count by the number of factor results times
   PERCENTILE_ROWS, then add the heading rows. */
1974 n_rows *= ll_count (&fctr->result_list) * PERCENTILE_ROWS;
1975 n_rows += heading_rows;
/* One data column per requested percentile. */
1977 n_cols = heading_columns + n_percentiles;
1979 tbl = tab_create (n_cols, n_rows, 0);
1980 tab_headers (tbl, heading_columns, 0, heading_rows, 0);
1982 tab_dim (tbl, tab_natural_dimensions);
1984 /* Outline the box */
1989 n_cols - 1, n_rows - 1);
1992 tab_hline (tbl, TAL_2, 0, n_cols - 1, heading_rows );
1993 tab_hline (tbl, TAL_2, 1, n_cols - 1, heading_rows );
/* Title columns 1 and 2 with the factor variables, when present;
   these titles go in row 1, the second of the two heading rows. */
1995 if ( fctr->indep_var[0])
1996 tab_text (tbl, 1, 1, TAT_TITLE, var_to_string (fctr->indep_var[0]));
1998 if ( fctr->indep_var[1])
1999 tab_text (tbl, 2, 1, TAT_TITLE, var_to_string (fctr->indep_var[1]));
/* One group of rows per dependent variable. */
2001 for (v = 0 ; v < n_dep_var ; ++v )
/* Offset, relative to heading_rows, of the first row that belongs
   to dependent variable V. */
2007 const int row_var_start =
2008 v * PERCENTILE_ROWS * ll_count(&fctr->result_list);
2012 heading_rows + row_var_start,
2013 TAB_LEFT | TAT_TITLE,
2014 var_to_string (dependent_var[v])
/* Walk the factor results; I is advanced together with LL. */
2017 for (ll = ll_head (&fctr->result_list);
2018 ll != ll_null (&fctr->result_list); i++, ll = ll_next (ll))
2021 const struct factor_result *result =
2022 ll_data (ll, struct factor_result, ll);
/* Rule above every block except the very first one (i == 0 and
   v == 0).  It starts at column 0 for the first result of a variable
   and at column 1 otherwise. */
2024 if ( i > 0 || v > 0 )
2026 const int left_col = (i == 0) ? 0 : 1;
2027 tab_hline (tbl, TAL_1, left_col, n_cols - 1,
2028 heading_rows + row_var_start + i * PERCENTILE_ROWS);
/* With a first factor, build the value name of result->value[0]
   into VSTR for this block's first row. */
2031 if ( fctr->indep_var[0])
2034 ds_init_empty (&vstr);
2035 var_append_value_name (fctr->indep_var[0],
2036 result->value[0], &vstr);
2039 heading_rows + row_var_start + i * PERCENTILE_ROWS,
/* Labels for the two rows of the block, in the column just left of
   the percentile columns. */
2048 tab_text (tbl, n_cols - n_percentiles - 1,
2049 heading_rows + row_var_start + i * PERCENTILE_ROWS,
2051 ptile_alg_desc [percentile_algorithm]);
2054 tab_text (tbl, n_cols - n_percentiles - 1,
2055 heading_rows + row_var_start + 1 + i * PERCENTILE_ROWS,
2057 _("Tukey's Hinges"));
/* NOTE(review): none of the arguments of this rule depend on V or I,
   so it looks as if the same line is redrawn for every result;
   confirm whether it could be drawn once. */
2060 tab_vline (tbl, TAL_1, n_cols - n_percentiles -1, heading_rows, n_rows - 1);
2062 tukey_hinges_calculate ((struct tukey_hinges *) result->metrics[v].tukey_hinges,
/* Fill both rows of the block, one column per percentile. */
2065 for (j = 0; j < n_percentiles; ++j)
2067 double hinge = SYSMIS;
2068 tab_float (tbl, n_cols - n_percentiles + j,
2069 heading_rows + row_var_start + i * PERCENTILE_ROWS,
2071 percentile_calculate (result->metrics[v].ptl[j],
2072 percentile_algorithm),
/* The branches below are taken only when ptile equals 0.5, 0.25 or
   0.75; otherwise HINGE keeps SYSMIS and the second-row cell is not
   written.
   NOTE(review): these are exact == comparisons on doubles.  The
   three constants are exactly representable, but ptile must have been
   stored as exactly these values; confirm how ptile is produced. */
2076 if ( result->metrics[v].ptl[j]->ptile == 0.5)
2078 else if ( result->metrics[v].ptl[j]->ptile == 0.25)
2080 else if ( result->metrics[v].ptl[j]->ptile == 0.75)
2083 if ( hinge != SYSMIS)
2084 tab_float (tbl, n_cols - n_percentiles + j,
2085 heading_rows + row_var_start + 1 + i * PERCENTILE_ROWS,
/* TAL_2 vertical rule at x = heading_columns, over the full height
   of the table. */
2095 tab_vline (tbl, TAL_2, heading_columns, 0, n_rows - 1);
2097 tab_title (tbl, _("Percentiles"));
/* Second heading row: one title per percentile column, formatted
   from the matching entry of percentile_list. */
2100 for (i = 0 ; i < n_percentiles; ++i )
2102 tab_text (tbl, n_cols - n_percentiles + i, 1,
2103 TAB_CENTER | TAT_TITLE | TAT_PRINTF,
2105 subc_list_double_at (&percentile_list, i)
/* First heading row: a joined title cell that starts at the first
   percentile column. */
2111 tab_joint_text (tbl,
2112 n_cols - n_percentiles, 0,
2114 TAB_CENTER | TAT_TITLE,
2117 /* Vertical lines for the data only */
2121 n_cols - n_percentiles, 1,
2122 n_cols - 1, n_rows - 1);
/* TAL_1 rule at y = 1 across the percentile columns. */
2124 tab_hline (tbl, TAL_1, n_cols - n_percentiles, n_cols - 1, 1);
/* Appends a short description of RESULT's factor values to STR,
   without the factor variable names.

   When FCTR has a first factor, the value name of RESULT->value[0] is
   appended through var_append_value_name.  When it has a second
   factor, "," and the value name of RESULT->value[1] follow.

   NOTE(review): a ")" is appended at the end, but none of the calls
   here writes a matching "(" (factor_to_string opens one in its first
   format string); confirm whether the unbalanced parenthesis is
   intended. */
2132 factor_to_string_concise (const struct xfactor *fctr,
2133 const struct factor_result *result,
2137 if (fctr->indep_var[0])
2139 var_append_value_name (fctr->indep_var[0], result->value[0], str);
2141 if ( fctr->indep_var[1] )
2143 ds_put_cstr (str, ",");
2145 var_append_value_name (fctr->indep_var[1], result->value[1], str);
2147 ds_put_cstr (str, ")");
/* Appends a description of RESULT's factor values to STR, including
   the factor variable names.

   When FCTR has a first factor, "(NAME = " is appended, where NAME is
   var_get_name of that factor, followed by the value name of
   RESULT->value[0].  When it has a second factor, "," and "NAME = "
   for that factor follow, then the value name of RESULT->value[1].
   A ")" is appended last. */
2154 factor_to_string (const struct xfactor *fctr,
2155 const struct factor_result *result,
2159 if (fctr->indep_var[0])
2161 ds_put_format (str, "(%s = ", var_get_name (fctr->indep_var[0]));
2163 var_append_value_name (fctr->indep_var[0], result->value[0], str);
2165 if ( fctr->indep_var[1] )
2167 ds_put_cstr (str, ",");
2168 ds_put_format (str, "%s = ", var_get_name (fctr->indep_var[1]));
2170 var_append_value_name (fctr->indep_var[1], result->value[1], str);
2172 ds_put_cstr (str, ")");