1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2004, 2008, 2009 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
19 #include <gsl/gsl_cdf.h>
20 #include <libpspp/message.h>
25 #include <math/sort.h>
26 #include <math/order-stats.h>
27 #include <math/percentiles.h>
28 #include <math/tukey-hinges.h>
29 #include <math/box-whisker.h>
30 #include <math/trimmed-mean.h>
31 #include <math/extrema.h>
33 #include <data/case.h>
34 #include <data/casegrouper.h>
35 #include <data/casereader.h>
36 #include <data/casewriter.h>
37 #include <data/dictionary.h>
38 #include <data/procedure.h>
39 #include <data/subcase.h>
40 #include <data/value-labels.h>
41 #include <data/variable.h>
42 #include <language/command.h>
43 #include <language/dictionary/split-file.h>
44 #include <language/lexer/lexer.h>
45 #include <libpspp/compiler.h>
46 #include <libpspp/hash.h>
47 #include <libpspp/message.h>
48 #include <libpspp/misc.h>
49 #include <libpspp/str.h>
50 #include <math/moments.h>
51 #include <output/charts/box-whisker.h>
52 #include <output/charts/cartesian.h>
53 #include <output/manager.h>
54 #include <output/table.h>
60 #define _(msgid) gettext (msgid)
61 #define N_(msgid) msgid
64 #include <output/chart.h>
65 #include <output/charts/plot-hist.h>
66 #include <output/charts/plot-chart.h>
67 #include <math/histogram.h>
74 missing=miss:pairwise/!listwise,
76 incl:include/!exclude;
77 +compare=cmp:variables/!groups;
80 +plot[plt_]=stemleaf,boxplot,npplot,:spreadlevel(*d:n),histogram,all,none;
82 +statistics[st_]=descriptives,:extreme(*d:n),all,none.
/* Parsed state of the current EXAMINE command; cmd_examine () passes its
   address to parse_examine () and to run_examine (). */
90 static struct cmd_examine cmd;
/* Dependent variables and their count, filled in by
   parse_variables_const () from xmn_custom_variables (). */
92 static const struct variable **dependent_vars;
93 static size_t n_dependent_vars;
/* Requested percentiles (pushed by cmd_examine () and
   xmn_custom_percentiles ()) and the algorithm chosen to compute them. */
97 static subc_list_double percentile_list;
98 static enum pc_alg percentile_algorithm;
/* Statistics kept for one dependent variable within one factor result.
   examine_group () creates the members; factor_destroy () releases them,
   except NP, which show_npplot () destroys after plotting. */
100 struct factor_metrics
102 struct moments1 *moments;
/* Array of percentile objects, one per entry of percentile_list. */
104 struct percentile **ptl;
107 struct statistic *tukey_hinges;
108 struct statistic *box_whisker;
109 struct statistic *trimmed_mean;
110 struct statistic *histogram;
111 struct order_stats *np;
113 /* Three quartiles indexing into PTL */
114 struct percentile **quartiles;
116 /* A reader sorted in ASCENDING order */
117 struct casereader *up_reader;
119 /* The minimum value of all the weights */
122 /* Sum of all weights, including those for missing values */
125 /* Sum of weights of non_missing values */
/* The N smallest and N largest values seen (N is chosen in
   examine_group ()). */
138 struct extrema *minima;
139 struct extrema *maxima;
/* The members from here to METRICS are used as struct factor_result
   throughout this file.  VALUE[i] is a copy of the value of the i-th
   independent variable for this result (made with value_dup () in
   examine_group ()). */
146 union value *value[2];
148 /* An array of factor metrics, one for each variable */
149 struct factor_metrics *metrics;
/* The remaining members are used as struct xfactor throughout this file. */
154 /* We need to make a list of this structure */
157 /* The independent variable */
/* Up to two independent variables.  examine_parse_independent_vars () sets
   indep_var[1] to NULL when only one is given, and several callers test
   indep_var[0] for NULL as well.
   NOTE(review): the declaration repeats `const' on the pointed-to type;
   the second qualifier is redundant -- confirm nothing else was intended. */
158 const struct variable const* indep_var[2];
160 /* A list of results for this factor */
161 struct ll_list result_list ;
/* Releases the data owned by every result on FCTR's result list: for each
   dependent variable the moments, extrema, order statistics, percentile
   objects and arrays, and the sorted reader; then the two factor values
   and the metrics array itself.

   The NP member of the metrics is not released here; show_npplot ()
   destroys it after plotting. */
166 factor_destroy (struct xfactor *fctr)
168 struct ll *ll = ll_head (&fctr->result_list);
169 while (ll != ll_null (&fctr->result_list))
172 struct factor_result *result =
173 ll_data (ll, struct factor_result, ll);
175 for (v = 0; v < n_dependent_vars; ++v)
178 moments1_destroy (result->metrics[v].moments);
179 extrema_destroy (result->metrics[v].minima);
180 extrema_destroy (result->metrics[v].maxima);
181 statistic_destroy (result->metrics[v].trimmed_mean);
182 statistic_destroy (result->metrics[v].tukey_hinges);
183 statistic_destroy (result->metrics[v].box_whisker);
184 statistic_destroy (result->metrics[v].histogram);
/* Each percentile is destroyed through its struct statistic view before
   the arrays that point at it are freed.  QUARTILES only aliases entries
   of PTL (see examine_group ()), so just the array is freed. */
185 for (i = 0 ; i < result->metrics[v].n_ptiles; ++i)
186 statistic_destroy ((struct statistic *) result->metrics[v].ptl[i]);
187 free (result->metrics[v].ptl);
188 free (result->metrics[v].quartiles);
189 casereader_destroy (result->metrics[v].up_reader);
192 free (result->value[0]);
193 free (result->value[1]);
194 free (result->metrics);
/* Results for the data taken as a whole; run_examine () fills it through
   examine_group () at level 0. */
200 static struct xfactor level0_factor;
/* List of struct xfactor, one per factor clause, appended to by
   examine_parse_independent_vars (). */
201 static struct ll_list factor_list;
203 /* Parse the clause specifying the factors */
204 static int examine_parse_independent_vars (struct lexer *lexer,
205 const struct dictionary *dict,
206 struct cmd_examine *cmd);
208 /* Output functions */
209 static void show_summary (const struct variable **dependent_var, int n_dep_var,
210 const struct dictionary *dict,
211 const struct xfactor *f);
214 static void show_descriptives (const struct variable **dependent_var,
216 const struct xfactor *f);
219 static void show_percentiles (const struct variable **dependent_var,
221 const struct xfactor *f);
224 static void show_extremes (const struct variable **dependent_var,
226 const struct xfactor *f);
231 /* Per Split function */
232 static void run_examine (struct cmd_examine *, struct casereader *,
235 static void output_examine (const struct dictionary *dict);
/* NOTE(review): factor_calc is declared with external linkage and no
   definition or caller is visible in this part of the file -- confirm it
   is still needed. */
238 void factor_calc (const struct ccase *c, int case_no,
239 double weight, bool case_missing);
242 /* Represent a factor as a string, so it can be
243 printed in a human readable fashion */
244 static void factor_to_string (const struct xfactor *fctr,
245 const struct factor_result *result,
248 /* Represent a factor as a string, so it can be
249 printed in a human readable fashion,
250 but sacrificing some readability for the sake of brevity */
252 factor_to_string_concise (const struct xfactor *fctr,
253 const struct factor_result *result,
259 /* Categories of missing values to exclude. */
260 static enum mv_class exclude_values;
/* Entry point for the EXAMINE command.

   Resets the file-level percentile and factor state, parses the command
   into CMD, applies defaults, then calls run_examine () once for every
   split-file group of the active dataset.

   Returns CMD_SUCCESS when both casegrouper_destroy () and proc_commit ()
   succeed, otherwise CMD_CASCADING_FAILURE. */
263 cmd_examine (struct lexer *lexer, struct dataset *ds)
265 struct casegrouper *grouper;
266 struct casereader *group;
269 subc_list_double_create (&percentile_list);
270 percentile_algorithm = PC_HAVERAGE;
272 ll_init (&factor_list);
274 if ( !parse_examine (lexer, ds, &cmd, NULL) )
276 subc_list_double_destroy (&percentile_list);
280 /* With /MISSING=INCLUDE only MV_SYSTEM values are excluded, so user-missing values count as valid; otherwise MV_ANY is excluded */
281 exclude_values = cmd.incl == XMN_INCLUDE ? MV_SYSTEM : MV_ANY;
283 if ( cmd.st_n == SYSMIS )
/* Default confidence interval when no CINTERVAL subcommand was given. */
286 if ( ! cmd.sbc_cinterval)
287 cmd.n_cinterval[0] = 95.0;
289 /* If descriptives have been requested, make sure the
290 quartiles are calculated */
291 if ( cmd.a_statistics[XMN_ST_DESCRIPTIVES] )
293 subc_list_double_push (&percentile_list, 25);
294 subc_list_double_push (&percentile_list, 50);
295 subc_list_double_push (&percentile_list, 75);
298 grouper = casegrouper_create_splits (proc_open (ds), dataset_dict (ds));
300 while (casegrouper_get_next_group (grouper, &group))
/* Wrap the group in an arithmetic-sequence reader (arguments 1, 1).
   Presumably this appends a running case number to each case:
   examine_group () reads the last value of every case as its location.
   TODO confirm against casereader_create_arithmetic_sequence (). */
302 struct casereader *reader =
303 casereader_create_arithmetic_sequence (group, 1, 1);
305 run_examine (&cmd, reader, ds);
308 ok = casegrouper_destroy (grouper);
309 ok = proc_commit (ds) && ok;
/* NOTE(review): free (NULL) is a no-op, so the guard is redundant.  The
   file-level pointer is also left dangling after the free; consider
   resetting it to NULL. */
311 if ( dependent_vars )
312 free (dependent_vars);
314 subc_list_double_destroy (&percentile_list);
316 return ok ? CMD_SUCCESS : CMD_CASCADING_FAILURE;
320 /* Plot the normal and detrended normal Q-Q plots for NP.
321 Label the plots with LABEL */
323 np_plot (struct np *np, const char *label)
325 double yfirst = 0, ylast = 0;
332 struct chart *np_chart;
334 /* Detrended Normal Plot */
335 struct chart *dnp_chart;
337 /* The slope and intercept of the ideal normal probability line */
/* NOTE(review): both divide by np->stddev; confirm that a zero standard
   deviation (constant data) cannot reach this point. */
338 const double slope = 1.0 / np->stddev;
339 const double intercept = -np->mean / np->stddev;
343 msg (MW, _("Not creating plot because data set is empty."));
347 np_chart = chart_create ();
348 dnp_chart = chart_create ();
350 if ( !np_chart || ! dnp_chart )
353 chart_write_title (np_chart, _("Normal Q-Q Plot of %s"), label);
354 chart_write_xlabel (np_chart, _("Observed Value"));
355 chart_write_ylabel (np_chart, _("Expected Normal"));
357 chart_write_title (dnp_chart, _("Detrended Normal Q-Q Plot of %s"),
359 chart_write_xlabel (dnp_chart, _("Observed Value"));
360 chart_write_ylabel (dnp_chart, _("Dev from Normal"));
/* Y range of the normal plot: inverse standard-normal CDF at 1/(n+1) and
   n/(n+1).  NOTE(review): this relies on np->n being a floating-point
   member; with an integer type 1 / (np->n + 1) would truncate to 0 --
   confirm against the definition of struct np. */
362 yfirst = gsl_cdf_ugaussian_Pinv (1 / (np->n + 1));
363 ylast = gsl_cdf_ugaussian_Pinv (np->n / (np->n + 1));
365 /* Need to make sure that both the scatter plot and the ideal fit into the
367 x_lower = MIN (np->y_min, (yfirst - intercept) / slope) ;
368 x_upper = MAX (np->y_max, (ylast - intercept) / slope) ;
369 slack = (x_upper - x_lower) * 0.05 ;
371 chart_write_xscale (np_chart, x_lower - slack, x_upper + slack, 5);
372 chart_write_xscale (dnp_chart, np->y_min, np->y_max, 5);
374 chart_write_yscale (np_chart, yfirst, ylast, 5);
375 chart_write_yscale (dnp_chart, np->dns_min, np->dns_max, 5);
378 struct casereader *reader = casewriter_make_reader (np->writer);
380 while ((c = casereader_read (reader)) != NULL)
382 chart_datum (np_chart, 0, case_data_idx (c, NP_IDX_Y)->f, case_data_idx (c, NP_IDX_NS)->f);
383 chart_datum (dnp_chart, 0, case_data_idx (c, NP_IDX_Y)->f, case_data_idx (c, NP_IDX_DNS)->f);
387 casereader_destroy (reader);
390 chart_line (dnp_chart, 0, 0, np->y_min, np->y_max , CHART_DIM_X);
391 chart_line (np_chart, slope, intercept, yfirst, ylast , CHART_DIM_Y);
393 chart_submit (np_chart);
394 chart_submit (dnp_chart);
/* Draws the normal probability plots for every dependent variable and
   every result of FCTR.  Each plot is labelled with the variable name
   followed by the text produced by factor_to_string (). */
399 show_npplot (const struct variable **dependent_var,
401 const struct xfactor *fctr)
405 for (v = 0; v < n_dep_var; ++v)
408 for (ll = ll_head (&fctr->result_list);
409 ll != ll_null (&fctr->result_list);
413 const struct factor_result *result =
414 ll_data (ll, struct factor_result, ll);
416 ds_init_empty (&str);
417 ds_put_format (&str, "%s ", var_get_name (dependent_var[v]));
419 factor_to_string (fctr, result, &str);
421 np_plot ((struct np*) result->metrics[v].np, ds_cstr(&str));
/* The np statistic is destroyed here, straight after plotting;
   factor_destroy () does not release it. */
423 statistic_destroy ((struct statistic *)result->metrics[v].np);
/* Draws a histogram for every dependent variable and every result of
   FCTR, labelled with the variable name followed by the text produced by
   factor_to_string ().

   NOTE(review): examine_group () only assigns the histogram member after
   its empty-extrema check, so it may still be NULL here; confirm that
   histogram_plot () accepts a null histogram. */
432 show_histogram (const struct variable **dependent_var,
434 const struct xfactor *fctr)
438 for (v = 0; v < n_dep_var; ++v)
441 for (ll = ll_head (&fctr->result_list);
442 ll != ll_null (&fctr->result_list);
446 const struct factor_result *result =
447 ll_data (ll, struct factor_result, ll);
449 ds_init_empty (&str);
450 ds_put_format (&str, "%s ", var_get_name (dependent_var[v]));
452 factor_to_string (fctr, result, &str);
454 histogram_plot ((struct histogram *) result->metrics[v].histogram,
456 (struct moments1 *) result->metrics[v].moments);
/* Draws one chart per dependent variable, containing one box per result
   of FCTR.

   For each variable the first loop finds the overall range from the head
   of every result's maxima and minima lists and uses it for the Y scale;
   the second loop draws the boxes side by side, each labelled with
   factor_to_string_concise ().

   NOTE(review): the result of chart_create () is dereferenced
   (ch->data_right) without a NULL check, whereas np_plot () does test its
   charts -- confirm whether chart_create () can return NULL here. */
466 show_boxplot_groups (const struct variable **dependent_var,
468 const struct xfactor *fctr)
472 for (v = 0; v < n_dep_var; ++v)
476 struct chart *ch = chart_create ();
477 double y_min = DBL_MAX;
478 double y_max = -DBL_MAX;
480 for (ll = ll_head (&fctr->result_list);
481 ll != ll_null (&fctr->result_list);
484 const struct extremum *max, *min;
485 const struct factor_result *result =
486 ll_data (ll, struct factor_result, ll);
488 const struct ll_list *max_list =
489 extrema_list (result->metrics[v].maxima);
491 const struct ll_list *min_list =
492 extrema_list (result->metrics[v].minima);
494 if ( ll_is_empty (max_list))
496 msg (MW, _("Not creating plot because data set is empty."));
500 max = (const struct extremum *)
501 ll_data (ll_head(max_list), struct extremum, ll);
503 min = (const struct extremum *)
504 ll_data (ll_head (min_list), struct extremum, ll);
506 y_max = MAX (y_max, max->value);
507 y_min = MIN (y_min, min->value);
510 boxplot_draw_yscale (ch, y_max, y_min);
512 if ( fctr->indep_var[0])
513 chart_write_title (ch, _("Boxplot of %s vs. %s"),
514 var_to_string (dependent_var[v]),
515 var_to_string (fctr->indep_var[0]) );
517 chart_write_title (ch, _("Boxplot of %s"),
518 var_to_string (dependent_var[v]));
520 for (ll = ll_head (&fctr->result_list);
521 ll != ll_null (&fctr->result_list);
524 const struct factor_result *result =
525 ll_data (ll, struct factor_result, ll);
/* The data area is divided into one slot per result; each box is half a
   slot wide and centred in its slot. */
528 const double box_width = (ch->data_right - ch->data_left)
529 / (ll_count (&fctr->result_list) * 2.0 ) ;
531 const double box_centre = (f++ * 2 + 1) * box_width + ch->data_left;
533 ds_init_empty (&str);
534 factor_to_string_concise (fctr, result, &str);
536 boxplot_draw_boxplot (ch,
537 box_centre, box_width,
538 (const struct box_whisker *)
539 result->metrics[v].box_whisker,
/* Draws one chart per result of FCTR, containing one box per dependent
   variable.  The Y scale spans the extrema of all the dependent variables
   of that result; each box is labelled with the variable's name.

   NOTE(review), to be confirmed against the complete file:
   - The title is passed to chart_write_title () as its format argument
     (other callers in this file pass a literal format followed by
     arguments), so a '%' in a value label would be read as a conversion;
     passing "%s" and the string separately avoids that.
   - Unlike show_boxplot_groups (), the extrema lists are dereferenced
     without an ll_is_empty () check.
   - STR is initialised twice in a row (ds_init_empty () followed by
     ds_init_cstr ()); the first call looks redundant. */
552 show_boxplot_variables (const struct variable **dependent_var,
554 const struct xfactor *fctr
560 const struct ll_list *result_list = &fctr->result_list;
562 for (ll = ll_head (result_list);
563 ll != ll_null (result_list);
568 struct chart *ch = chart_create ();
569 double y_min = DBL_MAX;
570 double y_max = -DBL_MAX;
572 const struct factor_result *result =
573 ll_data (ll, struct factor_result, ll);
/* One slot per dependent variable; each box is half a slot wide. */
575 const double box_width = (ch->data_right - ch->data_left)
576 / (n_dep_var * 2.0 ) ;
578 for (v = 0; v < n_dep_var; ++v)
580 const struct ll *max_ll =
581 ll_head (extrema_list (result->metrics[v].maxima));
582 const struct ll *min_ll =
583 ll_head (extrema_list (result->metrics[v].minima));
585 const struct extremum *max =
586 (const struct extremum *) ll_data (max_ll, struct extremum, ll);
588 const struct extremum *min =
589 (const struct extremum *) ll_data (min_ll, struct extremum, ll);
591 y_max = MAX (y_max, max->value);
592 y_min = MIN (y_min, min->value);
596 boxplot_draw_yscale (ch, y_max, y_min);
598 ds_init_empty (&title);
599 factor_to_string (fctr, result, &title);
602 ds_put_format (&title, "%s = ", var_get_name (fctr->indep_var[0]));
603 var_append_value_name (fctr->indep_var[0], result->value[0], &title);
606 chart_write_title (ch, ds_cstr (&title));
609 for (v = 0; v < n_dep_var; ++v)
612 const double box_centre = (v * 2 + 1) * box_width + ch->data_left;
614 ds_init_empty (&str);
615 ds_init_cstr (&str, var_get_name (dependent_var[v]));
617 boxplot_draw_boxplot (ch,
618 box_centre, box_width,
619 (const struct box_whisker *) result->metrics[v].box_whisker,
630 /* Show all the appropriate tables */
/* The summary table is always shown; the extremes, descriptives and
   percentiles tables and the plots depend on what CMD requested.  Output
   is produced first for level0_factor and then for each factor on
   factor_list.

   For level0_factor the boxplot is always drawn with
   show_boxplot_groups (); for the listed factors cmd.cmp selects between
   show_boxplot_groups () and show_boxplot_variables (). */
632 output_examine (const struct dictionary *dict)
636 show_summary (dependent_vars, n_dependent_vars, dict, &level0_factor);
638 if ( cmd.a_statistics[XMN_ST_EXTREME] )
639 show_extremes (dependent_vars, n_dependent_vars, &level0_factor);
641 if ( cmd.a_statistics[XMN_ST_DESCRIPTIVES] )
642 show_descriptives (dependent_vars, n_dependent_vars, &level0_factor);
644 if ( cmd.sbc_percentiles)
645 show_percentiles (dependent_vars, n_dependent_vars, &level0_factor);
649 if (cmd.a_plot[XMN_PLT_BOXPLOT])
650 show_boxplot_groups (dependent_vars, n_dependent_vars, &level0_factor);
652 if (cmd.a_plot[XMN_PLT_HISTOGRAM])
653 show_histogram (dependent_vars, n_dependent_vars, &level0_factor);
655 if (cmd.a_plot[XMN_PLT_NPPLOT])
656 show_npplot (dependent_vars, n_dependent_vars, &level0_factor);
659 for (ll = ll_head (&factor_list);
660 ll != ll_null (&factor_list); ll = ll_next (ll))
662 struct xfactor *factor = ll_data (ll, struct xfactor, ll);
663 show_summary (dependent_vars, n_dependent_vars, dict, factor);
665 if ( cmd.a_statistics[XMN_ST_EXTREME] )
666 show_extremes (dependent_vars, n_dependent_vars, factor);
668 if ( cmd.a_statistics[XMN_ST_DESCRIPTIVES] )
669 show_descriptives (dependent_vars, n_dependent_vars, factor);
671 if ( cmd.sbc_percentiles)
672 show_percentiles (dependent_vars, n_dependent_vars, factor);
674 if (cmd.a_plot[XMN_PLT_BOXPLOT] &&
675 cmd.cmp == XMN_GROUPS)
676 show_boxplot_groups (dependent_vars, n_dependent_vars, factor);
679 if (cmd.a_plot[XMN_PLT_BOXPLOT] &&
680 cmd.cmp == XMN_VARIABLES)
681 show_boxplot_variables (dependent_vars, n_dependent_vars,
684 if (cmd.a_plot[XMN_PLT_HISTOGRAM])
685 show_histogram (dependent_vars, n_dependent_vars, factor);
687 if (cmd.a_plot[XMN_PLT_NPPLOT])
688 show_npplot (dependent_vars, n_dependent_vars, factor);
692 /* Parse the PERCENTILES subcommand */
/* Numbers found by the loop are appended to percentile_list, then an
   optional algorithm keyword sets percentile_algorithm.  If no percentile
   is on the list afterwards, the defaults 5, 10, 25, 50, 75, 90 and 95
   are pushed.

   The return values of the lex_match () calls for '=', '(', ',' and ')'
   are not checked in the lines shown, so that punctuation is accepted
   when present but not required.
   NOTE(review): no range check of the parsed numbers is visible here;
   confirm whether values outside 0..100 are rejected elsewhere. */
694 xmn_custom_percentiles (struct lexer *lexer, struct dataset *ds UNUSED,
695 struct cmd_examine *p UNUSED, void *aux UNUSED)
697 lex_match (lexer, '=');
699 lex_match (lexer, '(');
701 while ( lex_is_number (lexer) )
703 subc_list_double_push (&percentile_list, lex_number (lexer));
707 lex_match (lexer, ',') ;
709 lex_match (lexer, ')');
711 lex_match (lexer, '=');
713 if ( lex_match_id (lexer, "HAVERAGE"))
714 percentile_algorithm = PC_HAVERAGE;
716 else if ( lex_match_id (lexer, "WAVERAGE"))
717 percentile_algorithm = PC_WAVERAGE;
719 else if ( lex_match_id (lexer, "ROUND"))
720 percentile_algorithm = PC_ROUND;
722 else if ( lex_match_id (lexer, "EMPIRICAL"))
723 percentile_algorithm = PC_EMPIRICAL;
725 else if ( lex_match_id (lexer, "AEMPIRICAL"))
726 percentile_algorithm = PC_AEMPIRICAL;
728 else if ( lex_match_id (lexer, "NONE"))
729 percentile_algorithm = PC_NONE;
732 if ( 0 == subc_list_double_count (&percentile_list))
734 subc_list_double_push (&percentile_list, 5);
735 subc_list_double_push (&percentile_list, 10);
736 subc_list_double_push (&percentile_list, 25);
737 subc_list_double_push (&percentile_list, 50);
738 subc_list_double_push (&percentile_list, 75);
739 subc_list_double_push (&percentile_list, 90);
740 subc_list_double_push (&percentile_list, 95);
746 /* TOTAL and NOTOTAL are simple, mutually exclusive flags */
/* Handler for TOTAL: reports a syntax error (SE) if NOTOTAL has already
   been given, as recorded in P->sbc_nototal. */
748 xmn_custom_total (struct lexer *lexer UNUSED, struct dataset *ds UNUSED,
749 struct cmd_examine *p, void *aux UNUSED)
751 if ( p->sbc_nototal )
753 msg (SE, _("%s and %s are mutually exclusive"),"TOTAL","NOTOTAL");
/* Handler for NOTOTAL, the counterpart of xmn_custom_total (): it emits
   the same "mutually exclusive" syntax error.  The condition guarding the
   message is not visible in this part of the file; presumably it tests
   P->sbc_total -- TODO confirm. */
761 xmn_custom_nototal (struct lexer *lexer UNUSED, struct dataset *ds UNUSED,
762 struct cmd_examine *p, void *aux UNUSED)
766 msg (SE, _("%s and %s are mutually exclusive"), "TOTAL", "NOTOTAL");
775 /* Parser for the VARIABLES subcommand.
776 Returns 1 on success */
/* Reads the dependent variables (numeric, no duplicates, no scratch
   variables) into the file-level dependent_vars / n_dependent_vars.  If
   BY follows, the factors are parsed by
   examine_parse_independent_vars ().

   NOTE(review): on both failure paths dependent_vars is freed but, in the
   lines shown, not reset to NULL; cmd_examine () frees dependent_vars
   again whenever it is non-null.  Confirm that cmd_examine () never
   reaches that free after a parse failure, or reset the pointer here. */
778 xmn_custom_variables (struct lexer *lexer, struct dataset *ds,
779 struct cmd_examine *cmd,
782 const struct dictionary *dict = dataset_dict (ds);
783 lex_match (lexer, '=');
/* Condition: the next token is neither the name of a variable in DICT
   nor ALL. */
785 if ( (lex_token (lexer) != T_ID || dict_lookup_var (dict, lex_tokid (lexer)) == NULL)
786 && lex_token (lexer) != T_ALL)
791 if (!parse_variables_const (lexer, dict, &dependent_vars, &n_dependent_vars,
792 PV_NO_DUPLICATE | PV_NUMERIC | PV_NO_SCRATCH) )
794 free (dependent_vars);
798 assert (n_dependent_vars);
801 if ( lex_match (lexer, T_BY))
804 success = examine_parse_independent_vars (lexer, dict, cmd);
807 free (dependent_vars);
817 /* Parse the clause specifying the factors */
/* Allocates a new struct xfactor, reads one independent variable and,
   after BY, an optional second one, then appends the factor to
   factor_list.  After an optional ',' the function calls itself to parse
   further factors, unless the next token is '.' or '/'.

   NOTE(review): SF is allocated before the token checks.  What happens to
   it when a check fails is not visible in this part of the file; confirm
   it is freed on those paths. */
819 examine_parse_independent_vars (struct lexer *lexer,
820 const struct dictionary *dict,
821 struct cmd_examine *cmd)
824 struct xfactor *sf = xmalloc (sizeof *sf);
826 ll_init (&sf->result_list);
828 if ( (lex_token (lexer) != T_ID ||
829 dict_lookup_var (dict, lex_tokid (lexer)) == NULL)
830 && lex_token (lexer) != T_ALL)
836 sf->indep_var[0] = parse_variable (lexer, dict);
/* A NULL second variable marks a single-variable factor; callers test
   indep_var[1] to tell the two cases apart. */
837 sf->indep_var[1] = NULL;
839 if ( lex_token (lexer) == T_BY )
841 lex_match (lexer, T_BY);
843 if ( (lex_token (lexer) != T_ID ||
844 dict_lookup_var (dict, lex_tokid (lexer)) == NULL)
845 && lex_token (lexer) != T_ALL)
851 sf->indep_var[1] = parse_variable (lexer, dict);
853 ll_push_tail (&factor_list, &sf->ll);
856 ll_push_tail (&factor_list, &sf->ll);
858 lex_match (lexer, ',');
860 if ( lex_token (lexer) == '.' || lex_token (lexer) == '/' )
863 success = examine_parse_independent_vars (lexer, dict, cmd);
/* Computes the statistics requested by CMD for the cases in READER and
   appends the resulting struct factor_result to FACTOR's result list.
   READER is destroyed before returning.

   DICT supplies the weight variable, if any.  The use made of LEVEL is
   not visible in this part of the file; run_examine () passes 0 for the
   whole data set, 1 for one-variable factors and 2 for two-variable
   factors.

   Passes over the data, per dependent variable:
   - a first pass accumulates moments, extrema, the minimum weight and the
     weight totals while copying the cases to a writer (a sorting writer
     when descriptives, boxplot, npplot or percentiles are requested);
   - in that same case a second pass feeds the percentiles, Tukey hinges,
     trimmed mean and, for NPPLOT, the np statistic through
     order_stats_accumulate ();
   - with HISTOGRAM, histograms are created from the extrema and filled in
     one further pass over READER;
   - with BOXPLOT, the box-whisker statistic is accumulated from the
     sorted reader.

   NOTE(review): the xcalloc () call that allocates OS passes the element
   size first and the count second, the reverse of the other xcalloc ()
   calls in this function.  The product is the same, but the order is
   inconsistent. */
872 examine_group (struct cmd_examine *cmd, struct casereader *reader, int level,
873 const struct dictionary *dict, struct xfactor *factor)
876 const struct variable *wv = dict_get_weight (dict);
879 struct factor_result *result = xzalloc (sizeof (*result));
881 result->metrics = xcalloc (n_dependent_vars, sizeof (*result->metrics));
883 if ( cmd->a_statistics[XMN_ST_EXTREME] )
884 n_extrema = cmd->st_n;
887 c = casereader_peek (reader, 0);
893 value_dup (case_data (c, factor->indep_var[0]),
894 var_get_width (factor->indep_var[0]));
898 value_dup (case_data (c, factor->indep_var[1]),
899 var_get_width (factor->indep_var[1]));
904 for (v = 0; v < n_dependent_vars; ++v)
906 struct casewriter *writer;
907 struct casereader *input = casereader_clone (reader);
909 result->metrics[v].moments = moments1_create (MOMENT_KURTOSIS);
910 result->metrics[v].minima = extrema_create (n_extrema, EXTREME_MINIMA);
911 result->metrics[v].maxima = extrema_create (n_extrema, EXTREME_MAXIMA);
912 result->metrics[v].cmin = DBL_MAX;
914 if (cmd->a_statistics[XMN_ST_DESCRIPTIVES] ||
915 cmd->a_plot[XMN_PLT_BOXPLOT] ||
916 cmd->a_plot[XMN_PLT_NPPLOT] ||
917 cmd->sbc_percentiles)
919 /* In this case, we need to sort the data, so we create a sorting
921 struct subcase up_ordering;
922 subcase_init_var (&up_ordering, dependent_vars[v], SC_ASCEND);
923 writer = sort_create_writer (&up_ordering,
924 casereader_get_value_cnt (reader));
925 subcase_destroy (&up_ordering);
929 /* but in this case, sorting is unnecessary, so an ordinary
930 casewriter is sufficient */
932 autopaging_writer_create (casereader_get_value_cnt (reader));
936 /* Sort or just iterate, whilst calculating moments etc */
937 while ((c = casereader_read (input)) != NULL)
939 const casenumber loc =
940 case_data_idx (c, casereader_get_value_cnt (reader) - 1)->f;
942 const double weight = wv ? case_data (c, wv)->f : 1.0;
943 const union value *value = case_data (c, dependent_vars[v]);
945 if (weight != SYSMIS)
946 minimize (&result->metrics[v].cmin, weight);
948 moments1_add (result->metrics[v].moments,
952 result->metrics[v].n += weight;
/* NOTE(review): validity is tested with MV_ANY here, although
   cmd_examine () derives exclude_values from the /MISSING setting;
   confirm whether exclude_values was intended. */
954 if ( ! var_is_value_missing (dependent_vars[v], value, MV_ANY) )
955 result->metrics[v].n_valid += weight;
957 extrema_add (result->metrics[v].maxima,
962 extrema_add (result->metrics[v].minima,
967 casewriter_write (writer, c);
969 casereader_destroy (input);
970 result->metrics[v].up_reader = casewriter_make_reader (writer);
973 /* If percentiles or descriptives have been requested, then a
974 second pass through the data (which has now been sorted)
976 if ( cmd->a_statistics[XMN_ST_DESCRIPTIVES] ||
977 cmd->a_plot[XMN_PLT_BOXPLOT] ||
978 cmd->a_plot[XMN_PLT_NPPLOT] ||
979 cmd->sbc_percentiles)
981 for (v = 0; v < n_dependent_vars; ++v)
985 struct order_stats **os ;
986 struct factor_metrics *metric = &result->metrics[v];
988 metric->n_ptiles = percentile_list.n_data;
990 metric->ptl = xcalloc (metric->n_ptiles,
991 sizeof (struct percentile *));
993 metric->quartiles = xcalloc (3, sizeof (*metric->quartiles));
995 for (i = 0 ; i < metric->n_ptiles; ++i)
997 metric->ptl[i] = (struct percentile *)
998 percentile_create (percentile_list.data[i] / 100.0, metric->n_valid);
1000 if ( percentile_list.data[i] == 25)
1001 metric->quartiles[0] = metric->ptl[i];
1002 else if ( percentile_list.data[i] == 50)
1003 metric->quartiles[1] = metric->ptl[i];
1004 else if ( percentile_list.data[i] == 75)
1005 metric->quartiles[2] = metric->ptl[i];
1008 metric->tukey_hinges = tukey_hinges_create (metric->n, metric->cmin);
1009 metric->trimmed_mean = trimmed_mean_create (metric->n, 0.05);
1011 n_os = metric->n_ptiles + 2;
1013 if ( cmd->a_plot[XMN_PLT_NPPLOT] )
1015 metric->np = np_create (metric->moments);
1019 os = xcalloc (sizeof (struct order_stats *), n_os);
1021 for (i = 0 ; i < metric->n_ptiles ; ++i )
1023 os[i] = (struct order_stats *) metric->ptl[i];
1026 os[i] = (struct order_stats *) metric->tukey_hinges;
1027 os[i+1] = (struct order_stats *) metric->trimmed_mean;
1029 if (cmd->a_plot[XMN_PLT_NPPLOT])
1030 os[i+2] = metric->np;
1032 order_stats_accumulate (os, n_os,
1033 casereader_clone (metric->up_reader),
1034 wv, dependent_vars[v], MV_ANY);
1039 /* FIXME: Do this in the above loop */
1040 if ( cmd->a_plot[XMN_PLT_HISTOGRAM] )
1043 struct casereader *input = casereader_clone (reader);
1045 for (v = 0; v < n_dependent_vars; ++v)
1047 const struct extremum *max, *min;
1048 struct factor_metrics *metric = &result->metrics[v];
1050 const struct ll_list *max_list =
1051 extrema_list (result->metrics[v].maxima);
1053 const struct ll_list *min_list =
1054 extrema_list (result->metrics[v].minima);
1056 if ( ll_is_empty (max_list))
1058 msg (MW, _("Not creating plot because data set is empty."));
1062 assert (! ll_is_empty (min_list));
1064 max = (const struct extremum *)
1065 ll_data (ll_head(max_list), struct extremum, ll);
1067 min = (const struct extremum *)
1068 ll_data (ll_head (min_list), struct extremum, ll);
1070 metric->histogram = histogram_create (10, min->value, max->value);
1073 while ((c = casereader_read (input)) != NULL)
1075 const double weight = wv ? case_data (c, wv)->f : 1.0;
1077 for (v = 0; v < n_dependent_vars; ++v)
1079 struct factor_metrics *metric = &result->metrics[v];
1080 if ( metric->histogram)
1081 histogram_add ((struct histogram *) metric->histogram,
1082 case_data (c, dependent_vars[v])->f, weight);
1086 casereader_destroy (input);
1089 /* In this case, a third iteration is required */
1090 if (cmd->a_plot[XMN_PLT_BOXPLOT])
1092 for (v = 0; v < n_dependent_vars; ++v)
1094 struct factor_metrics *metric = &result->metrics[v];
1096 metric->box_whisker =
1097 box_whisker_create ((struct tukey_hinges *) metric->tukey_hinges,
1099 casereader_get_value_cnt (metric->up_reader)
1102 order_stats_accumulate ((struct order_stats **) &metric->box_whisker,
1104 casereader_clone (metric->up_reader),
1105 wv, dependent_vars[v], MV_ANY);
1109 ll_push_tail (&factor->result_list, &result->ll);
1110 casereader_destroy (reader);
/* Runs the EXAMINE analysis for one split-file group read from INPUT.

   The whole group is analysed into level0_factor.  Then, for every factor
   on factor_list, a clone of INPUT is sorted and grouped by the first
   independent variable; when the factor has a second independent
   variable, each group is sorted and grouped again by it.  Every
   resulting group goes to examine_group ().  Finally the output is
   produced with output_examine () and the results are released.

   NOTE(review): LEVEL0 is cloned from INPUT before INPUT is peeked.  The
   first casereader_destroy (input) below appears to belong to an early
   exit for an empty group (its guard is not visible here); confirm that
   LEVEL0 is also destroyed on that path. */
1115 run_examine (struct cmd_examine *cmd, struct casereader *input,
1119 const struct dictionary *dict = dataset_dict (ds);
1121 struct casereader *level0 = casereader_clone (input);
1123 c = casereader_peek (input, 0);
1126 casereader_destroy (input);
1130 output_split_file_values (ds, c);
1133 ll_init (&level0_factor.result_list);
1135 examine_group (cmd, level0, 0, dict, &level0_factor);
1137 for (ll = ll_head (&factor_list);
1138 ll != ll_null (&factor_list);
1141 struct xfactor *factor = ll_data (ll, struct xfactor, ll);
1143 struct casereader *group = NULL;
1144 struct casereader *level1;
1145 struct casegrouper *grouper1 = NULL;
1147 level1 = casereader_clone (input);
1148 level1 = sort_execute_1var (level1, factor->indep_var[0]);
1149 grouper1 = casegrouper_create_vars (level1, &factor->indep_var[0], 1);
1151 while (casegrouper_get_next_group (grouper1, &group))
/* examine_group () destroys the reader it is given, so it receives a
   clone; GROUP itself is destroyed further down in this loop. */
1153 struct casereader *group_copy = casereader_clone (group);
1155 if ( !factor->indep_var[1])
1156 examine_group (cmd, group_copy, 1, dict, factor);
1160 struct casereader *group2 = NULL;
1161 struct casegrouper *grouper2 = NULL;
1163 group_copy = sort_execute_1var (group_copy,
1164 factor->indep_var[1]);
1166 grouper2 = casegrouper_create_vars (group_copy,
1167 &factor->indep_var[1], 1);
1169 while (casegrouper_get_next_group (grouper2, &group2))
1171 examine_group (cmd, group2, 2, dict, factor);
1174 casegrouper_destroy (grouper2);
1177 casereader_destroy (group);
1179 casegrouper_destroy (grouper1);
1182 casereader_destroy (input);
1184 output_examine (dict);
1186 factor_destroy (&level0_factor);
1190 for (ll = ll_head (&factor_list);
1191 ll != ll_null (&factor_list);
1194 struct xfactor *f = ll_data (ll, struct xfactor, ll);
/* Builds the "Case Processing Summary" table for factor FCTR.

   There is one row per dependent variable and factor result.  The six
   data columns are three pairs of a count and a percentage; the last pair
   is labelled "Total Valid + Missing" in the code below.  One or two
   extra heading columns are added when FCTR has one or two independent
   variables.  Counts are formatted with the weight variable's print
   format, or F8.0 when DICT has no weight variable.

   Side effect: for every result and variable this function calls
   moments1_calculate () and stores the mean, variance, skewness, kurtosis
   and standard error of the mean in the result's metrics.
   output_examine () always calls show_summary () before the other tables;
   presumably they rely on these stored values -- TODO confirm. */
1203 show_summary (const struct variable **dependent_var, int n_dep_var,
1204 const struct dictionary *dict,
1205 const struct xfactor *fctr)
1207 const struct variable *wv = dict_get_weight (dict);
1208 const struct fmt_spec *wfmt = wv ? var_get_print_format (wv) : & F_8_0;
1210 static const char *subtitle[]=
1218 int heading_columns = 1;
1220 const int heading_rows = 3;
1221 struct tab_table *tbl;
1228 if ( fctr->indep_var[0] )
1230 heading_columns = 2;
1232 if ( fctr->indep_var[1] )
1234 heading_columns = 3;
1238 n_rows *= ll_count (&fctr->result_list);
1239 n_rows += heading_rows;
1241 n_cols = heading_columns + 6;
1243 tbl = tab_create (n_cols, n_rows, 0);
1244 tab_headers (tbl, heading_columns, 0, heading_rows, 0);
1246 tab_dim (tbl, tab_natural_dimensions);
1248 /* Outline the box */
1253 n_cols - 1, n_rows - 1);
1255 /* Vertical lines for the data only */
1260 n_cols - 1, n_rows - 1);
1263 tab_hline (tbl, TAL_2, 0, n_cols - 1, heading_rows );
1264 tab_hline (tbl, TAL_1, heading_columns, n_cols - 1, 1 );
1265 tab_hline (tbl, TAL_1, heading_columns, n_cols - 1, heading_rows -1 );
1267 tab_vline (tbl, TAL_2, heading_columns, 0, n_rows - 1);
1270 tab_title (tbl, _("Case Processing Summary"));
1272 tab_joint_text (tbl, heading_columns, 0,
1274 TAB_CENTER | TAT_TITLE,
1277 /* Remove lines ... */
1284 for (j = 0 ; j < 3 ; ++j)
1286 tab_text (tbl, heading_columns + j * 2 , 2, TAB_CENTER | TAT_TITLE,
1289 tab_text (tbl, heading_columns + j * 2 + 1, 2, TAB_CENTER | TAT_TITLE,
1292 tab_joint_text (tbl, heading_columns + j * 2 , 1,
1293 heading_columns + j * 2 + 1, 1,
1294 TAB_CENTER | TAT_TITLE,
1297 tab_box (tbl, -1, -1,
1299 heading_columns + j * 2, 1,
1300 heading_columns + j * 2 + 1, 1);
1304 /* Titles for the independent variables */
1305 if ( fctr->indep_var[0] )
1307 tab_text (tbl, 1, heading_rows - 1, TAB_CENTER | TAT_TITLE,
1308 var_to_string (fctr->indep_var[0]));
1310 if ( fctr->indep_var[1] )
1312 tab_text (tbl, 2, heading_rows - 1, TAB_CENTER | TAT_TITLE,
1313 var_to_string (fctr->indep_var[1]));
1317 for (v = 0 ; v < n_dep_var ; ++v)
1321 union value *last_value = NULL;
1324 tab_hline (tbl, TAL_1, 0, n_cols -1 ,
1325 v * ll_count (&fctr->result_list)
1330 v * ll_count (&fctr->result_list) + heading_rows,
1331 TAB_LEFT | TAT_TITLE,
1332 var_to_string (dependent_var[v])
1336 for (ll = ll_head (&fctr->result_list);
1337 ll != ll_null (&fctr->result_list); ll = ll_next (ll))
1340 const struct factor_result *result =
1341 ll_data (ll, struct factor_result, ll);
1343 if ( fctr->indep_var[0] )
/* The first factor's value label is written only when it differs from
   the previous result's value. */
1346 if ( last_value == NULL ||
1347 compare_values_short (last_value, result->value[0],
1348 fctr->indep_var[0]))
1352 last_value = result->value[0];
1353 ds_init_empty (&str);
1355 var_append_value_name (fctr->indep_var[0], result->value[0],
1360 v * ll_count (&fctr->result_list),
1361 TAB_LEFT | TAT_TITLE,
1366 if ( fctr->indep_var[1] && j > 0)
1367 tab_hline (tbl, TAL_1, 1, n_cols - 1,
1369 v * ll_count (&fctr->result_list));
1372 if ( fctr->indep_var[1])
1376 ds_init_empty (&str);
1378 var_append_value_name (fctr->indep_var[1],
1379 result->value[1], &str);
1383 v * ll_count (&fctr->result_list),
1384 TAB_LEFT | TAT_TITLE,
/* RESULT is a pointer to const, but METRICS is a pointer member, so the
   metrics it points at can still be written through it. */
1392 moments1_calculate (result->metrics[v].moments,
1393 &n, &result->metrics[v].mean,
1394 &result->metrics[v].variance,
1395 &result->metrics[v].skewness,
1396 &result->metrics[v].kurtosis);
1398 result->metrics[v].se_mean = sqrt (result->metrics[v].variance / n) ;
1401 tab_double (tbl, heading_columns,
1402 heading_rows + j + v * ll_count (&fctr->result_list),
1406 tab_text (tbl, heading_columns + 1,
1407 heading_rows + j + v * ll_count (&fctr->result_list),
1408 TAB_RIGHT | TAT_PRINTF,
1409 "%g%%", n * 100.0 / result->metrics[v].n);
1412 tab_double (tbl, heading_columns + 2,
1413 heading_rows + j + v * ll_count (&fctr->result_list),
1415 result->metrics[v].n - n,
1418 tab_text (tbl, heading_columns + 3,
1419 heading_rows + j + v * ll_count (&fctr->result_list),
1420 TAB_RIGHT | TAT_PRINTF,
1422 (result->metrics[v].n - n) * 100.0 / result->metrics[v].n
1425 /* Total Valid + Missing */
1426 tab_double (tbl, heading_columns + 4,
1427 heading_rows + j + v * ll_count (&fctr->result_list),
1429 result->metrics[v].n,
1432 tab_text (tbl, heading_columns + 5,
1433 heading_rows + j + v * ll_count (&fctr->result_list),
1434 TAB_RIGHT | TAT_PRINTF,
1436 (result->metrics[v].n) * 100.0 / result->metrics[v].n
/* Number of table rows written per (dependent variable, factor result)
   pair; the code below addresses them at offsets 0 through 12. */
1447 #define DESCRIPTIVE_ROWS 13
/* Builds the table titled "Descriptives": for each dependent variable and
   each entry of fctr->result_list it fills DESCRIPTIVE_ROWS rows with labels
   and with statistics read from result->metrics[v].
   NOTE(review): this excerpt is missing lines (braces, some declarations and
   call arguments), so these comments describe only the statements that are
   visible here. */
1450 show_descriptives (const struct variable **dependent_var,
1452 const struct xfactor *fctr)
/* Three heading columns by default; 4 when fctr->indep_var[0] is set and
   5 when fctr->indep_var[1] is set. */
1455 int heading_columns = 3;
1457 const int heading_rows = 1;
1458 struct tab_table *tbl;
1465 if ( fctr->indep_var[0] )
1467 heading_columns = 4;
1469 if ( fctr->indep_var[1] )
1471 heading_columns = 5;
/* n_rows is scaled by (number of factor results * DESCRIPTIVE_ROWS) and the
   heading rows are then added; its starting value is set on a line that is
   not visible here. */
1475 n_rows *= ll_count (&fctr->result_list) * DESCRIPTIVE_ROWS;
1476 n_rows += heading_rows;
/* Two data columns (n_cols - 2 and n_cols - 1) follow the heading columns. */
1478 n_cols = heading_columns + 2;
1480 tbl = tab_create (n_cols, n_rows, 0);
1481 tab_headers (tbl, heading_columns, 0, heading_rows, 0);
1483 tab_dim (tbl, tab_natural_dimensions);
1485 /* Outline the box */
1490 n_cols - 1, n_rows - 1);
1493 tab_hline (tbl, TAL_2, 0, n_cols - 1, heading_rows );
1494 tab_hline (tbl, TAL_2, 1, n_cols - 1, heading_rows );
1496 tab_vline (tbl, TAL_1, n_cols - 1, 0, n_rows - 1);
/* Heading row: the factor variables, when present, title columns 1 and 2. */
1499 if ( fctr->indep_var[0])
1500 tab_text (tbl, 1, 0, TAT_TITLE, var_to_string (fctr->indep_var[0]));
1502 if ( fctr->indep_var[1])
1503 tab_text (tbl, 2, 0, TAT_TITLE, var_to_string (fctr->indep_var[1]));
1505 for (v = 0 ; v < n_dep_var ; ++v )
/* First body row (relative to heading_rows) belonging to dependent
   variable v. */
1510 const int row_var_start =
1511 v * DESCRIPTIVE_ROWS * ll_count(&fctr->result_list);
1515 heading_rows + row_var_start,
1516 TAB_LEFT | TAT_TITLE,
1517 var_to_string (dependent_var[v])
/* One group of DESCRIPTIVE_ROWS rows per factor result; i counts the
   results within the current dependent variable. */
1520 for (ll = ll_head (&fctr->result_list);
1521 ll != ll_null (&fctr->result_list); i++, ll = ll_next (ll))
1523 const struct factor_result *result =
1524 ll_data (ll, struct factor_result, ll);
/* Inverse upper-tail t value for probability (1 - confidence level) / 2
   with n - 1 degrees of freedom.  The result is presumably stored in `t`,
   which scales se_mean in the interval bounds below; the assignment line
   is not visible here.
   NOTE(review): n - 1 is 0 when result->metrics[v].n is 1 -- confirm that
   gsl_cdf_tdist_Qinv is never reached with zero degrees of freedom. */
1527 gsl_cdf_tdist_Qinv ((1 - cmd.n_cinterval[0] / 100.0) / 2.0,
1528 result->metrics[v].n - 1);
/* Separator above every group except the very first one; it starts at
   column 0 for the first result of a dependent variable, else column 1. */
1530 if ( i > 0 || v > 0 )
1532 const int left_col = (i == 0) ? 0 : 1;
1533 tab_hline (tbl, TAL_1, left_col, n_cols - 1,
1534 heading_rows + row_var_start + i * DESCRIPTIVE_ROWS);
/* With a first factor variable, its value name for this result is rendered
   into vstr. */
1537 if ( fctr->indep_var[0])
1540 ds_init_empty (&vstr);
1541 var_append_value_name (fctr->indep_var[0],
1542 result->value[0], &vstr);
1545 heading_rows + row_var_start + i * DESCRIPTIVE_ROWS,
/* Row labels: column n_cols - 4 carries one label per row offset (0, 1 and
   3 through 12); column n_cols - 3 carries sub-labels for offsets 1 and 2.
   Several label strings are on lines not visible here. */
1554 tab_text (tbl, n_cols - 4,
1555 heading_rows + row_var_start + i * DESCRIPTIVE_ROWS,
1559 tab_text (tbl, n_cols - 4,
1560 heading_rows + row_var_start + 1 + i * DESCRIPTIVE_ROWS,
1561 TAB_LEFT | TAT_PRINTF,
1562 _("%g%% Confidence Interval for Mean"),
1563 cmd.n_cinterval[0]);
1565 tab_text (tbl, n_cols - 3,
1566 heading_rows + row_var_start + 1 + i * DESCRIPTIVE_ROWS,
1570 tab_text (tbl, n_cols - 3,
1571 heading_rows + row_var_start + 2 + i * DESCRIPTIVE_ROWS,
1575 tab_text (tbl, n_cols - 4,
1576 heading_rows + row_var_start + 3 + i * DESCRIPTIVE_ROWS,
1577 TAB_LEFT | TAT_PRINTF,
1578 _("5%% Trimmed Mean"));
1580 tab_text (tbl, n_cols - 4,
1581 heading_rows + row_var_start + 4 + i * DESCRIPTIVE_ROWS,
1585 tab_text (tbl, n_cols - 4,
1586 heading_rows + row_var_start + 5 + i * DESCRIPTIVE_ROWS,
1590 tab_text (tbl, n_cols - 4,
1591 heading_rows + row_var_start + 6 + i * DESCRIPTIVE_ROWS,
1593 _("Std. Deviation"));
1595 tab_text (tbl, n_cols - 4,
1596 heading_rows + row_var_start + 7 + i * DESCRIPTIVE_ROWS,
1600 tab_text (tbl, n_cols - 4,
1601 heading_rows + row_var_start + 8 + i * DESCRIPTIVE_ROWS,
1605 tab_text (tbl, n_cols - 4,
1606 heading_rows + row_var_start + 9 + i * DESCRIPTIVE_ROWS,
1610 tab_text (tbl, n_cols - 4,
1611 heading_rows + row_var_start + 10 + i * DESCRIPTIVE_ROWS,
1613 _("Interquartile Range"));
1616 tab_text (tbl, n_cols - 4,
1617 heading_rows + row_var_start + 11 + i * DESCRIPTIVE_ROWS,
1621 tab_text (tbl, n_cols - 4,
1622 heading_rows + row_var_start + 12 + i * DESCRIPTIVE_ROWS,
1627 /* Now the statistics ... */
/* Offset 0: mean in column n_cols - 2, se_mean in column n_cols - 1. */
1629 tab_double (tbl, n_cols - 2,
1630 heading_rows + row_var_start + i * DESCRIPTIVE_ROWS,
1632 result->metrics[v].mean,
1635 tab_double (tbl, n_cols - 1,
1636 heading_rows + row_var_start + i * DESCRIPTIVE_ROWS,
1638 result->metrics[v].se_mean,
/* Offsets 1 and 2: mean minus, then plus, t times se_mean. */
1642 tab_double (tbl, n_cols - 2,
1643 heading_rows + row_var_start + 1 + i * DESCRIPTIVE_ROWS,
1645 result->metrics[v].mean - t *
1646 result->metrics[v].se_mean,
1649 tab_double (tbl, n_cols - 2,
1650 heading_rows + row_var_start + 2 + i * DESCRIPTIVE_ROWS,
1652 result->metrics[v].mean + t *
1653 result->metrics[v].se_mean,
/* Offset 3: value returned by trimmed_mean_calculate. */
1657 tab_double (tbl, n_cols - 2,
1658 heading_rows + row_var_start + 3 + i * DESCRIPTIVE_ROWS,
1660 trimmed_mean_calculate ((struct trimmed_mean *) result->metrics[v].trimmed_mean),
/* Offset 4: percentile_calculate applied to quartiles[1]. */
1664 tab_double (tbl, n_cols - 2,
1665 heading_rows + row_var_start + 4 + i * DESCRIPTIVE_ROWS,
1667 percentile_calculate (result->metrics[v].quartiles[1], percentile_algorithm),
/* Offsets 5 and 6: variance and its square root. */
1671 tab_double (tbl, n_cols - 2,
1672 heading_rows + row_var_start + 5 + i * DESCRIPTIVE_ROWS,
1674 result->metrics[v].variance,
1677 tab_double (tbl, n_cols - 2,
1678 heading_rows + row_var_start + 6 + i * DESCRIPTIVE_ROWS,
1680 sqrt (result->metrics[v].variance),
/* Offset 10: quartiles[2] minus quartiles[0]. */
1683 tab_double (tbl, n_cols - 2,
1684 heading_rows + row_var_start + 10 + i * DESCRIPTIVE_ROWS,
1686 percentile_calculate (result->metrics[v].quartiles[2],
1687 percentile_algorithm) -
1688 percentile_calculate (result->metrics[v].quartiles[0],
1689 percentile_algorithm),
/* Offsets 11 and 12: skewness and kurtosis in column n_cols - 2, with
   calc_seskew (n) and calc_sekurt (n) beside them in column n_cols - 1. */
1693 tab_double (tbl, n_cols - 2,
1694 heading_rows + row_var_start + 11 + i * DESCRIPTIVE_ROWS,
1696 result->metrics[v].skewness,
1699 tab_double (tbl, n_cols - 2,
1700 heading_rows + row_var_start + 12 + i * DESCRIPTIVE_ROWS,
1702 result->metrics[v].kurtosis,
1705 tab_double (tbl, n_cols - 1,
1706 heading_rows + row_var_start + 11 + i * DESCRIPTIVE_ROWS,
1708 calc_seskew (result->metrics[v].n),
1711 tab_double (tbl, n_cols - 1,
1712 heading_rows + row_var_start + 12 + i * DESCRIPTIVE_ROWS,
1714 calc_sekurt (result->metrics[v].n),
/* Offsets 7 to 9 are filled from the head entries of the minima and maxima
   lists; offset 9 holds maximum->value - minimum->value.  The values
   written at offsets 7 and 8 are on lines not visible here. */
1718 struct extremum *minimum, *maximum ;
1720 struct ll *max_ll = ll_head (extrema_list (result->metrics[v].maxima));
1721 struct ll *min_ll = ll_head (extrema_list (result->metrics[v].minima));
1723 maximum = ll_data (max_ll, struct extremum, ll);
1724 minimum = ll_data (min_ll, struct extremum, ll);
1726 tab_double (tbl, n_cols - 2,
1727 heading_rows + row_var_start + 7 + i * DESCRIPTIVE_ROWS,
1732 tab_double (tbl, n_cols - 2,
1733 heading_rows + row_var_start + 8 + i * DESCRIPTIVE_ROWS,
1738 tab_double (tbl, n_cols - 2,
1739 heading_rows + row_var_start + 9 + i * DESCRIPTIVE_ROWS,
1741 maximum->value - minimum->value,
/* Heavy vertical rule between heading and data columns, then the table
   title and the two data-column headings. */
1747 tab_vline (tbl, TAL_2, heading_columns, 0, n_rows - 1);
1749 tab_title (tbl, _("Descriptives"));
1751 tab_text (tbl, n_cols - 2, 0, TAB_CENTER | TAT_TITLE,
1754 tab_text (tbl, n_cols - 1, 0, TAB_CENTER | TAT_TITLE,
/* Builds the table titled "Extreme Values".  Each (dependent variable,
   factor result) pair occupies cmd.st_n * 2 rows: the first cmd.st_n rows
   are filled from the maxima list and the next cmd.st_n rows from the
   minima list.
   NOTE(review): this excerpt is missing lines (braces, some declarations and
   call arguments), so these comments describe only the statements that are
   visible here. */
1763 show_extremes (const struct variable **dependent_var,
1765 const struct xfactor *fctr)
/* Three heading columns by default; 4 when fctr->indep_var[0] is set and
   5 when fctr->indep_var[1] is set. */
1768 int heading_columns = 3;
1770 const int heading_rows = 1;
1771 struct tab_table *tbl;
1778 if ( fctr->indep_var[0] )
1780 heading_columns = 4;
1782 if ( fctr->indep_var[1] )
1784 heading_columns = 5;
/* n_rows is scaled by (number of factor results * cmd.st_n * 2) and the
   heading rows are then added; its starting value is set on a line that is
   not visible here. */
1788 n_rows *= ll_count (&fctr->result_list) * cmd.st_n * 2;
1789 n_rows += heading_rows;
/* Two data columns (n_cols - 2 and n_cols - 1) follow the heading columns. */
1791 n_cols = heading_columns + 2;
1793 tbl = tab_create (n_cols, n_rows, 0);
1794 tab_headers (tbl, heading_columns, 0, heading_rows, 0);
1796 tab_dim (tbl, tab_natural_dimensions);
1798 /* Outline the box */
1803 n_cols - 1, n_rows - 1);
1806 tab_hline (tbl, TAL_2, 0, n_cols - 1, heading_rows );
1807 tab_hline (tbl, TAL_2, 1, n_cols - 1, heading_rows );
1808 tab_vline (tbl, TAL_1, n_cols - 1, 0, n_rows - 1);
/* Heading row: the factor variables, when present, title columns 1 and 2. */
1810 if ( fctr->indep_var[0])
1811 tab_text (tbl, 1, 0, TAT_TITLE, var_to_string (fctr->indep_var[0]));
1813 if ( fctr->indep_var[1])
1814 tab_text (tbl, 2, 0, TAT_TITLE, var_to_string (fctr->indep_var[1]));
1816 for (v = 0 ; v < n_dep_var ; ++v )
/* First body row (relative to heading_rows) belonging to dependent
   variable v. */
1820 const int row_var_start = v * cmd.st_n * 2 * ll_count(&fctr->result_list);
1824 heading_rows + row_var_start,
1825 TAB_LEFT | TAT_TITLE,
1826 var_to_string (dependent_var[v])
1829 for (ll = ll_head (&fctr->result_list);
1830 ll != ll_null (&fctr->result_list); i++, ll = ll_next (ll))
/* Row offset of factor result i within the current dependent variable. */
1835 const int row_result_start = i * cmd.st_n * 2;
1837 const struct factor_result *result =
1838 ll_data (ll, struct factor_result, ll);
/* One rule at the top of this result's rows and one between its two
   halves of cmd.st_n rows each. */
1841 tab_hline (tbl, TAL_1, 1, n_cols - 1,
1842 heading_rows + row_var_start + row_result_start);
1844 tab_hline (tbl, TAL_1, heading_columns - 2, n_cols - 1,
1845 heading_rows + row_var_start + row_result_start + cmd.st_n);
/* Column n_cols - 3 gets a printf-formatted text in every row of both
   halves; the format string and its argument are not visible here. */
1847 for ( e = 1; e <= cmd.st_n; ++e )
1849 tab_text (tbl, n_cols - 3,
1850 heading_rows + row_var_start + row_result_start + e - 1,
1851 TAB_RIGHT | TAT_PRINTF,
1854 tab_text (tbl, n_cols - 3,
1855 heading_rows + row_var_start + row_result_start + cmd.st_n + e - 1,
1856 TAB_RIGHT | TAT_PRINTF,
/* Second half: walk the minima list.  Each extremum fills consecutive rows
   while its weight counter stays positive and e is below cmd.st_n; e is
   presumably advanced inside the inner loop on a line not visible here.
   NOTE(review): the walk advances with ll_next and no end-of-list test is
   visible -- confirm the list always supplies enough weighted entries. */
1861 min_ll = ll_head (extrema_list (result->metrics[v].minima));
1862 for (e = 0; e < cmd.st_n;)
1864 struct extremum *minimum = ll_data (min_ll, struct extremum, ll);
1865 double weight = minimum->weight;
1867 while (weight-- > 0 && e < cmd.st_n)
1869 tab_double (tbl, n_cols - 1,
1870 heading_rows + row_var_start + row_result_start + cmd.st_n + e,
1876 tab_fixed (tbl, n_cols - 2,
1877 heading_rows + row_var_start +
1878 row_result_start + cmd.st_n + e,
1885 min_ll = ll_next (min_ll);
/* First half: the same walk over the maxima list, writing to rows
   row_result_start + e.
   NOTE(review): as for the minima, no end-of-list test is visible. */
1889 max_ll = ll_head (extrema_list (result->metrics[v].maxima));
1890 for (e = 0; e < cmd.st_n;)
1892 struct extremum *maximum = ll_data (max_ll, struct extremum, ll);
1893 double weight = maximum->weight;
1895 while (weight-- > 0 && e < cmd.st_n)
1897 tab_double (tbl, n_cols - 1,
1898 heading_rows + row_var_start +
1899 row_result_start + e,
1905 tab_fixed (tbl, n_cols - 2,
1906 heading_rows + row_var_start +
1907 row_result_start + e,
1914 max_ll = ll_next (max_ll);
/* With a first factor variable, its value name for this result is rendered
   into vstr. */
1918 if ( fctr->indep_var[0])
1921 ds_init_empty (&vstr);
1922 var_append_value_name (fctr->indep_var[0],
1923 result->value[0], &vstr);
1926 heading_rows + row_var_start + row_result_start,
/* Column n_cols - 4 labels the first row of each half; the label strings
   are not visible here. */
1935 tab_text (tbl, n_cols - 4,
1936 heading_rows + row_var_start + row_result_start,
1940 tab_text (tbl, n_cols - 4,
1941 heading_rows + row_var_start + row_result_start + cmd.st_n,
/* Heavy vertical rule between heading and data columns, then the table
   title and the two data-column headings. */
1947 tab_vline (tbl, TAL_2, heading_columns, 0, n_rows - 1);
1950 tab_title (tbl, _("Extreme Values"));
1953 tab_text (tbl, n_cols - 2, 0, TAB_CENTER | TAT_TITLE,
1957 tab_text (tbl, n_cols - 1, 0, TAB_CENTER | TAT_TITLE,
/* Number of table rows written per (dependent variable, factor result)
   pair: offset 0 is labelled with ptile_alg_desc[percentile_algorithm] and
   offset 1 with "Tukey's Hinges". */
1963 #define PERCENTILE_ROWS 2
/* Builds the table titled "Percentiles", with one data column per entry of
   percentile_list and PERCENTILE_ROWS rows per (dependent variable, factor
   result) pair.
   NOTE(review): this excerpt is missing lines (braces, some declarations and
   call arguments), so these comments describe only the statements that are
   visible here. */
1966 show_percentiles (const struct variable **dependent_var,
1968 const struct xfactor *fctr)
/* Two heading columns by default; 3 when fctr->indep_var[0] is set and
   4 when fctr->indep_var[1] is set. */
1972 int heading_columns = 2;
1974 const int n_percentiles = subc_list_double_count (&percentile_list);
1975 const int heading_rows = 2;
1976 struct tab_table *tbl;
1983 if ( fctr->indep_var[0] )
1985 heading_columns = 3;
1987 if ( fctr->indep_var[1] )
1989 heading_columns = 4;
/* n_rows is scaled by (number of factor results * PERCENTILE_ROWS) and the
   heading rows are then added; its starting value is set on a line that is
   not visible here. */
1993 n_rows *= ll_count (&fctr->result_list) * PERCENTILE_ROWS;
1994 n_rows += heading_rows;
1996 n_cols = heading_columns + n_percentiles;
1998 tbl = tab_create (n_cols, n_rows, 0);
1999 tab_headers (tbl, heading_columns, 0, heading_rows, 0);
2001 tab_dim (tbl, tab_natural_dimensions);
2003 /* Outline the box */
2008 n_cols - 1, n_rows - 1);
2011 tab_hline (tbl, TAL_2, 0, n_cols - 1, heading_rows );
2012 tab_hline (tbl, TAL_2, 1, n_cols - 1, heading_rows );
/* The factor variables, when present, title columns 1 and 2 in heading
   row 1. */
2014 if ( fctr->indep_var[0])
2015 tab_text (tbl, 1, 1, TAT_TITLE, var_to_string (fctr->indep_var[0]));
2017 if ( fctr->indep_var[1])
2018 tab_text (tbl, 2, 1, TAT_TITLE, var_to_string (fctr->indep_var[1]));
2020 for (v = 0 ; v < n_dep_var ; ++v )
/* First body row (relative to heading_rows) belonging to dependent
   variable v. */
2026 const int row_var_start =
2027 v * PERCENTILE_ROWS * ll_count(&fctr->result_list);
2031 heading_rows + row_var_start,
2032 TAB_LEFT | TAT_TITLE,
2033 var_to_string (dependent_var[v])
2036 for (ll = ll_head (&fctr->result_list);
2037 ll != ll_null (&fctr->result_list); i++, ll = ll_next (ll))
2040 const struct factor_result *result =
2041 ll_data (ll, struct factor_result, ll);
/* Separator above every group except the very first one; it starts at
   column 0 for the first result of a dependent variable, else column 1. */
2043 if ( i > 0 || v > 0 )
2045 const int left_col = (i == 0) ? 0 : 1;
2046 tab_hline (tbl, TAL_1, left_col, n_cols - 1,
2047 heading_rows + row_var_start + i * PERCENTILE_ROWS);
/* With a first factor variable, its value name for this result is rendered
   into vstr. */
2050 if ( fctr->indep_var[0])
2053 ds_init_empty (&vstr);
2054 var_append_value_name (fctr->indep_var[0],
2055 result->value[0], &vstr);
2058 heading_rows + row_var_start + i * PERCENTILE_ROWS,
/* Labels for the two rows, placed in the last heading column. */
2067 tab_text (tbl, n_cols - n_percentiles - 1,
2068 heading_rows + row_var_start + i * PERCENTILE_ROWS,
2070 ptile_alg_desc [percentile_algorithm]);
2073 tab_text (tbl, n_cols - n_percentiles - 1,
2074 heading_rows + row_var_start + 1 + i * PERCENTILE_ROWS,
2076 _("Tukey's Hinges"));
2079 tab_vline (tbl, TAL_1, n_cols - n_percentiles -1, heading_rows, n_rows - 1);
/* The remaining arguments of this call (presumably where the hinge values
   are stored) are not visible here. */
2081 tukey_hinges_calculate ((struct tukey_hinges *) result->metrics[v].tukey_hinges,
/* Offset 0 gets percentile_calculate for every requested percentile.
   Offset 1 is written only when `hinge` no longer equals SYSMIS; the
   branches for ptile 0.5, 0.25 and 0.75 presumably assign it, but those
   assignment lines are not visible here. */
2084 for (j = 0; j < n_percentiles; ++j)
2086 double hinge = SYSMIS;
2087 tab_double (tbl, n_cols - n_percentiles + j,
2088 heading_rows + row_var_start + i * PERCENTILE_ROWS,
2090 percentile_calculate (result->metrics[v].ptl[j],
2091 percentile_algorithm),
2095 if ( result->metrics[v].ptl[j]->ptile == 0.5)
2097 else if ( result->metrics[v].ptl[j]->ptile == 0.25)
2099 else if ( result->metrics[v].ptl[j]->ptile == 0.75)
2102 if ( hinge != SYSMIS)
2103 tab_double (tbl, n_cols - n_percentiles + j,
2104 heading_rows + row_var_start + 1 + i * PERCENTILE_ROWS,
2114 tab_vline (tbl, TAL_2, heading_columns, 0, n_rows - 1);
2116 tab_title (tbl, _("Percentiles"));
/* Heading row 1: one printf-formatted title per percentile column, taken
   from percentile_list (the format string is not visible here). */
2119 for (i = 0 ; i < n_percentiles; ++i )
2121 tab_text (tbl, n_cols - n_percentiles + i, 1,
2122 TAB_CENTER | TAT_TITLE | TAT_PRINTF,
2124 subc_list_double_at (&percentile_list, i)
/* Heading row 0: a joined title cell starting at the first percentile
   column; its extent and text are not visible here. */
2130 tab_joint_text (tbl,
2131 n_cols - n_percentiles, 0,
2133 TAB_CENTER | TAT_TITLE,
2136 /* Vertical lines for the data only */
2140 n_cols - n_percentiles, 1,
2141 n_cols - 1, n_rows - 1);
2143 tab_hline (tbl, TAL_1, n_cols - n_percentiles, n_cols - 1, 1);
/* Appends a short description of RESULT's factor values to STR: the value
   name of fctr->indep_var[0], followed, when fctr->indep_var[1] is set, by
   "," and that variable's value name.  Nothing is appended when there is no
   first factor variable.
   NOTE(review): a ")" is appended below although no "(" is written in the
   visible lines of this function -- confirm whether that is intended
   (braces and possibly other lines are missing from this excerpt). */
2151 factor_to_string_concise (const struct xfactor *fctr,
2152 const struct factor_result *result,
2156 if (fctr->indep_var[0])
2158 var_append_value_name (fctr->indep_var[0], result->value[0], str);
2160 if ( fctr->indep_var[1] )
2162 ds_put_cstr (str, ",");
2164 var_append_value_name (fctr->indep_var[1], result->value[1], str);
2166 ds_put_cstr (str, ")");
/* Appends a description of RESULT's factor values to STR in the form
   "(NAME = VALUE" for fctr->indep_var[0], then ",NAME = VALUE" when
   fctr->indep_var[1] is set, then ")".  Names come from var_get_name and
   values from var_append_value_name.
   NOTE(review): braces are missing from this excerpt, so which branch the
   closing ")" belongs to cannot be determined from here. */
2173 factor_to_string (const struct xfactor *fctr,
2174 const struct factor_result *result,
2178 if (fctr->indep_var[0])
2180 ds_put_format (str, "(%s = ", var_get_name (fctr->indep_var[0]));
2182 var_append_value_name (fctr->indep_var[0], result->value[0], str);
2184 if ( fctr->indep_var[1] )
/* Second factor: separated from the first by a comma. */
2186 ds_put_cstr (str, ",");
2187 ds_put_format (str, "%s = ", var_get_name (fctr->indep_var[1]));
2189 var_append_value_name (fctr->indep_var[1], result->value[1], str);
2191 ds_put_cstr (str, ")");