1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2004, 2008, 2009 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
19 #include <gsl/gsl_cdf.h>
20 #include <libpspp/message.h>
25 #include <math/sort.h>
26 #include <math/order-stats.h>
27 #include <math/percentiles.h>
28 #include <math/tukey-hinges.h>
29 #include <math/box-whisker.h>
30 #include <math/trimmed-mean.h>
31 #include <math/extrema.h>
33 #include <data/case.h>
34 #include <data/casegrouper.h>
35 #include <data/casereader.h>
36 #include <data/casewriter.h>
37 #include <data/dictionary.h>
38 #include <data/procedure.h>
39 #include <data/subcase.h>
40 #include <data/value-labels.h>
41 #include <data/variable.h>
42 #include <language/command.h>
43 #include <language/dictionary/split-file.h>
44 #include <language/lexer/lexer.h>
45 #include <libpspp/compiler.h>
46 #include <libpspp/hash.h>
47 #include <libpspp/message.h>
48 #include <libpspp/misc.h>
49 #include <libpspp/str.h>
50 #include <math/moments.h>
51 #include <output/charts/box-whisker.h>
52 #include <output/charts/cartesian.h>
53 #include <output/manager.h>
54 #include <output/table.h>
60 #define _(msgid) gettext (msgid)
61 #define N_(msgid) msgid
64 #include <output/chart.h>
65 #include <output/charts/plot-hist.h>
66 #include <output/charts/plot-chart.h>
67 #include <math/histogram.h>
74 missing=miss:pairwise/!listwise,
76 incl:include/!exclude;
77 +compare=cmp:variables/!groups;
80 +plot[plt_]=stemleaf,boxplot,npplot,:spreadlevel(*d:n),histogram,all,none;
82 +statistics[st_]=descriptives,:extreme(*d:n),all,none.
/* Parsed EXAMINE command; filled in by parse_examine () from
   cmd_examine () and read by the calculation and output routines. */
90 static struct cmd_examine cmd;
/* The dependent variables named on the VARIABLES subcommand.  The array
   is filled in by parse_variables_const () in xmn_custom_variables ()
   and released at the end of cmd_examine (). */
92 static const struct variable **dependent_vars;
93 static size_t n_dependent_vars;
/* The percentiles to compute, expressed as percentages (they are
   divided by 100 before being handed to percentile_create ()), and the
   algorithm selected on the PERCENTILES subcommand.  cmd_examine ()
   resets the algorithm to PC_HAVERAGE before parsing. */
97 static subc_list_double percentile_list;
98 static enum pc_alg percentile_algorithm;
/* Everything accumulated for one dependent variable within one
   factor_result.

   NOTE(review): several member declarations are not visible in this
   excerpt.  Other parts of the file reference cmin, n, n_valid,
   n_ptiles, mean, variance, skewness, kurtosis and se_mean; the three
   comments below that are not followed by a declaration presumably
   belong to cmin, n and n_valid respectively -- confirm against the
   full file. */
100 struct factor_metrics
102 struct moments1 *moments;
/* One percentile statistic per entry of percentile_list. */
104 struct percentile **ptl;
/* Order statistics; the concrete objects come from
   tukey_hinges_create (), box_whisker_create (), trimmed_mean_create (),
   histogram_create () and np_create () and are cast where used. */
107 struct statistic *tukey_hinges;
108 struct statistic *box_whisker;
109 struct statistic *trimmed_mean;
110 struct statistic *histogram;
111 struct order_stats *np;
113 /* Three quartiles indexing into PTL */
114 struct percentile **quartiles;
116 /* A reader sorted in ASCENDING order */
117 struct casereader *up_reader;
119 /* The minimum value of all the weights */
122 /* Sum of all weights, including those for missing values */
125 /* Sum of weights of non_missing values */
/* Smallest and largest values seen, created with extrema_create (). */
138 struct extrema *minima;
139 struct extrema *maxima;
/* Values of the factor's independent variables for this result.
   Element I is initialised, copied and destroyed only when the owning
   factor's indep_var[I] is set. */
146 union value value[2];
148 /* An array of factor metrics, one for each variable */
149 struct factor_metrics *metrics;
154 /* We need to make a list of this structure */
157 /* The independent variable */
/* Up to two independent variables; indep_var[1] is NULL for a factor
   with a single variable.
   NOTE(review): `const' appears on both sides of `struct variable', so
   both qualify the pointed-to type.  The pointers themselves are
   assigned in examine_parse_independent_vars (), so the second
   qualifier looks redundant rather than a misplaced `* const'. */
158 const struct variable const* indep_var[2];
160 /* A list of results for this factor */
161 struct ll_list result_list ;
/* Release every factor_result held in FCTR->result_list, together with
   the per-variable metrics each result owns.

   NOTE(review): metrics[v].np is not released in the lines visible
   here; show_npplot () destroys it after plotting. */
166 factor_destroy (struct xfactor *fctr)
168 struct ll *ll = ll_head (&fctr->result_list);
169 while (ll != ll_null (&fctr->result_list))
172 struct factor_result *result =
173 ll_data (ll, struct factor_result, ll);
176 for (v = 0; v < n_dependent_vars; ++v)
179 moments1_destroy (result->metrics[v].moments);
180 extrema_destroy (result->metrics[v].minima);
181 extrema_destroy (result->metrics[v].maxima);
182 statistic_destroy (result->metrics[v].trimmed_mean);
183 statistic_destroy (result->metrics[v].tukey_hinges);
184 statistic_destroy (result->metrics[v].box_whisker);
185 statistic_destroy (result->metrics[v].histogram);
186 for (i = 0 ; i < result->metrics[v].n_ptiles; ++i)
187 statistic_destroy ((struct statistic *) result->metrics[v].ptl[i]);
188 free (result->metrics[v].ptl);
/* The quartile entries alias elements of PTL, which were destroyed
   above, so only the array itself is freed. */
189 free (result->metrics[v].quartiles);
190 casereader_destroy (result->metrics[v].up_reader);
/* Destroy only the values that examine_group () initialised. */
193 for (i = 0; i < 2; i++)
194 if (fctr->indep_var[i])
195 value_destroy (&result->value[i],
196 var_get_width (fctr->indep_var[i]));
197 free (result->metrics);
/* Holds the results computed by run_examine () over all the cases of a
   split group, without reference to any factor from factor_list. */
203 static struct xfactor level0_factor;
/* The factors parsed by examine_parse_independent_vars (), in the order
   in which they were appended. */
204 static struct ll_list factor_list;
206 /* Parse the clause specifying the factors */
207 static int examine_parse_independent_vars (struct lexer *lexer,
208 const struct dictionary *dict,
209 struct cmd_examine *cmd);
211 /* Output functions */
212 static void show_summary (const struct variable **dependent_var, int n_dep_var,
213 const struct dictionary *dict,
214 const struct xfactor *f);
217 static void show_descriptives (const struct variable **dependent_var,
219 const struct xfactor *f);
222 static void show_percentiles (const struct variable **dependent_var,
224 const struct xfactor *f);
227 static void show_extremes (const struct variable **dependent_var,
229 const struct xfactor *f);
234 /* Per Split function */
235 static void run_examine (struct cmd_examine *, struct casereader *,
238 static void output_examine (const struct dictionary *dict);
241 void factor_calc (const struct ccase *c, int case_no,
242 double weight, bool case_missing);
245 /* Represent a factor as a string, so it can be
246 printed in a human readable fashion */
247 static void factor_to_string (const struct xfactor *fctr,
248 const struct factor_result *result,
251 /* Represent a factor as a string, so it can be
252 printed in a human readable fashion,
253 but sacrificing some readability for the sake of brevity */
255 factor_to_string_concise (const struct xfactor *fctr,
256 const struct factor_result *result,
262 /* Categories of missing values to exclude. */
263 static enum mv_class exclude_values;
/* Entry point for the EXAMINE command: parse the command, then run the
   analysis once per split-file group.

   Returns CMD_SUCCESS if the case grouper and proc_commit () both
   report success, otherwise CMD_CASCADING_FAILURE.  The value returned
   when parsing fails is not visible in this excerpt. */
266 cmd_examine (struct lexer *lexer, struct dataset *ds)
268 struct casegrouper *grouper;
269 struct casereader *group;
/* Reset the file-scope parse state before parsing. */
272 subc_list_double_create (&percentile_list);
273 percentile_algorithm = PC_HAVERAGE;
275 ll_init (&factor_list);
277 if ( !parse_examine (lexer, ds, &cmd, NULL) )
279 subc_list_double_destroy (&percentile_list);
283 /* If /MISSING=INCLUDE is set, user-missing values are treated as
   valid: only system-missing values are excluded. */
284 exclude_values = cmd.incl == XMN_INCLUDE ? MV_SYSTEM : MV_ANY;
286 if ( cmd.st_n == SYSMIS )
/* Default confidence level when CINTERVAL was not given. */
289 if ( ! cmd.sbc_cinterval)
290 cmd.n_cinterval[0] = 95.0;
292 /* If descriptives have been requested, make sure the
293 quartiles are calculated */
294 if ( cmd.a_statistics[XMN_ST_DESCRIPTIVES] )
296 subc_list_double_push (&percentile_list, 25);
297 subc_list_double_push (&percentile_list, 50);
298 subc_list_double_push (&percentile_list, 75);
301 grouper = casegrouper_create_splits (proc_open (ds), dataset_dict (ds));
303 while (casegrouper_get_next_group (grouper, &group))
/* presumably this appends a running case number as the last value of
   every case; examine_group () reads the last value as the case's
   location -- confirm against the casereader documentation. */
305 struct casereader *reader =
306 casereader_create_arithmetic_sequence (group, 1, 1);
308 run_examine (&cmd, reader, ds);
311 ok = casegrouper_destroy (grouper);
312 ok = proc_commit (ds) && ok;
/* NOTE(review): free (NULL) is a no-op, so the guard is redundant; the
   pointer is also not reset to NULL afterwards. */
314 if ( dependent_vars )
315 free (dependent_vars);
317 subc_list_double_destroy (&percentile_list);
319 return ok ? CMD_SUCCESS : CMD_CASCADING_FAILURE;
323 /* Plot the normal and detrended normal Q-Q plots for NP.
   Label the plots with LABEL.

   Two charts are created: the normal Q-Q plot, with the ideal normal
   line drawn through it, and the detrended plot, with a horizontal
   reference line.  The points of both are read from the cases held in
   NP->writer, and both charts are submitted at the end. */
326 np_plot (struct np *np, const char *label)
328 double yfirst = 0, ylast = 0;
335 struct chart *np_chart;
337 /* Detrended Normal Plot */
338 struct chart *dnp_chart;
340 /* The slope and intercept of the ideal normal probability line */
341 const double slope = 1.0 / np->stddev;
342 const double intercept = -np->mean / np->stddev;
346 msg (MW, _("Not creating plot because data set is empty."));
350 np_chart = chart_create ();
351 dnp_chart = chart_create ();
/* NOTE(review): the action taken when either chart is missing is not
   visible in this excerpt; if it returns, a chart that was created
   successfully would still need to be released -- confirm. */
353 if ( !np_chart || ! dnp_chart )
356 chart_write_title (np_chart, _("Normal Q-Q Plot of %s"), label);
357 chart_write_xlabel (np_chart, _("Observed Value"));
358 chart_write_ylabel (np_chart, _("Expected Normal"));
360 chart_write_title (dnp_chart, _("Detrended Normal Q-Q Plot of %s"),
362 chart_write_xlabel (dnp_chart, _("Observed Value"));
363 chart_write_ylabel (dnp_chart, _("Dev from Normal"));
/* Standard normal quantiles at probabilities 1/(n+1) and n/(n+1); they
   are used further down as the y range of the Q-Q plot and as the
   extent of the ideal line.
   NOTE(review): 1 / (np->n + 1) is a fraction only if np->n has a
   floating-point type -- confirm the declaration of struct np. */
365 yfirst = gsl_cdf_ugaussian_Pinv (1 / (np->n + 1));
366 ylast = gsl_cdf_ugaussian_Pinv (np->n / (np->n + 1));
368 /* Need to make sure that both the scatter plot and the ideal fit into the
370 x_lower = MIN (np->y_min, (yfirst - intercept) / slope) ;
371 x_upper = MAX (np->y_max, (ylast - intercept) / slope) ;
372 slack = (x_upper - x_lower) * 0.05 ;
374 chart_write_xscale (np_chart, x_lower - slack, x_upper + slack, 5);
375 chart_write_xscale (dnp_chart, np->y_min, np->y_max, 5);
377 chart_write_yscale (np_chart, yfirst, ylast, 5);
378 chart_write_yscale (dnp_chart, np->dns_min, np->dns_max, 5);
381 struct casereader *reader = casewriter_make_reader (np->writer);
383 while ((c = casereader_read (reader)) != NULL)
385 chart_datum (np_chart, 0, case_data_idx (c, NP_IDX_Y)->f, case_data_idx (c, NP_IDX_NS)->f);
386 chart_datum (dnp_chart, 0, case_data_idx (c, NP_IDX_Y)->f, case_data_idx (c, NP_IDX_DNS)->f);
390 casereader_destroy (reader);
393 chart_line (dnp_chart, 0, 0, np->y_min, np->y_max , CHART_DIM_X);
394 chart_line (np_chart, slope, intercept, yfirst, ylast , CHART_DIM_Y);
396 chart_submit (np_chart);
397 chart_submit (dnp_chart);
// Draw the normal probability plots for every dependent variable and
// every result of FCTR.  Each plot is labelled with the variable name
// followed by the factor description from factor_to_string ().
402 show_npplot (const struct variable **dependent_var,
404 const struct xfactor *fctr)
408 for (v = 0; v < n_dep_var; ++v)
411 for (ll = ll_head (&fctr->result_list);
412 ll != ll_null (&fctr->result_list);
416 const struct factor_result *result =
417 ll_data (ll, struct factor_result, ll);
419 ds_init_empty (&str);
420 ds_put_format (&str, "%s ", var_get_name (dependent_var[v]));
422 factor_to_string (fctr, result, &str);
424 np_plot ((struct np*) result->metrics[v].np, ds_cstr(&str));
// The np statistic is released here, straight after plotting; no
// matching release is visible in factor_destroy ().
// NOTE(review): metrics[v].np is not reset afterwards, so plotting the
// same result a second time would use and destroy a dead object --
// confirm that each factor is plotted only once.
426 statistic_destroy ((struct statistic *)result->metrics[v].np);
// Plot a histogram for every dependent variable and every result of
// FCTR, using the count, mean and standard deviation obtained from the
// result's moments.
435 show_histogram (const struct variable **dependent_var,
437 const struct xfactor *fctr)
441 for (v = 0; v < n_dep_var; ++v)
444 for (ll = ll_head (&fctr->result_list);
445 ll != ll_null (&fctr->result_list);
449 const struct factor_result *result =
450 ll_data (ll, struct factor_result, ll);
453 ds_init_empty (&str);
454 ds_put_format (&str, "%s ", var_get_name (dependent_var[v]));
456 factor_to_string (fctr, result, &str);
458 moments1_calculate ((struct moments1 *) result->metrics[v].moments,
459 &n, &mean, &var, NULL, NULL);
// NOTE(review): examine_group () tests for an empty extrema list before
// calling histogram_create (), and guards histogram_add () with a NULL
// check, so metrics[v].histogram may be NULL here.  Whether
// histogram_plot () accepts NULL is not visible in this file -- confirm.
460 histogram_plot ((struct histogram *) result->metrics[v].histogram,
462 n, mean, sqrt (var), false);
// Draw one chart per dependent variable, containing one box plot for
// each result (group) of FCTR.
472 show_boxplot_groups (const struct variable **dependent_var,
474 const struct xfactor *fctr)
478 for (v = 0; v < n_dep_var; ++v)
// NOTE(review): no NULL check on the result of chart_create () is
// visible in this excerpt, although np_plot () performs one; CH is
// dereferenced further down.
482 struct chart *ch = chart_create ();
483 double y_min = DBL_MAX;
484 double y_max = -DBL_MAX;
// First pass over the results: find the overall minimum and maximum so
// that all the boxes share one y scale.
486 for (ll = ll_head (&fctr->result_list);
487 ll != ll_null (&fctr->result_list);
490 const struct extremum *max, *min;
491 const struct factor_result *result =
492 ll_data (ll, struct factor_result, ll);
494 const struct ll_list *max_list =
495 extrema_list (result->metrics[v].maxima);
497 const struct ll_list *min_list =
498 extrema_list (result->metrics[v].minima);
500 if ( ll_is_empty (max_list))
502 msg (MW, _("Not creating plot because data set is empty."));
506 max = (const struct extremum *)
507 ll_data (ll_head(max_list), struct extremum, ll);
509 min = (const struct extremum *)
510 ll_data (ll_head (min_list), struct extremum, ll);
512 y_max = MAX (y_max, max->value);
513 y_min = MIN (y_min, min->value);
516 boxplot_draw_yscale (ch, y_max, y_min);
// The title names the factor's first independent variable when there
// is one.
518 if ( fctr->indep_var[0])
519 chart_write_title (ch, _("Boxplot of %s vs. %s"),
520 var_to_string (dependent_var[v]),
521 var_to_string (fctr->indep_var[0]) );
523 chart_write_title (ch, _("Boxplot of %s"),
524 var_to_string (dependent_var[v]));
// Second pass: draw the boxes.  The data area is divided into one slot
// per result, each slot being two box widths wide, and every box is
// centred in its slot.
526 for (ll = ll_head (&fctr->result_list);
527 ll != ll_null (&fctr->result_list);
530 const struct factor_result *result =
531 ll_data (ll, struct factor_result, ll);
534 const double box_width = (ch->data_right - ch->data_left)
535 / (ll_count (&fctr->result_list) * 2.0 ) ;
537 const double box_centre = (f++ * 2 + 1) * box_width + ch->data_left;
539 ds_init_empty (&str);
540 factor_to_string_concise (fctr, result, &str);
542 boxplot_draw_boxplot (ch,
543 box_centre, box_width,
544 (const struct box_whisker *)
545 result->metrics[v].box_whisker,
// Draw one chart per result (group) of FCTR, containing one box plot
// for each dependent variable.
558 show_boxplot_variables (const struct variable **dependent_var,
560 const struct xfactor *fctr
566 const struct ll_list *result_list = &fctr->result_list;
568 for (ll = ll_head (result_list);
569 ll != ll_null (result_list);
// NOTE(review): no NULL check on the result of chart_create () is
// visible before CH is dereferenced below.
574 struct chart *ch = chart_create ();
575 double y_min = DBL_MAX;
576 double y_max = -DBL_MAX;
578 const struct factor_result *result =
579 ll_data (ll, struct factor_result, ll);
// One slot per dependent variable, each two box widths wide.
581 const double box_width = (ch->data_right - ch->data_left)
582 / (n_dep_var * 2.0 ) ;
// Find the overall range across the variables for a shared y scale.
// NOTE(review): unlike show_boxplot_groups (), no test for an empty
// extrema list is visible here before its head is read -- confirm.
584 for (v = 0; v < n_dep_var; ++v)
586 const struct ll *max_ll =
587 ll_head (extrema_list (result->metrics[v].maxima));
588 const struct ll *min_ll =
589 ll_head (extrema_list (result->metrics[v].minima));
591 const struct extremum *max =
592 (const struct extremum *) ll_data (max_ll, struct extremum, ll);
594 const struct extremum *min =
595 (const struct extremum *) ll_data (min_ll, struct extremum, ll);
597 y_max = MAX (y_max, max->value);
598 y_min = MIN (y_min, min->value);
602 boxplot_draw_yscale (ch, y_max, y_min);
604 ds_init_empty (&title);
605 factor_to_string (fctr, result, &title);
608 ds_put_format (&title, "%s = ", var_get_name (fctr->indep_var[0]));
609 var_append_value_name (fctr->indep_var[0], &result->value[0], &title);
612 chart_write_title (ch, "%s", ds_cstr (&title));
615 for (v = 0; v < n_dep_var; ++v)
618 const double box_centre = (v * 2 + 1) * box_width + ch->data_left;
// NOTE(review): STR is initialised twice in a row; the first
// initialisation looks redundant.
620 ds_init_empty (&str);
621 ds_init_cstr (&str, var_get_name (dependent_var[v]));
623 boxplot_draw_boxplot (ch,
624 box_centre, box_width,
625 (const struct box_whisker *) result->metrics[v].box_whisker,
636 /* Show all the appropriate tables and plots, as selected by the
   STATISTICS, PERCENTILES and PLOT settings in `cmd': first for
   level0_factor, then for each factor in factor_list. */
638 output_examine (const struct dictionary *dict)
642 show_summary (dependent_vars, n_dependent_vars, dict, &level0_factor);
644 if ( cmd.a_statistics[XMN_ST_EXTREME] )
645 show_extremes (dependent_vars, n_dependent_vars, &level0_factor);
647 if ( cmd.a_statistics[XMN_ST_DESCRIPTIVES] )
648 show_descriptives (dependent_vars, n_dependent_vars, &level0_factor);
650 if ( cmd.sbc_percentiles)
651 show_percentiles (dependent_vars, n_dependent_vars, &level0_factor);
655 if (cmd.a_plot[XMN_PLT_BOXPLOT])
656 show_boxplot_groups (dependent_vars, n_dependent_vars, &level0_factor);
658 if (cmd.a_plot[XMN_PLT_HISTOGRAM])
659 show_histogram (dependent_vars, n_dependent_vars, &level0_factor);
661 if (cmd.a_plot[XMN_PLT_NPPLOT])
662 show_npplot (dependent_vars, n_dependent_vars, &level0_factor);
665 for (ll = ll_head (&factor_list);
666 ll != ll_null (&factor_list); ll = ll_next (ll))
668 struct xfactor *factor = ll_data (ll, struct xfactor, ll);
669 show_summary (dependent_vars, n_dependent_vars, dict, factor);
671 if ( cmd.a_statistics[XMN_ST_EXTREME] )
672 show_extremes (dependent_vars, n_dependent_vars, factor);
674 if ( cmd.a_statistics[XMN_ST_DESCRIPTIVES] )
675 show_descriptives (dependent_vars, n_dependent_vars, factor);
677 if ( cmd.sbc_percentiles)
678 show_percentiles (dependent_vars, n_dependent_vars, factor);
/* For factors, /COMPARE selects the box plot layout: GROUPS gives one
   chart per dependent variable, VARIABLES one chart per group. */
680 if (cmd.a_plot[XMN_PLT_BOXPLOT] &&
681 cmd.cmp == XMN_GROUPS)
682 show_boxplot_groups (dependent_vars, n_dependent_vars, factor);
685 if (cmd.a_plot[XMN_PLT_BOXPLOT] &&
686 cmd.cmp == XMN_VARIABLES)
687 show_boxplot_variables (dependent_vars, n_dependent_vars,
690 if (cmd.a_plot[XMN_PLT_HISTOGRAM])
691 show_histogram (dependent_vars, n_dependent_vars, factor);
693 if (cmd.a_plot[XMN_PLT_NPPLOT])
694 show_npplot (dependent_vars, n_dependent_vars, factor);
698 /* Parse the PERCENTILES subcommand */
/* Accepts a list of numbers, which are appended to percentile_list,
   followed by an optional algorithm keyword that sets
   percentile_algorithm.  The results of the lex_match () calls for
   punctuation are ignored, so the punctuation is in effect optional.
   If no numbers were given, a default list of seven percentiles is
   used.
   NOTE(review): no check that the numbers lie within 0..100 is visible
   in this excerpt. */
700 xmn_custom_percentiles (struct lexer *lexer, struct dataset *ds UNUSED,
701 struct cmd_examine *p UNUSED, void *aux UNUSED)
703 lex_match (lexer, '=');
705 lex_match (lexer, '(');
707 while ( lex_is_number (lexer) )
709 subc_list_double_push (&percentile_list, lex_number (lexer));
713 lex_match (lexer, ',') ;
715 lex_match (lexer, ')');
717 lex_match (lexer, '=');
719 if ( lex_match_id (lexer, "HAVERAGE"))
720 percentile_algorithm = PC_HAVERAGE;
722 else if ( lex_match_id (lexer, "WAVERAGE"))
723 percentile_algorithm = PC_WAVERAGE;
725 else if ( lex_match_id (lexer, "ROUND"))
726 percentile_algorithm = PC_ROUND;
728 else if ( lex_match_id (lexer, "EMPIRICAL"))
729 percentile_algorithm = PC_EMPIRICAL;
731 else if ( lex_match_id (lexer, "AEMPIRICAL"))
732 percentile_algorithm = PC_AEMPIRICAL;
734 else if ( lex_match_id (lexer, "NONE"))
735 percentile_algorithm = PC_NONE;
/* Nothing was listed explicitly: fall back to the default set. */
738 if ( 0 == subc_list_double_count (&percentile_list))
740 subc_list_double_push (&percentile_list, 5);
741 subc_list_double_push (&percentile_list, 10);
742 subc_list_double_push (&percentile_list, 25);
743 subc_list_double_push (&percentile_list, 50);
744 subc_list_double_push (&percentile_list, 75);
745 subc_list_double_push (&percentile_list, 90);
746 subc_list_double_push (&percentile_list, 95);
752 /* TOTAL and NOTOTAL are simple, mutually exclusive flags */
/* Handler for TOTAL: reports a syntax error if NOTOTAL has already been
   given on the same command. */
754 xmn_custom_total (struct lexer *lexer UNUSED, struct dataset *ds UNUSED,
755 struct cmd_examine *p, void *aux UNUSED)
757 if ( p->sbc_nototal )
759 msg (SE, _("%s and %s are mutually exclusive"),"TOTAL","NOTOTAL");
/* Handler for NOTOTAL; the counterpart of xmn_custom_total ().  It
   emits the same "mutually exclusive" error; the condition that
   triggers it is not visible in this excerpt (presumably that TOTAL
   was already given -- confirm). */
767 xmn_custom_nototal (struct lexer *lexer UNUSED, struct dataset *ds UNUSED,
768 struct cmd_examine *p, void *aux UNUSED)
772 msg (SE, _("%s and %s are mutually exclusive"), "TOTAL", "NOTOTAL");
781 /* Parser for the variables sub command
782 Returns 1 on success */
/* Fills the file-scope dependent_vars / n_dependent_vars with numeric,
   non-scratch, non-duplicate variables and, when a BY clause follows,
   hands over to examine_parse_independent_vars ().
   NOTE(review): on both visible failure paths dependent_vars is freed,
   but no reset to NULL is visible, which would leave the file-scope
   pointer dangling -- confirm. */
784 xmn_custom_variables (struct lexer *lexer, struct dataset *ds,
785 struct cmd_examine *cmd,
788 const struct dictionary *dict = dataset_dict (ds);
789 lex_match (lexer, '=');
/* The next token must be the name of an existing variable or ALL. */
791 if ( (lex_token (lexer) != T_ID || dict_lookup_var (dict, lex_tokid (lexer)) == NULL)
792 && lex_token (lexer) != T_ALL)
797 if (!parse_variables_const (lexer, dict, &dependent_vars, &n_dependent_vars,
798 PV_NO_DUPLICATE | PV_NUMERIC | PV_NO_SCRATCH) )
800 free (dependent_vars);
804 assert (n_dependent_vars);
807 if ( lex_match (lexer, T_BY))
810 success = examine_parse_independent_vars (lexer, dict, cmd);
813 free (dependent_vars);
823 /* Parse the clause specifying the factors */
/* Parses one factor, either VAR or VAR BY VAR, into a newly allocated
   xfactor and appends it to factor_list.  It then calls itself for the
   next factor, unless the following token is '.' or '/'.
   NOTE(review): what happens to SF on the failure paths is not visible
   in this excerpt, and neither is any check of the pointer returned by
   parse_variable () -- confirm. */
825 examine_parse_independent_vars (struct lexer *lexer,
826 const struct dictionary *dict,
827 struct cmd_examine *cmd)
830 struct xfactor *sf = xmalloc (sizeof *sf);
832 ll_init (&sf->result_list);
/* The next token must be the name of an existing variable or ALL. */
834 if ( (lex_token (lexer) != T_ID ||
835 dict_lookup_var (dict, lex_tokid (lexer)) == NULL)
836 && lex_token (lexer) != T_ALL)
842 sf->indep_var[0] = parse_variable (lexer, dict);
843 sf->indep_var[1] = NULL;
/* An optional second independent variable follows BY. */
845 if ( lex_token (lexer) == T_BY )
847 lex_match (lexer, T_BY);
849 if ( (lex_token (lexer) != T_ID ||
850 dict_lookup_var (dict, lex_tokid (lexer)) == NULL)
851 && lex_token (lexer) != T_ALL)
857 sf->indep_var[1] = parse_variable (lexer, dict);
859 ll_push_tail (&factor_list, &sf->ll);
862 ll_push_tail (&factor_list, &sf->ll);
864 lex_match (lexer, ',');
866 if ( lex_token (lexer) == '.' || lex_token (lexer) == '/' )
869 success = examine_parse_independent_vars (lexer, dict, cmd);
/* Compute the statistics for one group of cases and append the result
   to FACTOR->result_list.

   READER supplies the cases of the group and is destroyed before
   returning.  DICT supplies the weight variable, if any.  LEVEL is not
   referenced in the lines visible here.

   Steps:
   1. Record the group's independent-variable values from a case
      peeked from READER.
   2. For each dependent variable, read a clone of READER, accumulating
      moments, extrema, the minimum weight and the weight totals, while
      copying every case to a writer.  The writer sorts on the
      dependent variable when descriptives, box plots, normal plots or
      percentiles were requested.  Its output is kept as up_reader.
   3. When those were requested, create the percentiles, Tukey hinges
      and trimmed mean (plus the np statistic for normal plots) and
      feed them from up_reader.
   4. For histograms, create a 10-bin histogram per variable spanning
      the observed minimum and maximum, then fill it in one more pass.
   5. For box plots, build box_whisker from the hinges in another pass.

   NOTE(review), all subject to the lines missing from this excerpt:
   - a weight equal to SYSMIS is kept out of cmin, but no such test is
     visible around the additions to n and n_valid;
   - the validity test and both order_stats_accumulate () calls use
     MV_ANY, although exclude_values is derived from /MISSING in
     cmd_examine () -- confirm this is intended;
   - quartiles[] entries stay NULL unless 25, 50 and 75 are present in
     percentile_list;
   - the xcalloc () call for `os' passes (size, count), the reverse of
     the other calls; this is harmless, and the release of `os' is not
     visible here. */
878 examine_group (struct cmd_examine *cmd, struct casereader *reader, int level,
879 const struct dictionary *dict, struct xfactor *factor)
882 const struct variable *wv = dict_get_weight (dict);
885 struct factor_result *result = xzalloc (sizeof (*result));
888 for (i = 0; i < 2; i++)
889 if (factor->indep_var[i])
890 value_init (&result->value[i], var_get_width (factor->indep_var[i]));
892 result->metrics = xcalloc (n_dependent_vars, sizeof (*result->metrics));
894 if ( cmd->a_statistics[XMN_ST_EXTREME] )
895 n_extrema = cmd->st_n;
/* A case peeked from the group supplies its independent-variable
   values. */
898 c = casereader_peek (reader, 0);
902 for (i = 0; i < 2; i++)
903 if (factor->indep_var[i])
904 value_copy (&result->value[i], case_data (c, factor->indep_var[i]),
905 var_get_width (factor->indep_var[i]));
909 for (v = 0; v < n_dependent_vars; ++v)
911 struct casewriter *writer;
912 struct casereader *input = casereader_clone (reader);
914 result->metrics[v].moments = moments1_create (MOMENT_KURTOSIS);
915 result->metrics[v].minima = extrema_create (n_extrema, EXTREME_MINIMA);
916 result->metrics[v].maxima = extrema_create (n_extrema, EXTREME_MAXIMA);
917 result->metrics[v].cmin = DBL_MAX;
919 if (cmd->a_statistics[XMN_ST_DESCRIPTIVES] ||
920 cmd->a_plot[XMN_PLT_BOXPLOT] ||
921 cmd->a_plot[XMN_PLT_NPPLOT] ||
922 cmd->sbc_percentiles)
924 /* In this case, we need to sort the data, so we create a sorting
926 struct subcase up_ordering;
927 subcase_init_var (&up_ordering, dependent_vars[v], SC_ASCEND);
928 writer = sort_create_writer (&up_ordering,
929 casereader_get_proto (reader));
930 subcase_destroy (&up_ordering);
934 /* but in this case, sorting is unnecessary, so an ordinary
935 casewriter is sufficient */
937 autopaging_writer_create (casereader_get_proto (reader));
941 /* Sort or just iterate, whilst calculating moments etc */
942 while ((c = casereader_read (input)) != NULL)
/* The last value of each case is read as its location (case number). */
944 int n_vals = caseproto_get_n_widths (casereader_get_proto (reader));
945 const casenumber loc = case_data_idx (c, n_vals - 1)->f;
947 const double weight = wv ? case_data (c, wv)->f : 1.0;
948 const union value *value = case_data (c, dependent_vars[v]);
950 if (weight != SYSMIS)
951 minimize (&result->metrics[v].cmin, weight);
953 moments1_add (result->metrics[v].moments,
957 result->metrics[v].n += weight;
959 if ( ! var_is_value_missing (dependent_vars[v], value, MV_ANY) )
960 result->metrics[v].n_valid += weight;
962 extrema_add (result->metrics[v].maxima,
967 extrema_add (result->metrics[v].minima,
972 casewriter_write (writer, c);
974 casereader_destroy (input);
975 result->metrics[v].up_reader = casewriter_make_reader (writer);
978 /* If percentiles or descriptives have been requested, then a
979 second pass through the data (which has now been sorted)
981 if ( cmd->a_statistics[XMN_ST_DESCRIPTIVES] ||
982 cmd->a_plot[XMN_PLT_BOXPLOT] ||
983 cmd->a_plot[XMN_PLT_NPPLOT] ||
984 cmd->sbc_percentiles)
986 for (v = 0; v < n_dependent_vars; ++v)
990 struct order_stats **os ;
991 struct factor_metrics *metric = &result->metrics[v];
993 metric->n_ptiles = percentile_list.n_data;
995 metric->ptl = xcalloc (metric->n_ptiles,
996 sizeof (struct percentile *));
998 metric->quartiles = xcalloc (3, sizeof (*metric->quartiles));
1000 for (i = 0 ; i < metric->n_ptiles; ++i)
1002 metric->ptl[i] = (struct percentile *)
1003 percentile_create (percentile_list.data[i] / 100.0, metric->n_valid);
1005 if ( percentile_list.data[i] == 25)
1006 metric->quartiles[0] = metric->ptl[i];
1007 else if ( percentile_list.data[i] == 50)
1008 metric->quartiles[1] = metric->ptl[i];
1009 else if ( percentile_list.data[i] == 75)
1010 metric->quartiles[2] = metric->ptl[i];
1013 metric->tukey_hinges = tukey_hinges_create (metric->n_valid, metric->cmin);
1014 metric->trimmed_mean = trimmed_mean_create (metric->n_valid, 0.05);
1016 n_os = metric->n_ptiles + 2;
1018 if ( cmd->a_plot[XMN_PLT_NPPLOT] )
1020 metric->np = np_create (metric->moments);
1024 os = xcalloc (sizeof (struct order_stats *), n_os);
1026 for (i = 0 ; i < metric->n_ptiles ; ++i )
1028 os[i] = (struct order_stats *) metric->ptl[i];
1031 os[i] = (struct order_stats *) metric->tukey_hinges;
1032 os[i+1] = (struct order_stats *) metric->trimmed_mean;
1034 if (cmd->a_plot[XMN_PLT_NPPLOT])
1035 os[i+2] = metric->np;
1037 order_stats_accumulate (os, n_os,
1038 casereader_clone (metric->up_reader),
1039 wv, dependent_vars[v], MV_ANY);
1044 /* FIXME: Do this in the above loop */
1045 if ( cmd->a_plot[XMN_PLT_HISTOGRAM] )
1048 struct casereader *input = casereader_clone (reader);
/* Create the histograms first; their range comes from the extrema
   collected in the first pass. */
1050 for (v = 0; v < n_dependent_vars; ++v)
1052 const struct extremum *max, *min;
1053 struct factor_metrics *metric = &result->metrics[v];
1055 const struct ll_list *max_list =
1056 extrema_list (result->metrics[v].maxima);
1058 const struct ll_list *min_list =
1059 extrema_list (result->metrics[v].minima);
1061 if ( ll_is_empty (max_list))
1063 msg (MW, _("Not creating plot because data set is empty."));
1067 assert (! ll_is_empty (min_list));
1069 max = (const struct extremum *)
1070 ll_data (ll_head(max_list), struct extremum, ll);
1072 min = (const struct extremum *)
1073 ll_data (ll_head (min_list), struct extremum, ll);
1075 metric->histogram = histogram_create (10, min->value, max->value);
/* Then fill every histogram that was created, in a single pass. */
1078 while ((c = casereader_read (input)) != NULL)
1080 const double weight = wv ? case_data (c, wv)->f : 1.0;
1082 for (v = 0; v < n_dependent_vars; ++v)
1084 struct factor_metrics *metric = &result->metrics[v];
1085 if ( metric->histogram)
1086 histogram_add ((struct histogram *) metric->histogram,
1087 case_data (c, dependent_vars[v])->f, weight);
1091 casereader_destroy (input);
1094 /* In this case, a third iteration is required */
1095 if (cmd->a_plot[XMN_PLT_BOXPLOT])
1097 for (v = 0; v < n_dependent_vars; ++v)
1099 struct factor_metrics *metric = &result->metrics[v];
1100 int n_vals = caseproto_get_n_widths (casereader_get_proto (
1101 metric->up_reader));
/* The box-whisker statistic is built from the hinges computed above.
   It is also given cmd->v_id and the index of the last value of each
   case. */
1103 metric->box_whisker =
1104 box_whisker_create ((struct tukey_hinges *) metric->tukey_hinges,
1105 cmd->v_id, n_vals - 1);
1107 order_stats_accumulate ((struct order_stats **) &metric->box_whisker,
1109 casereader_clone (metric->up_reader),
1110 wv, dependent_vars[v], MV_ANY);
1114 ll_push_tail (&factor->result_list, &result->ll);
1115 casereader_destroy (reader);
/* Run the analysis for one split-file group.

   INPUT supplies the group's cases and is destroyed here.  The whole
   group is examined once for level0_factor.  Then, for every factor in
   factor_list, the cases are sorted and grouped on the first
   independent variable and, if there is one, on the second, and each
   resulting group is passed to examine_group (), which destroys the
   reader it is given.  Finally the output is produced and the results
   are released.

   NOTE(review): LEVEL0 is cloned before the emptiness check; whether
   the early-exit path also destroys that clone is not visible in this
   excerpt -- confirm. */
1120 run_examine (struct cmd_examine *cmd, struct casereader *input,
1124 const struct dictionary *dict = dataset_dict (ds);
1126 struct casereader *level0 = casereader_clone (input);
1128 c = casereader_peek (input, 0);
1131 casereader_destroy (input);
1135 output_split_file_values (ds, c);
1138 ll_init (&level0_factor.result_list);
1140 examine_group (cmd, level0, 0, dict, &level0_factor);
1142 for (ll = ll_head (&factor_list);
1143 ll != ll_null (&factor_list);
1146 struct xfactor *factor = ll_data (ll, struct xfactor, ll);
1148 struct casereader *group = NULL;
1149 struct casereader *level1;
1150 struct casegrouper *grouper1 = NULL;
/* Group a sorted copy of the input on the first independent variable. */
1152 level1 = casereader_clone (input);
1153 level1 = sort_execute_1var (level1, factor->indep_var[0]);
1154 grouper1 = casegrouper_create_vars (level1, &factor->indep_var[0], 1);
1156 while (casegrouper_get_next_group (grouper1, &group))
1158 struct casereader *group_copy = casereader_clone (group);
1160 if ( !factor->indep_var[1])
1161 examine_group (cmd, group_copy, 1, dict, factor);
/* Two-variable factor: split each first-level group again on the
   second independent variable. */
1165 struct casereader *group2 = NULL;
1166 struct casegrouper *grouper2 = NULL;
1168 group_copy = sort_execute_1var (group_copy,
1169 factor->indep_var[1]);
1171 grouper2 = casegrouper_create_vars (group_copy,
1172 &factor->indep_var[1], 1);
1174 while (casegrouper_get_next_group (grouper2, &group2))
1176 examine_group (cmd, group2, 2, dict, factor);
1179 casegrouper_destroy (grouper2);
1182 casereader_destroy (group);
1184 casegrouper_destroy (grouper1);
1187 casereader_destroy (input);
1189 output_examine (dict);
1191 factor_destroy (&level0_factor);
1195 for (ll = ll_head (&factor_list);
1196 ll != ll_null (&factor_list);
1199 struct xfactor *f = ll_data (ll, struct xfactor, ll);
/* Output the "Case Processing Summary" table for FCTR: for each
   dependent variable and each result, the count and percentage of
   valid cases, of missing cases and of all cases.

   Side effect: the mean, variance, skewness, kurtosis and se_mean
   members of every result's metrics are computed and stored here.

   NOTE(review): the percentages divide by metrics[v].n and se_mean
   divides by the valid count; no guard against a zero denominator is
   visible in this excerpt. */
1208 show_summary (const struct variable **dependent_var, int n_dep_var,
1209 const struct dictionary *dict,
1210 const struct xfactor *fctr)
1212 const struct variable *wv = dict_get_weight (dict);
/* Use the weight variable's print format when there is one, otherwise
   F8.0. */
1213 const struct fmt_spec *wfmt = wv ? var_get_print_format (wv) : & F_8_0;
1215 static const char *subtitle[]=
1223 int heading_columns = 1;
1225 const int heading_rows = 3;
1226 struct tab_table *tbl;
/* One extra heading column per independent variable. */
1233 if ( fctr->indep_var[0] )
1235 heading_columns = 2;
1237 if ( fctr->indep_var[1] )
1239 heading_columns = 3;
1243 n_rows *= ll_count (&fctr->result_list);
1244 n_rows += heading_rows;
/* Six data columns: a count and a percentage for each of three
   categories. */
1246 n_cols = heading_columns + 6;
1248 tbl = tab_create (n_cols, n_rows, 0);
1249 tab_headers (tbl, heading_columns, 0, heading_rows, 0);
1251 tab_dim (tbl, tab_natural_dimensions, NULL, NULL);
1253 /* Outline the box */
1258 n_cols - 1, n_rows - 1);
1260 /* Vertical lines for the data only */
1265 n_cols - 1, n_rows - 1);
1268 tab_hline (tbl, TAL_2, 0, n_cols - 1, heading_rows );
1269 tab_hline (tbl, TAL_1, heading_columns, n_cols - 1, 1 );
1270 tab_hline (tbl, TAL_1, heading_columns, n_cols - 1, heading_rows -1 );
1272 tab_vline (tbl, TAL_2, heading_columns, 0, n_rows - 1);
1275 tab_title (tbl, _("Case Processing Summary"));
1277 tab_joint_text (tbl, heading_columns, 0,
1279 TAB_CENTER | TAT_TITLE,
1282 /* Remove lines ... */
/* Column headings: each of the three categories spans two columns. */
1289 for (j = 0 ; j < 3 ; ++j)
1291 tab_text (tbl, heading_columns + j * 2 , 2, TAB_CENTER | TAT_TITLE,
1294 tab_text (tbl, heading_columns + j * 2 + 1, 2, TAB_CENTER | TAT_TITLE,
1297 tab_joint_text (tbl, heading_columns + j * 2 , 1,
1298 heading_columns + j * 2 + 1, 1,
1299 TAB_CENTER | TAT_TITLE,
1302 tab_box (tbl, -1, -1,
1304 heading_columns + j * 2, 1,
1305 heading_columns + j * 2 + 1, 1);
1309 /* Titles for the independent variables */
1310 if ( fctr->indep_var[0] )
1312 tab_text (tbl, 1, heading_rows - 1, TAB_CENTER | TAT_TITLE,
1313 var_to_string (fctr->indep_var[0]));
1315 if ( fctr->indep_var[1] )
1317 tab_text (tbl, 2, heading_rows - 1, TAB_CENTER | TAT_TITLE,
1318 var_to_string (fctr->indep_var[1]));
/* One band of rows per dependent variable, one row per result. */
1322 for (v = 0 ; v < n_dep_var ; ++v)
/* Remembers the first-factor value of the previous row, so that the
   code below can detect when it changes. */
1326 const union value *last_value = NULL;
1329 tab_hline (tbl, TAL_1, 0, n_cols -1 ,
1330 v * ll_count (&fctr->result_list)
1335 v * ll_count (&fctr->result_list) + heading_rows,
1336 TAB_LEFT | TAT_TITLE,
1337 var_to_string (dependent_var[v])
1341 for (ll = ll_head (&fctr->result_list);
1342 ll != ll_null (&fctr->result_list); ll = ll_next (ll))
1345 const struct factor_result *result =
1346 ll_data (ll, struct factor_result, ll);
1348 if ( fctr->indep_var[0] )
1351 if ( last_value == NULL ||
1352 !value_equal (last_value, &result->value[0],
1353 var_get_width (fctr->indep_var[0])))
1357 last_value = &result->value[0];
1358 ds_init_empty (&str);
1360 var_append_value_name (fctr->indep_var[0], &result->value[0],
1365 v * ll_count (&fctr->result_list),
1366 TAB_LEFT | TAT_TITLE,
1371 if ( fctr->indep_var[1] && j > 0)
1372 tab_hline (tbl, TAL_1, 1, n_cols - 1,
1374 v * ll_count (&fctr->result_list));
1377 if ( fctr->indep_var[1])
1381 ds_init_empty (&str);
1383 var_append_value_name (fctr->indep_var[1],
1384 &result->value[1], &str);
1388 v * ll_count (&fctr->result_list),
1389 TAB_LEFT | TAT_TITLE,
/* N is the count reported by the moments and is shown as the valid
   count.  The remaining moments are stored in the metrics as a side
   effect. */
1397 moments1_calculate (result->metrics[v].moments,
1398 &n, &result->metrics[v].mean,
1399 &result->metrics[v].variance,
1400 &result->metrics[v].skewness,
1401 &result->metrics[v].kurtosis);
1403 result->metrics[v].se_mean = sqrt (result->metrics[v].variance / n) ;
/* Valid count and percentage. */
1406 tab_double (tbl, heading_columns,
1407 heading_rows + j + v * ll_count (&fctr->result_list),
1411 tab_text (tbl, heading_columns + 1,
1412 heading_rows + j + v * ll_count (&fctr->result_list),
1413 TAB_RIGHT | TAT_PRINTF,
1414 "%g%%", n * 100.0 / result->metrics[v].n);
/* Missing count and percentage: the total weight minus the valid
   count. */
1417 tab_double (tbl, heading_columns + 2,
1418 heading_rows + j + v * ll_count (&fctr->result_list),
1420 result->metrics[v].n - n,
1423 tab_text (tbl, heading_columns + 3,
1424 heading_rows + j + v * ll_count (&fctr->result_list),
1425 TAB_RIGHT | TAT_PRINTF,
1427 (result->metrics[v].n - n) * 100.0 / result->metrics[v].n
1430 /* Total Valid + Missing */
1431 tab_double (tbl, heading_columns + 4,
1432 heading_rows + j + v * ll_count (&fctr->result_list),
1434 result->metrics[v].n,
1437 tab_text (tbl, heading_columns + 5,
1438 heading_rows + j + v * ll_count (&fctr->result_list),
1439 TAB_RIGHT | TAT_PRINTF,
1441 (result->metrics[v].n) * 100.0 / result->metrics[v].n
/* Number of table rows occupied by each factor result in the
   descriptives table; show_descriptives () computes its row count and
   row offsets in multiples of this value. */
1452 #define DESCRIPTIVE_ROWS 13
1455 show_descriptives (const struct variable **dependent_var,
1457 const struct xfactor *fctr)
1460 int heading_columns = 3;
1462 const int heading_rows = 1;
1463 struct tab_table *tbl;
1470 if ( fctr->indep_var[0] )
1472 heading_columns = 4;
1474 if ( fctr->indep_var[1] )
1476 heading_columns = 5;
1480 n_rows *= ll_count (&fctr->result_list) * DESCRIPTIVE_ROWS;
1481 n_rows += heading_rows;
1483 n_cols = heading_columns + 2;
1485 tbl = tab_create (n_cols, n_rows, 0);
1486 tab_headers (tbl, heading_columns, 0, heading_rows, 0);
1488 tab_dim (tbl, tab_natural_dimensions, NULL, NULL);
1490 /* Outline the box */
1495 n_cols - 1, n_rows - 1);
1498 tab_hline (tbl, TAL_2, 0, n_cols - 1, heading_rows );
1499 tab_hline (tbl, TAL_2, 1, n_cols - 1, heading_rows );
1501 tab_vline (tbl, TAL_1, n_cols - 1, 0, n_rows - 1);
1504 if ( fctr->indep_var[0])
1505 tab_text (tbl, 1, 0, TAT_TITLE, var_to_string (fctr->indep_var[0]));
1507 if ( fctr->indep_var[1])
1508 tab_text (tbl, 2, 0, TAT_TITLE, var_to_string (fctr->indep_var[1]));
1510 for (v = 0 ; v < n_dep_var ; ++v )
1515 const int row_var_start =
1516 v * DESCRIPTIVE_ROWS * ll_count(&fctr->result_list);
1520 heading_rows + row_var_start,
1521 TAB_LEFT | TAT_TITLE,
1522 var_to_string (dependent_var[v])
1525 for (ll = ll_head (&fctr->result_list);
1526 ll != ll_null (&fctr->result_list); i++, ll = ll_next (ll))
1528 const struct factor_result *result =
1529 ll_data (ll, struct factor_result, ll);
1532 gsl_cdf_tdist_Qinv ((1 - cmd.n_cinterval[0] / 100.0) / 2.0,
1533 result->metrics[v].n - 1);
1535 if ( i > 0 || v > 0 )
1537 const int left_col = (i == 0) ? 0 : 1;
1538 tab_hline (tbl, TAL_1, left_col, n_cols - 1,
1539 heading_rows + row_var_start + i * DESCRIPTIVE_ROWS);
1542 if ( fctr->indep_var[0])
1545 ds_init_empty (&vstr);
1546 var_append_value_name (fctr->indep_var[0],
1547 &result->value[0], &vstr);
1550 heading_rows + row_var_start + i * DESCRIPTIVE_ROWS,
1559 tab_text (tbl, n_cols - 4,
1560 heading_rows + row_var_start + i * DESCRIPTIVE_ROWS,
1564 tab_text (tbl, n_cols - 4,
1565 heading_rows + row_var_start + 1 + i * DESCRIPTIVE_ROWS,
1566 TAB_LEFT | TAT_PRINTF,
1567 _("%g%% Confidence Interval for Mean"),
1568 cmd.n_cinterval[0]);
1570 tab_text (tbl, n_cols - 3,
1571 heading_rows + row_var_start + 1 + i * DESCRIPTIVE_ROWS,
1575 tab_text (tbl, n_cols - 3,
1576 heading_rows + row_var_start + 2 + i * DESCRIPTIVE_ROWS,
1580 tab_text (tbl, n_cols - 4,
1581 heading_rows + row_var_start + 3 + i * DESCRIPTIVE_ROWS,
1582 TAB_LEFT | TAT_PRINTF,
1583 _("5%% Trimmed Mean"));
1585 tab_text (tbl, n_cols - 4,
1586 heading_rows + row_var_start + 4 + i * DESCRIPTIVE_ROWS,
1590 tab_text (tbl, n_cols - 4,
1591 heading_rows + row_var_start + 5 + i * DESCRIPTIVE_ROWS,
1595 tab_text (tbl, n_cols - 4,
1596 heading_rows + row_var_start + 6 + i * DESCRIPTIVE_ROWS,
1598 _("Std. Deviation"));
1600 tab_text (tbl, n_cols - 4,
1601 heading_rows + row_var_start + 7 + i * DESCRIPTIVE_ROWS,
1605 tab_text (tbl, n_cols - 4,
1606 heading_rows + row_var_start + 8 + i * DESCRIPTIVE_ROWS,
1610 tab_text (tbl, n_cols - 4,
1611 heading_rows + row_var_start + 9 + i * DESCRIPTIVE_ROWS,
1615 tab_text (tbl, n_cols - 4,
1616 heading_rows + row_var_start + 10 + i * DESCRIPTIVE_ROWS,
1618 _("Interquartile Range"));
1621 tab_text (tbl, n_cols - 4,
1622 heading_rows + row_var_start + 11 + i * DESCRIPTIVE_ROWS,
1626 tab_text (tbl, n_cols - 4,
1627 heading_rows + row_var_start + 12 + i * DESCRIPTIVE_ROWS,
1632 /* Now the statistics ... */
1634 tab_double (tbl, n_cols - 2,
1635 heading_rows + row_var_start + i * DESCRIPTIVE_ROWS,
1637 result->metrics[v].mean,
1640 tab_double (tbl, n_cols - 1,
1641 heading_rows + row_var_start + i * DESCRIPTIVE_ROWS,
1643 result->metrics[v].se_mean,
1647 tab_double (tbl, n_cols - 2,
1648 heading_rows + row_var_start + 1 + i * DESCRIPTIVE_ROWS,
1650 result->metrics[v].mean - t *
1651 result->metrics[v].se_mean,
1654 tab_double (tbl, n_cols - 2,
1655 heading_rows + row_var_start + 2 + i * DESCRIPTIVE_ROWS,
1657 result->metrics[v].mean + t *
1658 result->metrics[v].se_mean,
1662 tab_double (tbl, n_cols - 2,
1663 heading_rows + row_var_start + 3 + i * DESCRIPTIVE_ROWS,
1665 trimmed_mean_calculate ((struct trimmed_mean *) result->metrics[v].trimmed_mean),
1669 tab_double (tbl, n_cols - 2,
1670 heading_rows + row_var_start + 4 + i * DESCRIPTIVE_ROWS,
1672 percentile_calculate (result->metrics[v].quartiles[1], percentile_algorithm),
1676 tab_double (tbl, n_cols - 2,
1677 heading_rows + row_var_start + 5 + i * DESCRIPTIVE_ROWS,
1679 result->metrics[v].variance,
1682 tab_double (tbl, n_cols - 2,
1683 heading_rows + row_var_start + 6 + i * DESCRIPTIVE_ROWS,
1685 sqrt (result->metrics[v].variance),
1688 tab_double (tbl, n_cols - 2,
1689 heading_rows + row_var_start + 10 + i * DESCRIPTIVE_ROWS,
1691 percentile_calculate (result->metrics[v].quartiles[2],
1692 percentile_algorithm) -
1693 percentile_calculate (result->metrics[v].quartiles[0],
1694 percentile_algorithm),
1698 tab_double (tbl, n_cols - 2,
1699 heading_rows + row_var_start + 11 + i * DESCRIPTIVE_ROWS,
1701 result->metrics[v].skewness,
1704 tab_double (tbl, n_cols - 2,
1705 heading_rows + row_var_start + 12 + i * DESCRIPTIVE_ROWS,
1707 result->metrics[v].kurtosis,
1710 tab_double (tbl, n_cols - 1,
1711 heading_rows + row_var_start + 11 + i * DESCRIPTIVE_ROWS,
1713 calc_seskew (result->metrics[v].n),
1716 tab_double (tbl, n_cols - 1,
1717 heading_rows + row_var_start + 12 + i * DESCRIPTIVE_ROWS,
1719 calc_sekurt (result->metrics[v].n),
1723 struct extremum *minimum, *maximum ;
1725 struct ll *max_ll = ll_head (extrema_list (result->metrics[v].maxima));
1726 struct ll *min_ll = ll_head (extrema_list (result->metrics[v].minima));
1728 maximum = ll_data (max_ll, struct extremum, ll);
1729 minimum = ll_data (min_ll, struct extremum, ll);
1731 tab_double (tbl, n_cols - 2,
1732 heading_rows + row_var_start + 7 + i * DESCRIPTIVE_ROWS,
1737 tab_double (tbl, n_cols - 2,
1738 heading_rows + row_var_start + 8 + i * DESCRIPTIVE_ROWS,
1743 tab_double (tbl, n_cols - 2,
1744 heading_rows + row_var_start + 9 + i * DESCRIPTIVE_ROWS,
1746 maximum->value - minimum->value,
1752 tab_vline (tbl, TAL_2, heading_columns, 0, n_rows - 1);
1754 tab_title (tbl, _("Descriptives"));
1756 tab_text (tbl, n_cols - 2, 0, TAB_CENTER | TAT_TITLE,
1759 tab_text (tbl, n_cols - 1, 0, TAB_CENTER | TAT_TITLE,
/* Builds the table titled "Extreme Values".  For every dependent variable
   (index v) and every entry of FCTR's result_list (index i) a group of
   2 * cmd.st_n rows is used: the first cmd.st_n rows are filled from the
   maxima list and the following cmd.st_n rows from the minima list.

   NOTE(review): parts of this definition (return type, some parameters
   and declarations, braces, several call arguments) are not visible in
   this excerpt; the comments describe only what the visible lines show. */
1768 show_extremes (const struct variable **dependent_var,
1770 const struct xfactor *fctr)
/* Heading columns: 3 by default; set to 4 under the indep_var[0] test and
   to 5 under the indep_var[1] test. */
1773 int heading_columns = 3;
1775 const int heading_rows = 1;
1776 struct tab_table *tbl;
1783 if ( fctr->indep_var[0] )
1785 heading_columns = 4;
1787 if ( fctr->indep_var[1] )
1789 heading_columns = 5;
/* n_rows is scaled by (number of results * 2 * cmd.st_n) and the heading
   rows are added; its starting value is set on a line not shown here. */
1793 n_rows *= ll_count (&fctr->result_list) * cmd.st_n * 2;
1794 n_rows += heading_rows;
1796 n_cols = heading_columns + 2;
1798 tbl = tab_create (n_cols, n_rows, 0);
1799 tab_headers (tbl, heading_columns, 0, heading_rows, 0);
1801 tab_dim (tbl, tab_natural_dimensions, NULL, NULL);
1803 /* Outline the box */
1808 n_cols - 1, n_rows - 1);
1811 tab_hline (tbl, TAL_2, 0, n_cols - 1, heading_rows );
1812 tab_hline (tbl, TAL_2, 1, n_cols - 1, heading_rows );
1813 tab_vline (tbl, TAL_1, n_cols - 1, 0, n_rows - 1);
1815 if ( fctr->indep_var[0])
1816 tab_text (tbl, 1, 0, TAT_TITLE, var_to_string (fctr->indep_var[0]));
1818 if ( fctr->indep_var[1])
1819 tab_text (tbl, 2, 0, TAT_TITLE, var_to_string (fctr->indep_var[1]));
1821 for (v = 0 ; v < n_dep_var ; ++v )
/* First table row (below the headings) belonging to dependent variable v. */
1825 const int row_var_start = v * cmd.st_n * 2 * ll_count(&fctr->result_list);
1829 heading_rows + row_var_start,
1830 TAB_LEFT | TAT_TITLE,
1831 var_to_string (dependent_var[v])
1834 for (ll = ll_head (&fctr->result_list);
1835 ll != ll_null (&fctr->result_list); i++, ll = ll_next (ll))
/* Offset of result i's group of rows within the variable's rows. */
1840 const int row_result_start = i * cmd.st_n * 2;
1842 const struct factor_result *result =
1843 ll_data (ll, struct factor_result, ll);
/* One rule above the group and one after its first cmd.st_n rows,
   separating the two halves. */
1846 tab_hline (tbl, TAL_1, 1, n_cols - 1,
1847 heading_rows + row_var_start + row_result_start);
1849 tab_hline (tbl, TAL_1, heading_columns - 2, n_cols - 1,
1850 heading_rows + row_var_start + row_result_start + cmd.st_n);
/* Column n_cols - 3 gets one printf-formatted text per row in both halves
   (e runs from 1 to cmd.st_n; the format strings are not visible here). */
1852 for ( e = 1; e <= cmd.st_n; ++e )
1854 tab_text (tbl, n_cols - 3,
1855 heading_rows + row_var_start + row_result_start + e - 1,
1856 TAB_RIGHT | TAT_PRINTF,
1859 tab_text (tbl, n_cols - 3,
1860 heading_rows + row_var_start + row_result_start + cmd.st_n + e - 1,
1861 TAB_RIGHT | TAT_PRINTF,
/* Second half of the group (rows + cmd.st_n + e): walk the minima list
   from its head.  For each extremum the inner loop repeats while the
   post-decremented weight is positive and e is still below cmd.st_n.
   NOTE(review): the increment of e and any end-of-list test are not
   visible in this excerpt -- confirm the list cannot run out before e
   reaches cmd.st_n, otherwise ll_next would step past the last node. */
1866 min_ll = ll_head (extrema_list (result->metrics[v].minima));
1867 for (e = 0; e < cmd.st_n;)
1869 struct extremum *minimum = ll_data (min_ll, struct extremum, ll);
1870 double weight = minimum->weight;
1872 while (weight-- > 0 && e < cmd.st_n)
1874 tab_double (tbl, n_cols - 1,
1875 heading_rows + row_var_start + row_result_start + cmd.st_n + e,
1881 tab_fixed (tbl, n_cols - 2,
1882 heading_rows + row_var_start +
1883 row_result_start + cmd.st_n + e,
1890 min_ll = ll_next (min_ll);
/* First half of the group (rows + e): same walk over the maxima list; the
   review note above applies here as well. */
1894 max_ll = ll_head (extrema_list (result->metrics[v].maxima));
1895 for (e = 0; e < cmd.st_n;)
1897 struct extremum *maximum = ll_data (max_ll, struct extremum, ll);
1898 double weight = maximum->weight;
1900 while (weight-- > 0 && e < cmd.st_n)
1902 tab_double (tbl, n_cols - 1,
1903 heading_rows + row_var_start +
1904 row_result_start + e,
1910 tab_fixed (tbl, n_cols - 2,
1911 heading_rows + row_var_start +
1912 row_result_start + e,
1919 max_ll = ll_next (max_ll);
1923 if ( fctr->indep_var[0])
1926 ds_init_empty (&vstr);
1927 var_append_value_name (fctr->indep_var[0],
1928 &result->value[0], &vstr);
1931 heading_rows + row_var_start + row_result_start,
/* Labels for the two halves go in column n_cols - 4, on the first row of
   each half (label strings are not visible here). */
1940 tab_text (tbl, n_cols - 4,
1941 heading_rows + row_var_start + row_result_start,
1945 tab_text (tbl, n_cols - 4,
1946 heading_rows + row_var_start + row_result_start + cmd.st_n,
/* Heavy rule between heading and data columns, the table title, and the
   headings of the two data columns. */
1952 tab_vline (tbl, TAL_2, heading_columns, 0, n_rows - 1);
1955 tab_title (tbl, _("Extreme Values"));
1958 tab_text (tbl, n_cols - 2, 0, TAB_CENTER | TAT_TITLE,
1962 tab_text (tbl, n_cols - 1, 0, TAB_CENTER | TAT_TITLE,
/* Number of table rows written for each factor result in the
   "Percentiles" table: one labelled with the selected percentile
   algorithm and one labelled "Tukey's Hinges". */
1968 #define PERCENTILE_ROWS 2
/* Builds the table titled "Percentiles".  The table has one data column
   per entry of percentile_list and, for every dependent variable (index
   v) and every entry of FCTR's result_list (index i), PERCENTILE_ROWS
   rows.

   NOTE(review): parts of this definition (return type, some parameters
   and declarations, braces, several call arguments) are not visible in
   this excerpt; the comments describe only what the visible lines show. */
1971 show_percentiles (const struct variable **dependent_var,
1973 const struct xfactor *fctr)
/* Heading columns: 2 by default; set to 3 under the indep_var[0] test and
   to 4 under the indep_var[1] test. */
1977 int heading_columns = 2;
1979 const int n_percentiles = subc_list_double_count (&percentile_list);
1980 const int heading_rows = 2;
1981 struct tab_table *tbl;
1988 if ( fctr->indep_var[0] )
1990 heading_columns = 3;
1992 if ( fctr->indep_var[1] )
1994 heading_columns = 4;
/* n_rows is scaled by (number of results * PERCENTILE_ROWS) and the
   heading rows are added; its starting value is set on a line not shown
   here. */
1998 n_rows *= ll_count (&fctr->result_list) * PERCENTILE_ROWS;
1999 n_rows += heading_rows;
2001 n_cols = heading_columns + n_percentiles;
2003 tbl = tab_create (n_cols, n_rows, 0);
2004 tab_headers (tbl, heading_columns, 0, heading_rows, 0);
2006 tab_dim (tbl, tab_natural_dimensions, NULL, NULL);
2008 /* Outline the box */
2013 n_cols - 1, n_rows - 1);
2016 tab_hline (tbl, TAL_2, 0, n_cols - 1, heading_rows );
2017 tab_hline (tbl, TAL_2, 1, n_cols - 1, heading_rows );
/* Factor variable names are placed on heading row 1 (the second heading
   row). */
2019 if ( fctr->indep_var[0])
2020 tab_text (tbl, 1, 1, TAT_TITLE, var_to_string (fctr->indep_var[0]));
2022 if ( fctr->indep_var[1])
2023 tab_text (tbl, 2, 1, TAT_TITLE, var_to_string (fctr->indep_var[1]));
2025 for (v = 0 ; v < n_dep_var ; ++v )
/* First table row (below the headings) belonging to dependent variable v. */
2031 const int row_var_start =
2032 v * PERCENTILE_ROWS * ll_count(&fctr->result_list);
2036 heading_rows + row_var_start,
2037 TAB_LEFT | TAT_TITLE,
2038 var_to_string (dependent_var[v])
2041 for (ll = ll_head (&fctr->result_list);
2042 ll != ll_null (&fctr->result_list); i++, ll = ll_next (ll))
2045 const struct factor_result *result =
2046 ll_data (ll, struct factor_result, ll);
/* Every group except the very first gets a rule above it; it starts at
   column 0 for the first result of a variable and at column 1 otherwise. */
2048 if ( i > 0 || v > 0 )
2050 const int left_col = (i == 0) ? 0 : 1;
2051 tab_hline (tbl, TAL_1, left_col, n_cols - 1,
2052 heading_rows + row_var_start + i * PERCENTILE_ROWS);
2055 if ( fctr->indep_var[0])
2058 ds_init_empty (&vstr);
2059 var_append_value_name (fctr->indep_var[0],
2060 &result->value[0], &vstr);
2063 heading_rows + row_var_start + i * PERCENTILE_ROWS,
/* Row labels in the last heading column: the description of the selected
   percentile algorithm on the first row, "Tukey's Hinges" on the second. */
2072 tab_text (tbl, n_cols - n_percentiles - 1,
2073 heading_rows + row_var_start + i * PERCENTILE_ROWS,
2075 ptile_alg_desc [percentile_algorithm]);
2078 tab_text (tbl, n_cols - n_percentiles - 1,
2079 heading_rows + row_var_start + 1 + i * PERCENTILE_ROWS,
2081 _("Tukey's Hinges"));
/* NOTE(review): none of the arguments of this tab_vline call depend on
   v or i, yet it appears among per-result statements -- confirm whether
   it could be issued once outside the loops. */
2084 tab_vline (tbl, TAL_1, n_cols - n_percentiles -1, heading_rows, n_rows - 1);
2086 tukey_hinges_calculate ((struct tukey_hinges *) result->metrics[v].tukey_hinges,
/* For each requested percentile: the first row always receives the value
   from percentile_calculate.  `hinge' starts as SYSMIS and is changed (on
   lines not visible here) only when ptile compares exactly equal to 0.5,
   0.25 or 0.75; the second row is written only when hinge != SYSMIS. */
2089 for (j = 0; j < n_percentiles; ++j)
2091 double hinge = SYSMIS;
2092 tab_double (tbl, n_cols - n_percentiles + j,
2093 heading_rows + row_var_start + i * PERCENTILE_ROWS,
2095 percentile_calculate (result->metrics[v].ptl[j],
2096 percentile_algorithm),
2100 if ( result->metrics[v].ptl[j]->ptile == 0.5)
2102 else if ( result->metrics[v].ptl[j]->ptile == 0.25)
2104 else if ( result->metrics[v].ptl[j]->ptile == 0.75)
2107 if ( hinge != SYSMIS)
2108 tab_double (tbl, n_cols - n_percentiles + j,
2109 heading_rows + row_var_start + 1 + i * PERCENTILE_ROWS,
2119 tab_vline (tbl, TAL_2, heading_columns, 0, n_rows - 1);
2121 tab_title (tbl, _("Percentiles"));
/* One heading per percentile column on heading row 1, formatted from the
   corresponding percentile_list entry. */
2124 for (i = 0 ; i < n_percentiles; ++i )
2126 tab_text (tbl, n_cols - n_percentiles + i, 1,
2127 TAB_CENTER | TAT_TITLE | TAT_PRINTF,
2129 subc_list_double_at (&percentile_list, i)
/* Joint heading on row 0, starting at the first percentile column. */
2135 tab_joint_text (tbl,
2136 n_cols - n_percentiles, 0,
2138 TAB_CENTER | TAT_TITLE,
2141 /* Vertical lines for the data only */
2145 n_cols - n_percentiles, 1,
2146 n_cols - 1, n_rows - 1);
2148 tab_hline (tbl, TAL_1, n_cols - n_percentiles, n_cols - 1, 1);
/* Appends a short description of RESULT's factor values to STR: the value
   name of result->value[0] for fctr->indep_var[0] and, under the
   indep_var[1] test, a "," followed by the value name of result->value[1].
   Nothing is appended by the visible lines when indep_var[0] is null.

   NOTE(review): a ")" is appended below but no "(" is written by any
   visible line of this function -- confirm whether the unbalanced
   parenthesis is intended.  The third parameter and the braces are on
   lines not visible in this excerpt. */
2156 factor_to_string_concise (const struct xfactor *fctr,
2157 const struct factor_result *result,
2161 if (fctr->indep_var[0])
2163 var_append_value_name (fctr->indep_var[0], &result->value[0], str);
2165 if ( fctr->indep_var[1] )
2167 ds_put_cstr (str, ",");
2169 var_append_value_name (fctr->indep_var[1], &result->value[1], str);
2171 ds_put_cstr (str, ")");
/* Appends a description of RESULT's factor values to STR in the form
   "(<name> = <value>", followed under the indep_var[1] test by
   ",<name> = <value>" for the second factor, and a ")".  Names come from
   var_get_name and values from var_append_value_name.

   NOTE(review): the third parameter, the braces and the end of the
   definition are on lines not visible in this excerpt, so the exact
   nesting of the closing ")" relative to the two tests cannot be
   confirmed from here. */
2178 factor_to_string (const struct xfactor *fctr,
2179 const struct factor_result *result,
2183 if (fctr->indep_var[0])
2185 ds_put_format (str, "(%s = ", var_get_name (fctr->indep_var[0]));
2187 var_append_value_name (fctr->indep_var[0], &result->value[0], str);
2189 if ( fctr->indep_var[1] )
2191 ds_put_cstr (str, ",");
2192 ds_put_format (str, "%s = ", var_get_name (fctr->indep_var[1]));
2194 var_append_value_name (fctr->indep_var[1], &result->value[1], str);
2196 ds_put_cstr (str, ")");