From 2acfe799af1fd4504ee1278e0b8864ace451688a Mon Sep 17 00:00:00 2001 From: John Darrington Date: Thu, 4 Sep 2008 19:57:06 +0800 Subject: [PATCH] Complete re-implementation of the EXAMINE command. The implementation now avoids storing large amounts of data on the heap. Instead it uses the casereader functionality and the casegrouper structure instead of hashes to store group data. Added documentation for the /ID subcommand which was missing. Better encapsulated the calculation of boxplot data, extreme values, histograms, NP plots and percentiles and others. Made the start of a common base interface for statistic calculations. Also updated the frequencies code to fit new interfaces. Added a test for bug #23852 --- doc/statistics.texi | 8 +- po/en_GB.po | 193 +- src/data/casewriter.c | 12 +- src/language/stats/examine.q | 2943 ++++++++++++++--------------- src/language/stats/frequencies.q | 35 +- src/libpspp/misc.h | 23 +- src/math/automake.mk | 31 +- src/math/factor-stats.c | 328 ---- src/math/factor-stats.h | 162 -- src/math/histogram.c | 65 +- src/math/histogram.h | 22 +- src/math/percentiles.c | 451 ++--- src/math/percentiles.h | 58 +- src/output/charts/box-whisker.c | 160 +- src/output/charts/box-whisker.h | 27 +- src/output/charts/plot-hist.c | 175 +- src/output/charts/plot-hist.h | 26 +- tests/automake.mk | 1 + tests/bugs/examine-missing2.sh | 38 +- tests/command/examine-extremes.sh | 4 +- tests/command/examine.sh | 18 +- 21 files changed, 1971 insertions(+), 2809 deletions(-) delete mode 100644 src/math/factor-stats.c delete mode 100644 src/math/factor-stats.h diff --git a/doc/statistics.texi b/doc/statistics.texi index 28398fb7..7b1d8c5f 100644 --- a/doc/statistics.texi +++ b/doc/statistics.texi @@ -232,7 +232,7 @@ EXAMINE /PLOT=@{BOXPLOT, NPPLOT, HISTOGRAM, ALL, NONE@} /CINTERVAL n /COMPARE=@{GROUPS,VARIABLES@} - /ID=@{case_number, var_name@} + /ID=var_name /@{TOTAL,NOTOTAL@} /PERCENTILE=[value_list]=@{HAVERAGE, WAVERAGE, ROUND, AEMPIRICAL, EMPIRICAL @} /MISSING=@{LISTWISE, PAIRWISE@} [@{EXCLUDE, INCLUDE@}] @@ -271,6 +271,12 @@ If /COMPARE=VARIABLES is specified, then one plot per factor is produced, each each containing one boxplot per dependent variable. If the /COMPARE subcommand is ommitted, then PSPP uses the default value of /COMPARE=GROUPS. + +The ID subcommand also pertains to boxplots. If given, it must +specify a variable name. Outliers and extreme cases plotted in +boxplots will be labelled with the case from that variable. Numeric or +string variables are permissible. If the ID subcommand is not given, +then the casenumber will be used for labelling. The CINTERVAL subcommand specifies the confidence interval to use in calculation of the descriptives command. The default it 95%. diff --git a/po/en_GB.po b/po/en_GB.po index 2c3bc865..1b8c6bf5 100644 --- a/po/en_GB.po +++ b/po/en_GB.po @@ -7,7 +7,7 @@ msgid "" msgstr "" "Project-Id-Version: PSPP 0.4.3\n" "Report-Msgid-Bugs-To: pspp-dev@gnu.org\n" -"POT-Creation-Date: 2008-08-23 08:35+0800\n" +"POT-Creation-Date: 2008-09-04 19:43+0800\n" "PO-Revision-Date: 2007-09-15 08:29+0800\n" "Last-Translator: John Darrington \n" "Language-Team: John Darrington \n" @@ -1872,8 +1872,8 @@ msgstr "" #: src/language/dictionary/sys-file-info.c:563 #: src/language/stats/crosstabs.q:1155 src/language/stats/crosstabs.q:1182 #: src/language/stats/crosstabs.q:1202 src/language/stats/crosstabs.q:1224 -#: src/language/stats/examine.q:1198 src/language/stats/frequencies.q:1060 -#: src/language/stats/frequencies.q:1184 +#: src/language/stats/examine.q:1948 src/language/stats/frequencies.q:1055 +#: src/language/stats/frequencies.q:1179 msgid "Value" msgstr "" @@ -2635,7 +2635,7 @@ msgstr "" #: src/language/stats/binomial.c:203 src/language/stats/chisquare.c:223 #: src/language/stats/chisquare.c:283 src/language/stats/crosstabs.q:862 #: src/language/stats/crosstabs.q:1062 src/language/stats/crosstabs.q:1785 -#: src/language/stats/examine.q:918 src/language/stats/frequencies.q:1137 +#: src/language/stats/examine.q:1207 src/language/stats/frequencies.q:1132 #: src/language/stats/oneway.q:306 src/language/stats/oneway.q:476 #: src/language/stats/regression.q:309 src/ui/gui/crosstabs-dialog.c:59 msgid "Total" @@ -2647,7 +2647,7 @@ msgid "Category" msgstr "" #: src/language/stats/binomial.c:236 src/language/stats/crosstabs.q:872 -#: src/language/stats/examine.q:993 src/language/stats/frequencies.q:1405 +#: src/language/stats/examine.q:1280 src/language/stats/frequencies.q:1400 #: src/language/stats/npar-summary.c:122 src/language/stats/oneway.q:391 #: src/language/stats/t-test.q:693 src/language/stats/t-test.q:716 #: src/language/stats/t-test.q:850 src/language/stats/t-test.q:1387 @@ -2740,24 +2740,24 @@ msgstr "" msgid "Summary." msgstr "" -#: src/language/stats/crosstabs.q:859 src/language/stats/examine.q:981 +#: src/language/stats/crosstabs.q:859 src/language/stats/examine.q:1268 msgid "Cases" msgstr "" -#: src/language/stats/crosstabs.q:860 src/language/stats/examine.q:916 -#: src/language/stats/frequencies.q:1058 src/language/stats/frequencies.q:1406 +#: src/language/stats/crosstabs.q:860 src/language/stats/examine.q:1205 +#: src/language/stats/frequencies.q:1053 src/language/stats/frequencies.q:1401 msgid "Valid" msgstr "" -#: src/language/stats/crosstabs.q:861 src/language/stats/examine.q:917 -#: src/language/stats/frequencies.q:1128 src/language/stats/frequencies.q:1407 +#: src/language/stats/crosstabs.q:861 src/language/stats/examine.q:1206 +#: src/language/stats/frequencies.q:1123 src/language/stats/frequencies.q:1402 #: src/ui/gui/psppire-var-sheet.c:106 msgid "Missing" msgstr "" -#: src/language/stats/crosstabs.q:873 src/language/stats/examine.q:996 -#: src/language/stats/frequencies.q:1062 src/language/stats/frequencies.q:1063 -#: src/language/stats/frequencies.q:1064 +#: src/language/stats/crosstabs.q:873 src/language/stats/examine.q:1283 +#: src/language/stats/frequencies.q:1057 src/language/stats/frequencies.q:1058 +#: src/language/stats/frequencies.q:1059 msgid "Percent" msgstr "" @@ -2799,7 +2799,7 @@ msgstr "" #: src/language/stats/crosstabs.q:1154 src/language/stats/crosstabs.q:1181 #: src/language/stats/crosstabs.q:1201 src/language/stats/crosstabs.q:1222 -#: src/language/stats/examine.q:1442 src/ui/gui/checkbox-treeview.c:94 +#: src/language/stats/examine.q:1745 src/ui/gui/checkbox-treeview.c:94 msgid "Statistic" msgstr "" @@ -2989,7 +2989,7 @@ msgstr "" msgid "%s Dependent" msgstr "" -#: src/language/stats/descriptives.c:102 src/language/stats/examine.q:1556 +#: src/language/stats/descriptives.c:102 src/language/stats/examine.q:1550 #: src/language/stats/frequencies.q:123 src/language/stats/npar-summary.c:125 #: src/language/stats/oneway.q:392 src/language/stats/t-test.q:694 #: src/language/stats/t-test.q:717 src/language/stats/t-test.q:849 @@ -3006,13 +3006,13 @@ msgstr "" msgid "Std Dev" msgstr "" -#: src/language/stats/descriptives.c:105 src/language/stats/examine.q:1636 +#: src/language/stats/descriptives.c:105 src/language/stats/examine.q:1581 #: src/language/stats/frequencies.q:128 src/ui/gui/descriptives-dialog.c:46 #: src/ui/gui/frequencies-dialog.c:45 msgid "Variance" msgstr "" -#: src/language/stats/descriptives.c:106 src/language/stats/examine.q:1743 +#: src/language/stats/descriptives.c:106 src/language/stats/examine.q:1617 #: src/language/stats/frequencies.q:129 src/ui/gui/descriptives-dialog.c:47 #: src/ui/gui/frequencies-dialog.c:50 msgid "Kurtosis" @@ -3022,7 +3022,7 @@ msgstr "" msgid "S E Kurt" msgstr "" -#: src/language/stats/descriptives.c:108 src/language/stats/examine.q:1723 +#: src/language/stats/descriptives.c:108 src/language/stats/examine.q:1612 #: src/language/stats/frequencies.q:131 src/ui/gui/descriptives-dialog.c:48 #: src/ui/gui/frequencies-dialog.c:46 msgid "Skewness" @@ -3032,20 +3032,20 @@ msgstr "" msgid "S E Skew" msgstr "" -#: src/language/stats/descriptives.c:110 src/language/stats/examine.q:1684 +#: src/language/stats/descriptives.c:110 src/language/stats/examine.q:1601 #: src/language/stats/frequencies.q:133 src/ui/gui/descriptives-dialog.c:43 #: src/ui/gui/frequencies-dialog.c:48 msgid "Range" msgstr "" -#: src/language/stats/descriptives.c:111 src/language/stats/examine.q:1661 +#: src/language/stats/descriptives.c:111 src/language/stats/examine.q:1591 #: src/language/stats/frequencies.q:134 src/language/stats/npar-summary.c:131 #: src/language/stats/oneway.q:404 src/ui/gui/descriptives-dialog.c:41 #: src/ui/gui/frequencies-dialog.c:42 msgid "Minimum" msgstr "" -#: src/language/stats/descriptives.c:112 src/language/stats/examine.q:1672 +#: src/language/stats/descriptives.c:112 src/language/stats/examine.q:1596 #: src/language/stats/frequencies.q:135 src/language/stats/npar-summary.c:134 #: src/language/stats/oneway.q:405 src/ui/gui/descriptives-dialog.c:42 #: src/ui/gui/frequencies-dialog.c:43 @@ -3102,119 +3102,132 @@ msgstr "" msgid "Valid cases = %g; cases with missing value(s) = %g." msgstr "" -#: src/language/stats/examine.q:288 src/language/stats/examine.q:291 -#, c-format -msgid "%s is not currently supported." +#: src/language/stats/examine.q:337 src/language/stats/examine.q:490 +#: src/language/stats/examine.q:1047 +msgid "Not creating plot because data set is empty." msgstr "" -#: src/language/stats/examine.q:501 src/language/stats/examine.q:514 +#: src/language/stats/examine.q:347 #, c-format -msgid "%s and %s are mutually exclusive" +msgid "Normal Q-Q Plot of %s" msgstr "" -#: src/language/stats/examine.q:976 -msgid "Case Processing Summary" +#: src/language/stats/examine.q:348 src/language/stats/examine.q:353 +msgid "Observed Value" msgstr "" -#: src/language/stats/examine.q:1183 -msgid "Extreme Values" +#: src/language/stats/examine.q:349 +msgid "Expected Normal" msgstr "" -#: src/language/stats/examine.q:1199 -msgid "Case Number" +#: src/language/stats/examine.q:351 +#, c-format +msgid "Detrended Normal Q-Q Plot of %s" msgstr "" -#: src/language/stats/examine.q:1297 -msgid "Highest" +#: src/language/stats/examine.q:354 +msgid "Dev from Normal" msgstr "" -#: src/language/stats/examine.q:1302 -msgid "Lowest" +#: src/language/stats/examine.q:507 +#, c-format +msgid "Boxplot of %s vs. %s" msgstr "" -#: src/language/stats/examine.q:1443 src/language/stats/oneway.q:394 -#: src/language/stats/oneway.q:692 src/language/stats/regression.q:203 -msgid "Std. Error" +#: src/language/stats/examine.q:511 +#, c-format +msgid "Boxplot of %s" msgstr "" -#: src/language/stats/examine.q:1445 src/language/stats/oneway.q:408 -#: src/ui/gui/examine.glade:307 -msgid "Descriptives" +#: src/language/stats/examine.q:747 src/language/stats/examine.q:760 +#, c-format +msgid "%s and %s are mutually exclusive" +msgstr "" + +#: src/language/stats/examine.q:1263 +msgid "Case Processing Summary" msgstr "" -#: src/language/stats/examine.q:1574 src/language/stats/oneway.q:399 +#: src/language/stats/examine.q:1555 src/language/stats/oneway.q:399 #, c-format msgid "%g%% Confidence Interval for Mean" msgstr "" -#: src/language/stats/examine.q:1580 src/language/stats/oneway.q:401 +#: src/language/stats/examine.q:1561 src/language/stats/oneway.q:401 msgid "Lower Bound" msgstr "" -#: src/language/stats/examine.q:1591 src/language/stats/oneway.q:402 +#: src/language/stats/examine.q:1566 src/language/stats/oneway.q:402 msgid "Upper Bound" msgstr "" -#: src/language/stats/examine.q:1603 +#: src/language/stats/examine.q:1571 #, c-format msgid "5%% Trimmed Mean" msgstr "" -#: src/language/stats/examine.q:1614 src/language/stats/frequencies.q:125 +#: src/language/stats/examine.q:1576 src/language/stats/frequencies.q:125 #: src/ui/gui/frequencies-dialog.c:52 msgid "Median" msgstr "" -#: src/language/stats/examine.q:1648 src/language/stats/npar-summary.c:128 +#: src/language/stats/examine.q:1586 src/language/stats/npar-summary.c:128 #: src/language/stats/oneway.q:393 src/language/stats/t-test.q:695 #: src/language/stats/t-test.q:718 src/language/stats/t-test.q:851 #: src/language/stats/t-test.q:1188 msgid "Std. Deviation" msgstr "" -#: src/language/stats/examine.q:1696 +#: src/language/stats/examine.q:1606 msgid "Interquartile Range" msgstr "" -#: src/language/stats/examine.q:1850 -#, c-format -msgid "Boxplot of %s vs. %s" +#: src/language/stats/examine.q:1742 src/language/stats/oneway.q:408 +#: src/ui/gui/examine.glade:307 +msgid "Descriptives" msgstr "" -#: src/language/stats/examine.q:1877 -msgid "Boxplot" +#: src/language/stats/examine.q:1748 src/language/stats/oneway.q:394 +#: src/language/stats/oneway.q:692 src/language/stats/regression.q:203 +msgid "Std. Error" msgstr "" -#: src/language/stats/examine.q:1919 +#: src/language/stats/examine.q:1845 src/language/stats/examine.q:1850 +#: src/ui/gui/psppire-var-store.c:675 src/ui/gui/psppire-var-store.c:685 +#: src/ui/gui/psppire-var-store.c:695 #, c-format -msgid "Normal Q-Q Plot of %s" +msgid "%d" msgstr "" -#: src/language/stats/examine.q:1920 src/language/stats/examine.q:1926 -msgid "Observed Value" +#: src/language/stats/examine.q:1928 +msgid "Highest" msgstr "" -#: src/language/stats/examine.q:1921 -msgid "Expected Normal" +#: src/language/stats/examine.q:1933 +msgid "Lowest" msgstr "" -#: src/language/stats/examine.q:1924 -#, c-format -msgid "Detrended Normal Q-Q Plot of %s" +#: src/language/stats/examine.q:1940 +msgid "Extreme Values" msgstr "" -#: src/language/stats/examine.q:1927 -msgid "Dev from Normal" +#: src/language/stats/examine.q:1944 +msgid "Case Number" msgstr "" -#: src/language/stats/examine.q:2046 src/language/stats/examine.q:2068 -#: src/language/stats/frequencies.q:1417 src/language/stats/npar-summary.c:141 +#: src/language/stats/examine.q:2066 +msgid "Tukey's Hinges" +msgstr "" + +#: src/language/stats/examine.q:2106 src/language/stats/examine.q:2124 +#: src/language/stats/frequencies.q:1412 src/language/stats/npar-summary.c:141 #: src/ui/gui/examine.glade:328 msgid "Percentiles" msgstr "" -#: src/language/stats/examine.q:2204 -msgid "Tukey's Hinges" +#: src/language/stats/examine.q:2113 +#, c-format +msgid "%g" msgstr "" #: src/language/stats/flip.c:96 @@ -3317,52 +3330,52 @@ msgid "" "MIN was specified as %g and MAX as %g. MIN and MAX will be ignored." msgstr "" -#: src/language/stats/frequencies.q:759 +#: src/language/stats/frequencies.q:754 #, c-format msgid "Variable %s specified multiple times on VARIABLES subcommand." msgstr "" -#: src/language/stats/frequencies.q:822 +#: src/language/stats/frequencies.q:817 msgid "`)' expected after GROUPED interval list." msgstr "" -#: src/language/stats/frequencies.q:834 +#: src/language/stats/frequencies.q:829 #, c-format msgid "Variables %s specified on GROUPED but not on VARIABLES." msgstr "" -#: src/language/stats/frequencies.q:841 +#: src/language/stats/frequencies.q:836 #, c-format msgid "Variables %s specified multiple times on GROUPED subcommand." msgstr "" -#: src/language/stats/frequencies.q:1059 src/language/stats/frequencies.q:1152 -#: src/language/stats/frequencies.q:1153 src/language/stats/frequencies.q:1187 +#: src/language/stats/frequencies.q:1054 src/language/stats/frequencies.q:1147 +#: src/language/stats/frequencies.q:1148 src/language/stats/frequencies.q:1182 msgid "Cum" msgstr "" -#: src/language/stats/frequencies.q:1061 src/output/charts/plot-hist.c:126 +#: src/language/stats/frequencies.q:1056 src/output/charts/plot-hist.c:138 msgid "Frequency" msgstr "" -#: src/language/stats/frequencies.q:1082 +#: src/language/stats/frequencies.q:1077 msgid "Value Label" msgstr "" -#: src/language/stats/frequencies.q:1185 +#: src/language/stats/frequencies.q:1180 msgid "Freq" msgstr "" -#: src/language/stats/frequencies.q:1186 src/language/stats/frequencies.q:1188 +#: src/language/stats/frequencies.q:1181 src/language/stats/frequencies.q:1183 msgid "Pct" msgstr "" -#: src/language/stats/frequencies.q:1379 +#: src/language/stats/frequencies.q:1374 #, c-format msgid "No valid data for variable %s; statistics not displayed." msgstr "" -#: src/language/stats/frequencies.q:1421 +#: src/language/stats/frequencies.q:1416 msgid "50 (Median)" msgstr "" @@ -4117,23 +4130,23 @@ msgstr "" msgid "hash table:" msgstr "" -#: src/math/percentiles.c:41 +#: src/math/percentiles.c:35 msgid "HAverage" msgstr "" -#: src/math/percentiles.c:42 +#: src/math/percentiles.c:36 msgid "Weighted Average" msgstr "" -#: src/math/percentiles.c:43 +#: src/math/percentiles.c:37 msgid "Rounded" msgstr "" -#: src/math/percentiles.c:44 +#: src/math/percentiles.c:38 msgid "Empirical" msgstr "" -#: src/math/percentiles.c:45 +#: src/math/percentiles.c:39 msgid "Empirical with averaging" msgstr "" @@ -4283,7 +4296,7 @@ msgstr "" msgid "creating \"%s\"" msgstr "" -#: src/output/charts/plot-hist.c:124 +#: src/output/charts/plot-hist.c:136 msgid "HISTOGRAM" msgstr "" @@ -5856,12 +5869,6 @@ msgstr "" msgid "Custom" msgstr "" -#: src/ui/gui/psppire-var-store.c:675 src/ui/gui/psppire-var-store.c:685 -#: src/ui/gui/psppire-var-store.c:695 -#, c-format -msgid "%d" -msgstr "" - #: src/ui/gui/rank.glade:111 msgid "By:" msgstr "" diff --git a/src/data/casewriter.c b/src/data/casewriter.c index a30e50e2..4461d85e 100644 --- a/src/data/casewriter.c +++ b/src/data/casewriter.c @@ -91,8 +91,7 @@ casewriter_get_value_cnt (const struct casewriter *writer) struct casereader * casewriter_make_reader (struct casewriter *writer) { - struct casereader *reader; - reader = writer->class->convert_to_reader (writer, writer->aux); + struct casereader *reader = writer->class->convert_to_reader (writer, writer->aux); taint_propagate (writer->taint, casereader_get_taint (reader)); taint_destroy (writer->taint); free (writer); @@ -241,10 +240,11 @@ casewriter_window_convert_to_reader (struct casewriter *writer UNUSED, void *window_) { struct casewindow *window = window_; - struct casereader *reader; - reader = casereader_create_random (casewindow_get_value_cnt (window), - casewindow_get_case_cnt (window), - &casereader_window_class, window); + struct casereader *reader = + casereader_create_random (casewindow_get_value_cnt (window), + casewindow_get_case_cnt (window), + &casereader_window_class, window); + taint_propagate (casewindow_get_taint (window), casereader_get_taint (reader)); return reader; diff --git a/src/language/stats/examine.q b/src/language/stats/examine.q index 9315e7e8..7f197ec3 100644 --- a/src/language/stats/examine.q +++ b/src/language/stats/examine.q @@ -1,5 +1,5 @@ /* PSPP - a program for statistical analysis. - Copyright (C) 2004 Free Software Foundation, Inc. + Copyright (C) 2004, 2008 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -22,9 +22,19 @@ #include #include +#include +#include +#include +#include +#include +#include +#include +#include #include #include #include +#include +#include #include #include #include @@ -37,9 +47,7 @@ #include #include #include -#include #include -#include #include #include #include @@ -56,6 +64,7 @@ #include #include #include +#include /* (specification) "EXAMINE" (xmn_): @@ -63,8 +72,8 @@ +total=custom; +nototal=custom; missing=miss:pairwise/!listwise, - rep:report/!noreport, - incl:include/!exclude; + rep:report/!noreport, + incl:include/!exclude; +compare=cmp:variables/!groups; +percentiles=custom; +id=var; @@ -78,73 +87,140 @@ /* (functions) */ - static struct cmd_examine cmd; static const struct variable **dependent_vars; - static size_t n_dependent_vars; +/* PERCENTILES */ + +static subc_list_double percentile_list; +static enum pc_alg percentile_algorithm; -struct factor +struct factor_metrics { - /* The independent variable */ - struct variable *indep_var[2]; + struct moments1 *moments; + + struct percentile **ptl; + size_t n_ptiles; + + struct statistic *tukey_hinges; + struct statistic *box_whisker; + struct statistic *trimmed_mean; + struct statistic *histogram; + struct order_stats *np; + + /* Three quartiles indexing into PTL */ + struct percentile **quartiles; + + /* A reader sorted in ASCENDING order */ + struct casereader *up_reader; + + /* The minimum value of all the weights */ + double cmin; + + /* Sum of all weights, including those for missing values */ + double n; + + double mean; + double variance; - /* Hash table of factor stats indexed by 2 values */ - struct hsh_table *fstats; + double skewness; - /* The hash table after it has been crunched */ - struct factor_statistics **fs; + double kurtosis; - struct factor *next; + double se_mean; + struct extrema *minima; + struct extrema *maxima; }; -/* Linked list of factors */ -static struct factor *factors = 0; +struct factor_result +{ + struct ll ll; -static struct metrics *totals = 0; + union value *value[2]; -/* Parse the clause specifying the factors */ -static int examine_parse_independent_vars (struct lexer *lexer, const struct dictionary *dict, struct cmd_examine *cmd); + /* An array of factor metrics, one for each variable */ + struct factor_metrics *metrics; +}; +struct xfactor +{ + /* We need to make a list of this structure */ + struct ll ll; + /* The independent variable */ + const struct variable const* indep_var[2]; -/* Output functions */ -static void show_summary (const struct variable **dependent_var, int n_dep_var, - const struct factor *f); + /* A list of results for this factor */ + struct ll_list result_list ; +}; -static void show_extremes (const struct variable **dependent_var, - int n_dep_var, - const struct factor *factor, - int n_extremities); -static void show_descriptives (const struct variable **dependent_var, - int n_dep_var, - struct factor *factor); +static void +factor_destroy (struct xfactor *fctr) +{ + struct ll *ll = ll_head (&fctr->result_list); + while (ll != ll_null (&fctr->result_list)) + { + int v; + struct factor_result *result = + ll_data (ll, struct factor_result, ll); -static void show_percentiles (const struct variable **dependent_var, - int n_dep_var, - struct factor *factor); + for (v = 0; v < n_dependent_vars; ++v) + { + int i; + moments1_destroy (result->metrics[v].moments); + extrema_destroy (result->metrics[v].minima); + extrema_destroy (result->metrics[v].maxima); + statistic_destroy (result->metrics[v].trimmed_mean); + statistic_destroy (result->metrics[v].tukey_hinges); + statistic_destroy (result->metrics[v].box_whisker); + statistic_destroy (result->metrics[v].histogram); + for (i = 0 ; i < result->metrics[v].n_ptiles; ++i) + statistic_destroy ((struct statistic *) result->metrics[v].ptl[i]); + free (result->metrics[v].ptl); + free (result->metrics[v].quartiles); + casereader_destroy (result->metrics[v].up_reader); + } + free (result->value[0]); + free (result->value[1]); + free (result->metrics); + ll = ll_next (ll); + free (result); + } +} +static struct xfactor level0_factor; +static struct ll_list factor_list = LL_INITIALIZER (factor_list); +/* Parse the clause specifying the factors */ +static int examine_parse_independent_vars (struct lexer *lexer, + const struct dictionary *dict, + struct cmd_examine *cmd); -void np_plot (const struct metrics *m, const char *factorname); +/* Output functions */ +static void show_summary (const struct variable **dependent_var, int n_dep_var, + const struct xfactor *f); -void box_plot_group (const struct factor *fctr, - const struct variable **vars, int n_vars, - const struct variable *id - ) ; +static void show_descriptives (const struct variable **dependent_var, + int n_dep_var, + const struct xfactor *f); -void box_plot_variables (const struct factor *fctr, - const struct variable **vars, int n_vars, - const struct variable *id - ); +static void show_percentiles (const struct variable **dependent_var, + int n_dep_var, + const struct xfactor *f); + + +static void show_extremes (const struct variable **dependent_var, + int n_dep_var, + const struct xfactor *f); + @@ -161,34 +237,24 @@ void factor_calc (const struct ccase *c, int case_no, /* Represent a factor as a string, so it can be printed in a human readable fashion */ -static void factor_to_string (const struct factor *fctr, - const struct factor_statistics *fs, - const struct variable *var, - struct string *str - ); +static void factor_to_string (const struct xfactor *fctr, + const struct factor_result *result, + struct string *str); /* Represent a factor as a string, so it can be printed in a human readable fashion, but sacrificing some readablility for the sake of brevity */ -static void factor_to_string_concise (const struct factor *fctr, - const struct factor_statistics *fs, - struct string *); - +static void +factor_to_string_concise (const struct xfactor *fctr, + const struct factor_result *result, + struct string *str + ); /* Categories of missing values to exclude. */ static enum mv_class exclude_values; -/* PERCENTILES */ - -static subc_list_double percentile_list; - -static enum pc_alg percentile_algorithm; - -static short sbc_percentile; - - int cmd_examine (struct lexer *lexer, struct dataset *ds) { @@ -224,225 +290,404 @@ cmd_examine (struct lexer *lexer, struct dataset *ds) } grouper = casegrouper_create_splits (proc_open (ds), dataset_dict (ds)); + while (casegrouper_get_next_group (grouper, &group)) - run_examine (&cmd, group, ds); + { + struct casereader *reader = + casereader_create_arithmetic_sequence (group, 1, 1); + + run_examine (&cmd, reader, ds); + } + ok = casegrouper_destroy (grouper); ok = proc_commit (ds) && ok; - if ( totals ) + if ( dependent_vars ) + free (dependent_vars); + + subc_list_double_destroy (&percentile_list); + + return ok ? CMD_SUCCESS : CMD_CASCADING_FAILURE; +}; + + +/* Plot the normal and detrended normal plots for RESULT. + Label the plots with LABEL */ +static void +np_plot (struct np *np, const char *label) +{ + double yfirst = 0, ylast = 0; + + double x_lower; + double x_upper; + double slack; + + /* Normal Plot */ + struct chart *np_chart; + + /* Detrended Normal Plot */ + struct chart *dnp_chart; + + /* The slope and intercept of the ideal normal probability line */ + const double slope = 1.0 / np->stddev; + const double intercept = -np->mean / np->stddev; + + if ( np->n < 1.0 ) { - free ( totals ); + msg (MW, _("Not creating plot because data set is empty.")); + return ; } - if ( dependent_vars ) - free (dependent_vars); + np_chart = chart_create (); + dnp_chart = chart_create (); + + if ( !np_chart || ! dnp_chart ) + return ; + + chart_write_title (np_chart, _("Normal Q-Q Plot of %s"), label); + chart_write_xlabel (np_chart, _("Observed Value")); + chart_write_ylabel (np_chart, _("Expected Normal")); + + chart_write_title (dnp_chart, _("Detrended Normal Q-Q Plot of %s"), + label); + chart_write_xlabel (dnp_chart, _("Observed Value")); + chart_write_ylabel (dnp_chart, _("Dev from Normal")); + + yfirst = gsl_cdf_ugaussian_Pinv (1 / (np->n + 1)); + ylast = gsl_cdf_ugaussian_Pinv (np->n / (np->n + 1)); + + /* Need to make sure that both the scatter plot and the ideal fit into the + plot */ + x_lower = MIN (np->y_min, (yfirst - intercept) / slope) ; + x_upper = MAX (np->y_max, (ylast - intercept) / slope) ; + slack = (x_upper - x_lower) * 0.05 ; + + chart_write_xscale (np_chart, x_lower - slack, x_upper + slack, 5); + chart_write_xscale (dnp_chart, np->y_min, np->y_max, 5); + + chart_write_yscale (np_chart, yfirst, ylast, 5); + chart_write_yscale (dnp_chart, np->dns_min, np->dns_max, 5); { - struct factor *f = factors ; - while ( f ) + struct ccase c; + struct casereader *reader = casewriter_make_reader (np->writer); + while (casereader_read (reader, &c)) { - struct factor *ff = f; + chart_datum (np_chart, 0, case_data_idx (&c, NP_IDX_Y)->f, case_data_idx (&c, NP_IDX_NS)->f); + chart_datum (dnp_chart, 0, case_data_idx (&c, NP_IDX_Y)->f, case_data_idx (&c, NP_IDX_DNS)->f); - f = f->next; - free ( ff->fs ); - hsh_destroy ( ff->fstats ) ; - free ( ff ) ; + case_destroy (&c); } - factors = 0; + casereader_destroy (reader); } - subc_list_double_destroy (&percentile_list); - - return ok ? CMD_SUCCESS : CMD_CASCADING_FAILURE; -}; + chart_line (dnp_chart, 0, 0, np->y_min, np->y_max , CHART_DIM_X); + chart_line (np_chart, slope, intercept, yfirst, ylast , CHART_DIM_Y); + chart_submit (np_chart); + chart_submit (dnp_chart); +} -/* Show all the appropriate tables */ static void -output_examine (void) +show_npplot (const struct variable **dependent_var, + int n_dep_var, + const struct xfactor *fctr) { - struct factor *fctr; + int v; - /* Show totals if appropriate */ - if ( ! cmd.sbc_nototal || factors == 0 ) + for (v = 0; v < n_dep_var; ++v) { - show_summary (dependent_vars, n_dependent_vars, 0); + struct ll *ll; + for (ll = ll_head (&fctr->result_list); + ll != ll_null (&fctr->result_list); + ll = ll_next (ll)) + { + struct string str; + const struct factor_result *result = + ll_data (ll, struct factor_result, ll); + + ds_init_empty (&str); + ds_put_format (&str, "%s ", var_get_name (dependent_var[v])); + + factor_to_string (fctr, result, &str); + + np_plot ((struct np*) result->metrics[v].np, ds_cstr(&str)); + + statistic_destroy ((struct statistic *)result->metrics[v].np); - if ( cmd.sbc_statistics ) + ds_destroy (&str); + } + } +} + + +static void +show_histogram (const struct variable **dependent_var, + int n_dep_var, + const struct xfactor *fctr) +{ + int v; + + for (v = 0; v < n_dep_var; ++v) + { + struct ll *ll; + for (ll = ll_head (&fctr->result_list); + ll != ll_null (&fctr->result_list); + ll = ll_next (ll)) { - if ( cmd.a_statistics[XMN_ST_EXTREME]) - show_extremes (dependent_vars, n_dependent_vars, 0, cmd.st_n); + struct string str; + const struct factor_result *result = + ll_data (ll, struct factor_result, ll); + + ds_init_empty (&str); + ds_put_format (&str, "%s ", var_get_name (dependent_var[v])); + + factor_to_string (fctr, result, &str); - if ( cmd.a_statistics[XMN_ST_DESCRIPTIVES]) - show_descriptives (dependent_vars, n_dependent_vars, 0); + histogram_plot ((struct histogram *) result->metrics[v].histogram, + ds_cstr (&str), + (struct moments1 *) result->metrics[v].moments); + ds_destroy (&str); } - if ( sbc_percentile ) - show_percentiles (dependent_vars, n_dependent_vars, 0); + } +} + + - if ( cmd.sbc_plot) +static void +show_boxplot_groups (const struct variable **dependent_var, + int n_dep_var, + const struct xfactor *fctr) +{ + int v; + + for (v = 0; v < n_dep_var; ++v) + { + struct ll *ll; + int f = 0; + struct chart *ch = chart_create (); + double y_min = DBL_MAX; + double y_max = -DBL_MAX; + + for (ll = ll_head (&fctr->result_list); + ll != ll_null (&fctr->result_list); + ll = ll_next (ll)) { - int v; - if ( cmd.a_plot[XMN_PLT_STEMLEAF] ) - msg (SW, _ ("%s is not currently supported."), "STEMLEAF"); + const struct extremum *max, *min; + const struct factor_result *result = + ll_data (ll, struct factor_result, ll); - if ( cmd.a_plot[XMN_PLT_SPREADLEVEL] ) - msg (SW, _ ("%s is not currently supported."), "SPREADLEVEL"); + const struct ll_list *max_list = + extrema_list (result->metrics[v].maxima); - if ( cmd.a_plot[XMN_PLT_NPPLOT] ) - { - for ( v = 0 ; v < n_dependent_vars; ++v ) - np_plot (&totals[v], var_to_string (dependent_vars[v])); - } + const struct ll_list *min_list = + extrema_list (result->metrics[v].minima); - if ( cmd.a_plot[XMN_PLT_BOXPLOT] ) + if ( ll_is_empty (max_list)) { - if ( cmd.cmp == XMN_GROUPS ) - { - box_plot_group (0, (const struct variable **) dependent_vars, - n_dependent_vars, cmd.v_id); - } - else - box_plot_variables (0, - (const struct variable **) dependent_vars, - n_dependent_vars, cmd.v_id); + msg (MW, _("Not creating plot because data set is empty.")); + continue; } - if ( cmd.a_plot[XMN_PLT_HISTOGRAM] ) - { - for ( v = 0 ; v < n_dependent_vars; ++v ) - { - struct normal_curve normal; + max = (const struct extremum *) + ll_data (ll_head(max_list), struct extremum, ll); - normal.N = totals[v].n; - normal.mean = totals[v].mean; - normal.stddev = totals[v].stddev; + min = (const struct extremum *) + ll_data (ll_head (min_list), struct extremum, ll); - histogram_plot (totals[v].histogram, - var_to_string (dependent_vars[v]), - &normal, 0); - } - } + y_max = MAX (y_max, max->value); + y_min = MIN (y_min, min->value); + } + + boxplot_draw_yscale (ch, y_max, y_min); + + if ( fctr->indep_var[0]) + chart_write_title (ch, _("Boxplot of %s vs. %s"), + var_to_string (dependent_var[v]), + var_to_string (fctr->indep_var[0]) ); + else + chart_write_title (ch, _("Boxplot of %s"), + var_to_string (dependent_var[v])); + + for (ll = ll_head (&fctr->result_list); + ll != ll_null (&fctr->result_list); + ll = ll_next (ll)) + { + const struct factor_result *result = + ll_data (ll, struct factor_result, ll); + struct string str; + const double box_width = (ch->data_right - ch->data_left) + / (ll_count (&fctr->result_list) * 2.0 ) ; + + const double box_centre = (f++ * 2 + 1) * box_width + ch->data_left; + + ds_init_empty (&str); + factor_to_string_concise (fctr, result, &str); + + boxplot_draw_boxplot (ch, + box_centre, box_width, + (const struct box_whisker *) + result->metrics[v].box_whisker, + ds_cstr (&str)); + + ds_destroy (&str); } + chart_submit (ch); } +} - /* Show grouped statistics as appropriate */ - fctr = factors; - while ( fctr ) - { - show_summary (dependent_vars, n_dependent_vars, fctr); - if ( cmd.sbc_statistics ) - { - if ( cmd.a_statistics[XMN_ST_EXTREME]) - show_extremes (dependent_vars, n_dependent_vars, fctr, cmd.st_n); +static void +show_boxplot_variables (const struct variable **dependent_var, + int n_dep_var, + const struct xfactor *fctr + ) - if ( cmd.a_statistics[XMN_ST_DESCRIPTIVES]) - show_descriptives (dependent_vars, n_dependent_vars, fctr); - } +{ + int v; + struct ll *ll; + const struct ll_list *result_list = &fctr->result_list; + + for (ll = ll_head (result_list); + ll != ll_null (result_list); + ll = ll_next (ll)) + + { + struct string title; + struct chart *ch = chart_create (); + double y_min = DBL_MAX; + double y_max = -DBL_MAX; - if ( sbc_percentile ) - show_percentiles (dependent_vars, n_dependent_vars, fctr); + const struct factor_result *result = + ll_data (ll, struct factor_result, ll); + const double box_width = (ch->data_right - ch->data_left) + / (n_dep_var * 2.0 ) ; - if ( cmd.sbc_plot) + for (v = 0; v < n_dep_var; ++v) { - size_t v; + const struct ll *max_ll = + ll_head (extrema_list (result->metrics[v].maxima)); + const struct ll *min_ll = + ll_head (extrema_list (result->metrics[v].minima)); - struct factor_statistics **fs = fctr->fs ; + const struct extremum *max = + (const struct extremum *) ll_data (max_ll, struct extremum, ll); - if ( cmd.a_plot[XMN_PLT_BOXPLOT] ) - { - if ( cmd.cmp == XMN_VARIABLES ) - box_plot_variables (fctr, - (const struct variable **) dependent_vars, - n_dependent_vars, cmd.v_id); - else - box_plot_group (fctr, - (const struct variable **) dependent_vars, - n_dependent_vars, cmd.v_id); - } + const struct extremum *min = + (const struct extremum *) ll_data (min_ll, struct extremum, ll); - for ( v = 0 ; v < n_dependent_vars; ++v ) - { + y_max = MAX (y_max, max->value); + y_min = MIN (y_min, min->value); + } - for ( fs = fctr->fs ; *fs ; ++fs ) - { - struct string str; - ds_init_empty (&str); - factor_to_string (fctr, *fs, dependent_vars[v], &str); - if ( cmd.a_plot[XMN_PLT_NPPLOT] ) - np_plot (& (*fs)->m[v], ds_cstr (&str)); + boxplot_draw_yscale (ch, y_max, y_min); - if ( cmd.a_plot[XMN_PLT_HISTOGRAM] ) - { - struct normal_curve normal; + ds_init_empty (&title); + factor_to_string (fctr, result, &title); - normal.N = (*fs)->m[v].n; - normal.mean = (*fs)->m[v].mean; - normal.stddev = (*fs)->m[v].stddev; +#if 0 + ds_put_format (&title, "%s = ", var_get_name (fctr->indep_var[0])); + var_append_value_name (fctr->indep_var[0], result->value[0], &title); +#endif - histogram_plot ((*fs)->m[v].histogram, - ds_cstr (&str) , &normal, 0); - } + chart_write_title (ch, ds_cstr (&title)); + ds_destroy (&title); - ds_destroy (&str); + for (v = 0; v < n_dep_var; ++v) + { + struct string str; + const double box_centre = (v * 2 + 1) * box_width + ch->data_left; - } /* for ( fs .... */ + ds_init_empty (&str); + ds_init_cstr (&str, var_get_name (dependent_var[v])); - } /* for ( v = 0 ..... */ + boxplot_draw_boxplot (ch, + box_centre, box_width, + (const struct box_whisker *) result->metrics[v].box_whisker, + ds_cstr (&str)); + ds_destroy (&str); } - fctr = fctr->next; + chart_submit (ch); } - } -/* Create a hash table of percentiles and their values from the list of - percentiles */ -static struct hsh_table * -list_to_ptile_hash (const subc_list_double *l) +/* Show all the appropriate tables */ +static void +output_examine (void) { - int i; + struct ll *ll; + + show_summary (dependent_vars, n_dependent_vars, &level0_factor); - struct hsh_table *h ; + if ( cmd.a_statistics[XMN_ST_EXTREME] ) + show_extremes (dependent_vars, n_dependent_vars, &level0_factor); - h = hsh_create (subc_list_double_count (l), - (hsh_compare_func *) ptile_compare, - (hsh_hash_func *) ptile_hash, - (hsh_free_func *) free, - 0); + if ( cmd.a_statistics[XMN_ST_DESCRIPTIVES] ) + show_descriptives (dependent_vars, n_dependent_vars, &level0_factor); + if ( cmd.sbc_percentiles) + show_percentiles (dependent_vars, n_dependent_vars, &level0_factor); - for ( i = 0 ; i < subc_list_double_count (l) ; ++i ) + if ( cmd.sbc_plot) { - struct percentile *p = xmalloc (sizeof *p); - - p->p = subc_list_double_at (l,i); - p->v = SYSMIS; + if (cmd.a_plot[XMN_PLT_BOXPLOT]) + show_boxplot_groups (dependent_vars, n_dependent_vars, &level0_factor); - hsh_insert (h, p); + if (cmd.a_plot[XMN_PLT_HISTOGRAM]) + show_histogram (dependent_vars, n_dependent_vars, &level0_factor); + if (cmd.a_plot[XMN_PLT_NPPLOT]) + show_npplot (dependent_vars, n_dependent_vars, &level0_factor); } - return h; + for (ll = ll_head (&factor_list); + ll != ll_null (&factor_list); ll = ll_next (ll)) + { + struct xfactor *factor = ll_data (ll, struct xfactor, ll); + show_summary (dependent_vars, n_dependent_vars, factor); + + if ( cmd.a_statistics[XMN_ST_EXTREME] ) + show_extremes (dependent_vars, n_dependent_vars, factor); + + if ( cmd.a_statistics[XMN_ST_DESCRIPTIVES] ) + show_descriptives (dependent_vars, n_dependent_vars, factor); + + if ( cmd.sbc_percentiles) + show_percentiles (dependent_vars, n_dependent_vars, factor); + + if (cmd.a_plot[XMN_PLT_BOXPLOT] && + cmd.cmp == XMN_GROUPS) + show_boxplot_groups (dependent_vars, n_dependent_vars, factor); + + + if (cmd.a_plot[XMN_PLT_BOXPLOT] && + cmd.cmp == XMN_VARIABLES) + show_boxplot_variables (dependent_vars, n_dependent_vars, + factor); + + if (cmd.a_plot[XMN_PLT_HISTOGRAM]) + show_histogram (dependent_vars, n_dependent_vars, factor); + if (cmd.a_plot[XMN_PLT_NPPLOT]) + show_npplot (dependent_vars, n_dependent_vars, factor); + } } /* Parse the PERCENTILES subcommand */ static int xmn_custom_percentiles (struct lexer *lexer, struct dataset *ds UNUSED, - struct cmd_examine *p UNUSED, void *aux UNUSED) + struct cmd_examine *p UNUSED, void *aux UNUSED) { - sbc_percentile = 1; - lex_match (lexer, '='); lex_match (lexer, '('); @@ -494,11 +739,12 @@ xmn_custom_percentiles (struct lexer *lexer, struct dataset *ds UNUSED, /* TOTAL and NOTOTAL are simple, mutually exclusive flags */ static int -xmn_custom_total (struct lexer *lexer UNUSED, struct dataset *ds UNUSED, struct cmd_examine *p, void *aux UNUSED) +xmn_custom_total (struct lexer *lexer UNUSED, struct dataset *ds UNUSED, + struct cmd_examine *p, void *aux UNUSED) { if ( p->sbc_nototal ) { - msg (SE, _ ("%s and %s are mutually exclusive"),"TOTAL","NOTOTAL"); + msg (SE, _("%s and %s are mutually exclusive"),"TOTAL","NOTOTAL"); return 0; } @@ -511,7 +757,7 @@ xmn_custom_nototal (struct lexer *lexer UNUSED, struct dataset *ds UNUSED, { if ( p->sbc_total ) { - msg (SE, _ ("%s and %s are mutually exclusive"),"TOTAL","NOTOTAL"); + msg (SE, _("%s and %s are mutually exclusive"), "TOTAL", "NOTOTAL"); return 0; } @@ -523,19 +769,21 @@ xmn_custom_nototal (struct lexer *lexer UNUSED, struct dataset *ds UNUSED, /* Parser for the variables sub command Returns 1 on success */ static int -xmn_custom_variables (struct lexer *lexer, struct dataset *ds, struct cmd_examine *cmd, void *aux UNUSED) +xmn_custom_variables (struct lexer *lexer, struct dataset *ds, + struct cmd_examine *cmd, + void *aux UNUSED) { const struct dictionary *dict = dataset_dict (ds); lex_match (lexer, '='); if ( (lex_token (lexer) != T_ID || dict_lookup_var (dict, lex_tokid (lexer)) == NULL) - && lex_token (lexer) != T_ALL) + && lex_token (lexer) != T_ALL) { return 2; } if (!parse_variables_const (lexer, dict, &dependent_vars, &n_dependent_vars, - PV_NO_DUPLICATE | PV_NUMERIC | PV_NO_SCRATCH) ) + PV_NO_DUPLICATE | PV_NUMERIC | PV_NO_SCRATCH) ) { free (dependent_vars); return 0; @@ -543,16 +791,15 @@ xmn_custom_variables (struct lexer *lexer, struct dataset *ds, struct cmd_examin assert (n_dependent_vars); - totals = xnmalloc (n_dependent_vars, sizeof *totals); if ( lex_match (lexer, T_BY)) { int success ; success = examine_parse_independent_vars (lexer, dict, cmd); - if ( success != 1 ) { - free (dependent_vars); - free (totals) ; - } + if ( success != 1 ) + { + free (dependent_vars); + } return success; } @@ -563,47 +810,44 @@ xmn_custom_variables (struct lexer *lexer, struct dataset *ds, struct cmd_examin /* Parse the clause specifying the factors */ static int -examine_parse_independent_vars (struct lexer *lexer, const struct dictionary *dict, struct cmd_examine *cmd) +examine_parse_independent_vars (struct lexer *lexer, + const struct dictionary *dict, + struct cmd_examine *cmd) { int success; - struct factor *sf = xmalloc (sizeof *sf); + struct xfactor *sf = xmalloc (sizeof *sf); - if ( (lex_token (lexer) != T_ID || dict_lookup_var (dict, lex_tokid (lexer)) == NULL) - && lex_token (lexer) != T_ALL) + ll_init (&sf->result_list); + + if ( (lex_token (lexer) != T_ID || + dict_lookup_var (dict, lex_tokid (lexer)) == NULL) + && lex_token (lexer) != T_ALL) { free ( sf ) ; return 2; } - sf->indep_var[0] = parse_variable (lexer, dict); - sf->indep_var[1] = 0; + sf->indep_var[1] = NULL; if ( lex_token (lexer) == T_BY ) { - lex_match (lexer, T_BY); - if ( (lex_token (lexer) != T_ID || dict_lookup_var (dict, lex_tokid (lexer)) == NULL) - && lex_token (lexer) != T_ALL) + if ( (lex_token (lexer) != T_ID || + dict_lookup_var (dict, lex_tokid (lexer)) == NULL) + && lex_token (lexer) != T_ALL) { - free ( sf ) ; + free (sf); return 2; } sf->indep_var[1] = parse_variable (lexer, dict); + ll_push_tail (&factor_list, &sf->ll); } - - - sf->fstats = hsh_create (4, - (hsh_compare_func *) factor_statistics_compare, - (hsh_hash_func *) factor_statistics_hash, - (hsh_free_func *) factor_statistics_free, - 0); - - sf->next = factors; - factors = sf; + else + ll_push_tail (&factor_list, &sf->ll); lex_match (lexer, ','); @@ -618,335 +862,378 @@ examine_parse_independent_vars (struct lexer *lexer, const struct dictionary *di return success; } +static void +examine_group (struct cmd_examine *cmd, struct casereader *reader, int level, + const struct dictionary *dict, struct xfactor *factor) +{ + struct ccase c; + const struct variable *wv = dict_get_weight (dict); + int v; + int n_extrema = 1; + struct factor_result *result = xzalloc (sizeof (*result)); + + result->metrics = xcalloc (n_dependent_vars, sizeof (*result->metrics)); + if ( cmd->a_statistics[XMN_ST_EXTREME] ) + n_extrema = cmd->st_n; -void populate_percentiles (struct tab_table *tbl, int col, int row, - const struct metrics *m); + if (casereader_peek (reader, 0, &c)) + { + if ( level > 0) + { + result->value[0] = + value_dup (case_data (&c, factor->indep_var[0]), + var_get_width (factor->indep_var[0])); + + if ( level > 1) + result->value[1] = + value_dup (case_data (&c, factor->indep_var[1]), + var_get_width (factor->indep_var[1])); + } + case_destroy (&c); + } + + for (v = 0; v < n_dependent_vars; ++v) + { + struct casewriter *writer; + struct casereader *input = casereader_clone (reader); + + result->metrics[v].moments = moments1_create (MOMENT_KURTOSIS); + result->metrics[v].minima = extrema_create (n_extrema, EXTREME_MINIMA); + result->metrics[v].maxima = extrema_create (n_extrema, EXTREME_MAXIMA); + result->metrics[v].cmin = DBL_MAX; + + if (cmd->a_statistics[XMN_ST_DESCRIPTIVES] || + cmd->a_plot[XMN_PLT_BOXPLOT] || + cmd->a_plot[XMN_PLT_NPPLOT] || + cmd->sbc_percentiles) + { + /* In this case, we need to sort the data, so we create a sorting + casewriter */ + struct case_ordering *up_ordering = case_ordering_create (); -void populate_descriptives (struct tab_table *t, int col, int row, - const struct metrics *fs); + case_ordering_add_var (up_ordering, dependent_vars[v], SRT_ASCEND); + writer = sort_create_writer (up_ordering, + casereader_get_value_cnt (reader)); + } + else + { + /* but in this case, sorting is unnecessary, so an ordinary + casewriter is sufficient */ + writer = + autopaging_writer_create (casereader_get_value_cnt (reader)); + } -void populate_extremes (struct tab_table *t, int col, int row, int n, - const struct metrics *m); -void populate_summary (struct tab_table *t, int col, int row, - const struct metrics *m); + /* Sort or just iterate, whilst calculating moments etc */ + while (casereader_read (input, &c)) + { + const casenumber loc = + case_data_idx (&c, casereader_get_value_cnt (reader) - 1)->f; + const double weight = wv ? case_data (&c, wv)->f : 1.0; + if (weight != SYSMIS) + minimize (&result->metrics[v].cmin, weight); + moments1_add (result->metrics[v].moments, + case_data (&c, dependent_vars[v])->f, + weight); -/* Perform calculations for the sub factors */ -void -factor_calc (const struct ccase *c, int case_no, double weight, - bool case_missing) -{ - size_t v; - struct factor *fctr = factors; + result->metrics[v].n += weight; - while ( fctr) - { - struct factor_statistics **foo ; - union value *indep_vals[2] ; + extrema_add (result->metrics[v].maxima, + case_data (&c, dependent_vars[v])->f, + weight, + loc); - indep_vals[0] = value_dup ( - case_data (c, fctr->indep_var[0]), - var_get_width (fctr->indep_var[0]) - ); + extrema_add (result->metrics[v].minima, + case_data (&c, dependent_vars[v])->f, + weight, + loc); - if ( fctr->indep_var[1] ) - indep_vals[1] = value_dup ( - case_data (c, fctr->indep_var[1]), - var_get_width (fctr->indep_var[1]) - ); - else - { - const union value sm = {SYSMIS}; - indep_vals[1] = value_dup (&sm, 0); + casewriter_write (writer, &c); } + casereader_destroy (input); + result->metrics[v].up_reader = casewriter_make_reader (writer); + } - assert (fctr->fstats); + /* If percentiles or descriptives have been requested, then a + second pass through the data (which has now been sorted) + is necessary */ + if ( cmd->a_statistics[XMN_ST_DESCRIPTIVES] || + cmd->a_plot[XMN_PLT_BOXPLOT] || + cmd->a_plot[XMN_PLT_NPPLOT] || + cmd->sbc_percentiles) + { + for (v = 0; v < n_dependent_vars; ++v) + { + int i; + int n_os; + struct order_stats **os ; + struct factor_metrics *metric = &result->metrics[v]; - foo = ( struct factor_statistics ** ) - hsh_probe (fctr->fstats, (void *) &indep_vals); + metric->n_ptiles = percentile_list.n_data; - if ( !*foo ) - { + metric->ptl = xcalloc (metric->n_ptiles, + sizeof (struct percentile *)); - *foo = create_factor_statistics (n_dependent_vars, - indep_vals[0], - indep_vals[1]); + metric->quartiles = xcalloc (3, sizeof (*metric->quartiles)); - for ( v = 0 ; v < n_dependent_vars ; ++v ) + for (i = 0 ; i < metric->n_ptiles; ++i) { - metrics_precalc ( & (*foo)->m[v] ); + metric->ptl[i] = (struct percentile *) + percentile_create (percentile_list.data[i] / 100.0, metric->n); + + if ( percentile_list.data[i] == 25) + metric->quartiles[0] = metric->ptl[i]; + else if ( percentile_list.data[i] == 50) + metric->quartiles[1] = metric->ptl[i]; + else if ( percentile_list.data[i] == 75) + metric->quartiles[2] = metric->ptl[i]; } - } - else - { - free (indep_vals[0]); - free (indep_vals[1]); - } + metric->tukey_hinges = tukey_hinges_create (metric->n, metric->cmin); + metric->trimmed_mean = trimmed_mean_create (metric->n, 0.05); - for ( v = 0 ; v < n_dependent_vars ; ++v ) - { - const struct variable *var = dependent_vars[v]; - union value *val = value_dup ( - case_data (c, var), - var_get_width (var) - ); + n_os = metric->n_ptiles + 2; - if (case_missing || var_is_value_missing (var, val, exclude_values)) + if ( cmd->a_plot[XMN_PLT_NPPLOT] ) { - free (val); - val = NULL; + metric->np = np_create (metric->moments); + n_os ++; } - metrics_calc ( & (*foo)->m[v], val, weight, case_no); + os = xcalloc (sizeof (struct order_stats *), n_os); - free (val); - } + for (i = 0 ; i < metric->n_ptiles ; ++i ) + { + os[i] = (struct order_stats *) metric->ptl[i]; + } - fctr = fctr->next; + os[i] = (struct order_stats *) metric->tukey_hinges; + os[i+1] = (struct order_stats *) metric->trimmed_mean; + + if (cmd->a_plot[XMN_PLT_NPPLOT]) + os[i+2] = metric->np; + + order_stats_accumulate (os, n_os, + casereader_clone (metric->up_reader), + wv, dependent_vars[v], MV_ANY); + free (os); + } } -} -static void -run_examine (struct cmd_examine *cmd, struct casereader *input, - struct dataset *ds) -{ - struct dictionary *dict = dataset_dict (ds); - casenumber case_no; - struct ccase c; - int v; - bool ok; - - struct factor *fctr; - - if (!casereader_peek (input, 0, &c)) - { - casereader_destroy (input); - return; - } - output_split_file_values (ds, &c); - case_destroy (&c); - - input = casereader_create_filter_weight (input, dict, NULL, NULL); - input = casereader_create_counter (input, &case_no, 0); - - /* Make sure we haven't got rubbish left over from a - previous split. */ - fctr = factors; - while (fctr) + /* FIXME: Do this in the above loop */ + if ( cmd->a_plot[XMN_PLT_HISTOGRAM] ) { - struct factor *next = fctr->next; + struct ccase c; + struct casereader *input = casereader_clone (reader); - hsh_clear (fctr->fstats); + for (v = 0; v < n_dependent_vars; ++v) + { + const struct extremum *max, *min; + struct factor_metrics *metric = &result->metrics[v]; - fctr->fs = 0; + const struct ll_list *max_list = + extrema_list (result->metrics[v].maxima); - fctr = next; - } + const struct ll_list *min_list = + extrema_list (result->metrics[v].minima); - for ( v = 0 ; v < n_dependent_vars ; ++v ) - metrics_precalc (&totals[v]); + if ( ll_is_empty (max_list)) + { + msg (MW, _("Not creating plot because data set is empty.")); + continue; + } - for (; casereader_read (input, &c); case_destroy (&c)) - { - bool case_missing = false; - const double weight = dict_get_case_weight (dict, &c, NULL); + assert (! ll_is_empty (min_list)); - if ( cmd->miss == XMN_LISTWISE ) - { - for ( v = 0 ; v < n_dependent_vars ; ++v ) - { - const struct variable *var = dependent_vars[v]; - union value *val = value_dup ( - case_data (&c, var), - var_get_width (var) - ); + max = (const struct extremum *) + ll_data (ll_head(max_list), struct extremum, ll); - if ( var_is_value_missing (var, val, exclude_values)) - case_missing = true; + min = (const struct extremum *) + ll_data (ll_head (min_list), struct extremum, ll); - free (val); - } + metric->histogram = histogram_create (10, min->value, max->value); } - for ( v = 0 ; v < n_dependent_vars ; ++v ) + while (casereader_read (input, &c)) { - const struct variable *var = dependent_vars[v]; - union value *val = value_dup ( - case_data (&c, var), - var_get_width (var) - ); - - if ( var_is_value_missing (var, val, exclude_values) - || case_missing ) + const double weight = wv ? case_data (&c, wv)->f : 1.0; + + for (v = 0; v < n_dependent_vars; ++v) { - free (val) ; - val = NULL; + struct factor_metrics *metric = &result->metrics[v]; + if ( metric->histogram) + histogram_add ((struct histogram *) metric->histogram, + case_data (&c, dependent_vars[v])->f, weight); } - - metrics_calc (&totals[v], val, weight, case_no); - - free (val); + case_destroy (&c); } - - factor_calc (&c, case_no, weight, case_missing); + casereader_destroy (input); } - ok = casereader_destroy (input); - for ( v = 0 ; v < n_dependent_vars ; ++v) + /* In this case, a third iteration is required */ + if (cmd->a_plot[XMN_PLT_BOXPLOT]) { - fctr = factors; - while ( fctr ) + for (v = 0; v < n_dependent_vars; ++v) { - struct hsh_iterator hi; - struct factor_statistics *fs; + struct factor_metrics *metric = &result->metrics[v]; + + metric->box_whisker = + box_whisker_create ((struct tukey_hinges *) metric->tukey_hinges, + cmd->v_id, + casereader_get_value_cnt (metric->up_reader) + - 1); + + order_stats_accumulate ((struct order_stats **) &metric->box_whisker, + 1, + casereader_clone (metric->up_reader), + wv, dependent_vars[v], MV_ANY); + } + } - for ( fs = hsh_first (fctr->fstats, &hi); - fs != 0 ; - fs = hsh_next (fctr->fstats, &hi)) - { + ll_push_tail (&factor->result_list, &result->ll); + casereader_destroy (reader); +} - fs->m[v].ptile_hash = list_to_ptile_hash (&percentile_list); - fs->m[v].ptile_alg = percentile_algorithm; - metrics_postcalc (&fs->m[v]); - } - fctr = fctr->next; - } +static void +run_examine (struct cmd_examine *cmd, struct casereader *input, + struct dataset *ds) +{ + struct ll *ll; + const struct dictionary *dict = dataset_dict (ds); + struct ccase c; + struct casereader *level0 = casereader_clone (input); - totals[v].ptile_hash = list_to_ptile_hash (&percentile_list); - totals[v].ptile_alg = percentile_algorithm; - metrics_postcalc (&totals[v]); + if (!casereader_peek (input, 0, &c)) + { + casereader_destroy (input); + return; } + output_split_file_values (ds, &c); + case_destroy (&c); + + ll_init (&level0_factor.result_list); - /* Make sure that the combination of factors are complete */ + examine_group (cmd, level0, 0, dict, &level0_factor); - fctr = factors; - while ( fctr ) + for (ll = ll_head (&factor_list); + ll != ll_null (&factor_list); + ll = ll_next (ll)) { - struct hsh_iterator hi; - struct hsh_iterator hi0; - struct hsh_iterator hi1; - struct factor_statistics *fs; + struct xfactor *factor = ll_data (ll, struct xfactor, ll); - struct hsh_table *idh0 = NULL; - struct hsh_table *idh1 = NULL; - union value **val0; - union value **val1; + struct casereader *group = NULL; + struct casereader *level1; + struct casegrouper *grouper1 = NULL; + struct case_ordering *ordering1 = case_ordering_create (); + case_ordering_add_var (ordering1, factor->indep_var[0], SRT_ASCEND); - idh0 = hsh_create (4, (hsh_compare_func *) compare_ptr_values, - (hsh_hash_func *) hash_ptr_value, - 0,0); + level1 = casereader_clone (input); - idh1 = hsh_create (4, (hsh_compare_func *) compare_ptr_values, - (hsh_hash_func *) hash_ptr_value, - 0,0); + level1 = sort_execute (level1, + case_ordering_clone (ordering1)); + grouper1 = casegrouper_create_case_ordering (level1, ordering1); + case_ordering_destroy (ordering1); - - for ( fs = hsh_first (fctr->fstats, &hi); - fs != 0 ; - fs = hsh_next (fctr->fstats, &hi)) + while (casegrouper_get_next_group (grouper1, &group)) { - hsh_insert (idh0, &fs->id[0]); - hsh_insert (idh1, &fs->id[1]); - } + struct casereader *group_copy = casereader_clone (group); - /* Ensure that the factors combination is complete */ - for ( val0 = hsh_first (idh0, &hi0); - val0 != 0 ; - val0 = hsh_next (idh0, &hi0)) - { - for ( val1 = hsh_first (idh1, &hi1); - val1 != 0 ; - val1 = hsh_next (idh1, &hi1)) + if ( !factor->indep_var[1]) + examine_group (cmd, group_copy, 1, dict, factor); + else { - struct factor_statistics **ffs; - union value *key[2]; - key[0] = *val0; - key[1] = *val1; - - ffs = (struct factor_statistics **) - hsh_probe (fctr->fstats, &key ); - - if ( !*ffs ) { - size_t i; - (*ffs) = create_factor_statistics (n_dependent_vars, - key[0], key[1]); - for ( i = 0 ; i < n_dependent_vars ; ++i ) - metrics_precalc ( & (*ffs)->m[i]); - } - } - } + int n_groups = 0; + struct casereader *group2 = NULL; + struct casegrouper *grouper2 = NULL; + struct case_ordering *ordering2 = case_ordering_create (); + + case_ordering_add_var (ordering2, + factor->indep_var[1], SRT_ASCEND); + group_copy = sort_execute (group_copy, + case_ordering_clone (ordering2)); + grouper2 = + casegrouper_create_case_ordering (group_copy, ordering2); - hsh_destroy (idh0); - hsh_destroy (idh1); + case_ordering_destroy (ordering2); - fctr->fs = (struct factor_statistics **) hsh_sort_copy (fctr->fstats); + while (casegrouper_get_next_group (grouper2, &group2)) + { + examine_group (cmd, group2, 2, dict, factor); + n_groups++; + } + casegrouper_destroy (grouper2); + } - fctr = fctr->next; + casereader_destroy (group); + } + casegrouper_destroy (grouper1); } - if (ok) - output_examine (); + casereader_destroy (input); + output_examine (); + + factor_destroy (&level0_factor); + + { + struct ll *ll; + for (ll = ll_head (&factor_list); + ll != ll_null (&factor_list); + ll = ll_next (ll)) + { + struct xfactor *f = ll_data (ll, struct xfactor, ll); + factor_destroy (f); + } + } - if ( totals ) - { - size_t i; - for ( i = 0 ; i < n_dependent_vars ; ++i ) - { - metrics_destroy (&totals[i]); - } - } } static void show_summary (const struct variable **dependent_var, int n_dep_var, - const struct factor *fctr) + const struct xfactor *fctr) { static const char *subtitle[]= { - N_ ("Valid"), - N_ ("Missing"), - N_ ("Total") + N_("Valid"), + N_("Missing"), + N_("Total") }; - int i; - int heading_columns ; + int v, j; + int heading_columns = 1; int n_cols; const int heading_rows = 3; struct tab_table *tbl; int n_rows ; - int n_factors = 1; + n_rows = n_dep_var; + + assert (fctr); - if ( fctr ) + if ( fctr->indep_var[0] ) { heading_columns = 2; - n_factors = hsh_count (fctr->fstats); - n_rows = n_dep_var * n_factors ; if ( fctr->indep_var[1] ) - heading_columns = 3; - } - else - { - heading_columns = 1; - n_rows = n_dep_var; + { + heading_columns = 3; + } } + n_rows *= ll_count (&fctr->result_list); n_rows += heading_rows; n_cols = heading_columns + 6; - tbl = tab_create (n_cols,n_rows,0); + tbl = tab_create (n_cols, n_rows, 0); tab_headers (tbl, heading_columns, 0, heading_rows, 0); tab_dim (tbl, tab_natural_dimensions); @@ -973,12 +1260,12 @@ show_summary (const struct variable **dependent_var, int n_dep_var, tab_vline (tbl, TAL_2, heading_columns, 0, n_rows - 1); - tab_title (tbl, _ ("Case Processing Summary")); + tab_title (tbl, _("Case Processing Summary")); tab_joint_text (tbl, heading_columns, 0, - n_cols -1, 0, - TAB_CENTER | TAT_TITLE, - _ ("Cases")); + n_cols -1, 0, + TAB_CENTER | TAT_TITLE, + _("Cases")); /* Remove lines ... */ tab_box (tbl, @@ -987,28 +1274,28 @@ show_summary (const struct variable **dependent_var, int n_dep_var, heading_columns, 0, n_cols - 1, 0); - for ( i = 0 ; i < 3 ; ++i ) + for (j = 0 ; j < 3 ; ++j) { - tab_text (tbl, heading_columns + i * 2 , 2, TAB_CENTER | TAT_TITLE, - _ ("N")); + tab_text (tbl, heading_columns + j * 2 , 2, TAB_CENTER | TAT_TITLE, + _("N")); - tab_text (tbl, heading_columns + i * 2 + 1, 2, TAB_CENTER | TAT_TITLE, - _ ("Percent")); + tab_text (tbl, heading_columns + j * 2 + 1, 2, TAB_CENTER | TAT_TITLE, + _("Percent")); - tab_joint_text (tbl, heading_columns + i*2 , 1, - heading_columns + i * 2 + 1, 1, - TAB_CENTER | TAT_TITLE, - subtitle[i]); + tab_joint_text (tbl, heading_columns + j * 2 , 1, + heading_columns + j * 2 + 1, 1, + TAB_CENTER | TAT_TITLE, + subtitle[j]); tab_box (tbl, -1, -1, TAL_0, TAL_0, - heading_columns + i * 2, 1, - heading_columns + i * 2 + 1, 1); + heading_columns + j * 2, 1, + heading_columns + j * 2 + 1, 1); } /* Titles for the independent variables */ - if ( fctr ) + if ( fctr->indep_var[0] ) { tab_text (tbl, 1, heading_rows - 1, TAB_CENTER | TAT_TITLE, var_to_string (fctr->indep_var[0])); @@ -1020,1275 +1307,883 @@ show_summary (const struct variable **dependent_var, int n_dep_var, } } - - for ( i = 0 ; i < n_dep_var ; ++i ) + for (v = 0 ; v < n_dep_var ; ++v) { - int n_factors = 1; - if ( fctr ) - n_factors = hsh_count (fctr->fstats); + int j = 0; + struct ll *ll; + union value *last_value = NULL; - if ( i > 0 ) - tab_hline (tbl, TAL_1, 0, n_cols -1 , i * n_factors + heading_rows); + if ( v > 0 ) + tab_hline (tbl, TAL_1, 0, n_cols -1 , + v * ll_count (&fctr->result_list) + + heading_rows); tab_text (tbl, - 0, i * n_factors + heading_rows, + 0, + v * ll_count (&fctr->result_list) + heading_rows, TAB_LEFT | TAT_TITLE, - var_to_string (dependent_var[i]) + var_to_string (dependent_var[v]) ); - if ( !fctr ) - populate_summary (tbl, heading_columns, - (i * n_factors) + heading_rows, - &totals[i]); - else + + for (ll = ll_head (&fctr->result_list); + ll != ll_null (&fctr->result_list); ll = ll_next (ll)) { - struct factor_statistics **fs = fctr->fs; - int count = 0 ; - const union value *prev = NULL; + double n; + const struct factor_result *result = + ll_data (ll, struct factor_result, ll); - while (*fs) + if ( fctr->indep_var[0] ) { - if ( !prev || - 0 != compare_values (prev, (*fs)->id[0], - var_get_width (fctr->indep_var[0]))) + + if ( last_value == NULL || + compare_values (last_value, result->value[0], + var_get_width(fctr->indep_var[0]))) { - struct string vstr; - ds_init_empty (&vstr); - var_append_value_name (fctr->indep_var[0], - (*fs)->id[0], &vstr); - - tab_text (tbl, - 1, - (i * n_factors ) + count + - heading_rows, + struct string str; + + last_value = result->value[0]; + ds_init_empty (&str); + + var_append_value_name (fctr->indep_var[0], result->value[0], + &str); + + tab_text (tbl, 1, + heading_rows + j + + v * ll_count (&fctr->result_list), TAB_LEFT | TAT_TITLE, - ds_cstr (&vstr) - ); + ds_cstr (&str)); - ds_destroy (&vstr); + ds_destroy (&str); - if (fctr->indep_var[1] && count > 0 ) + if ( fctr->indep_var[1] && j > 0) tab_hline (tbl, TAL_1, 1, n_cols - 1, - (i * n_factors ) + count + heading_rows); + heading_rows + j + + v * ll_count (&fctr->result_list)); } - prev = (*fs)->id[0]; - if ( fctr->indep_var[1]) { - struct string vstr; - ds_init_empty (&vstr); + struct string str; + + ds_init_empty (&str); + var_append_value_name (fctr->indep_var[1], - (*fs)->id[1], &vstr); - tab_text (tbl, - 2, - (i * n_factors ) + count + - heading_rows, + result->value[1], &str); + + tab_text (tbl, 2, + heading_rows + j + + v * ll_count (&fctr->result_list), TAB_LEFT | TAT_TITLE, - ds_cstr (&vstr) - ); - ds_destroy (&vstr); + ds_cstr (&str)); + + ds_destroy (&str); } + } - populate_summary (tbl, heading_columns, - (i * n_factors) + count - + heading_rows, - & (*fs)->m[i]); - count++ ; - fs++; - } + moments1_calculate (result->metrics[v].moments, + &n, &result->metrics[v].mean, + &result->metrics[v].variance, + &result->metrics[v].skewness, + &result->metrics[v].kurtosis); + + result->metrics[v].se_mean = sqrt (result->metrics[v].variance / n) ; + + /* Total Valid */ + tab_float (tbl, heading_columns, + heading_rows + j + v * ll_count (&fctr->result_list), + TAB_LEFT, + n, 8, 0); + + tab_text (tbl, heading_columns + 1, + heading_rows + j + v * ll_count (&fctr->result_list), + TAB_RIGHT | TAT_PRINTF, + "%g%%", n * 100.0 / result->metrics[v].n); + + /* Total Missing */ + tab_float (tbl, heading_columns + 2, + heading_rows + j + v * ll_count (&fctr->result_list), + TAB_LEFT, + result->metrics[v].n - n, + 8, 0); + + tab_text (tbl, heading_columns + 3, + heading_rows + j + v * ll_count (&fctr->result_list), + TAB_RIGHT | TAT_PRINTF, + "%g%%", + (result->metrics[v].n - n) * 100.0 / result->metrics[v].n + ); + + /* Total Valid + Missing */ + tab_float (tbl, heading_columns + 4, + heading_rows + j + v * ll_count (&fctr->result_list), + TAB_LEFT, + result->metrics[v].n, + 8, 0); + + tab_text (tbl, heading_columns + 5, + heading_rows + j + v * ll_count (&fctr->result_list), + TAB_RIGHT | TAT_PRINTF, + "%g%%", + (result->metrics[v].n) * 100.0 / result->metrics[v].n + ); + + ++j; } } - tab_submit (tbl); -} - - -void -populate_summary (struct tab_table *t, int col, int row, - const struct metrics *m) - -{ - const double total = m->n + m->n_missing ; - - tab_float (t, col + 0, row + 0, TAB_RIGHT, m->n, 8, 0); - tab_float (t, col + 2, row + 0, TAB_RIGHT, m->n_missing, 8, 0); - tab_float (t, col + 4, row + 0, TAB_RIGHT, total, 8, 0); - - - if ( total > 0 ) { - tab_text (t, col + 1, row + 0, TAB_RIGHT | TAT_PRINTF, "%2.0f%%", - 100.0 * m->n / total ); - tab_text (t, col + 3, row + 0, TAB_RIGHT | TAT_PRINTF, "%2.0f%%", - 100.0 * m->n_missing / total ); - - /* This seems a bit pointless !!! */ - tab_text (t, col + 5, row + 0, TAB_RIGHT | TAT_PRINTF, "%2.0f%%", - 100.0 * total / total ); - } + tab_submit (tbl); } - +#define DESCRIPTIVE_ROWS 13 static void -show_extremes (const struct variable **dependent_var, int n_dep_var, - const struct factor *fctr, int n_extremities) +show_descriptives (const struct variable **dependent_var, + int n_dep_var, + const struct xfactor *fctr) { - int i; - int heading_columns ; + int v; + int heading_columns = 3; int n_cols; const int heading_rows = 1; struct tab_table *tbl; - int n_factors = 1; int n_rows ; + n_rows = n_dep_var; - if ( fctr ) - { - heading_columns = 2; - n_factors = hsh_count (fctr->fstats); + assert (fctr); - n_rows = n_dep_var * 2 * n_extremities * n_factors; + if ( fctr->indep_var[0] ) + { + heading_columns = 4; if ( fctr->indep_var[1] ) - heading_columns = 3; - } - else - { - heading_columns = 1; - n_rows = n_dep_var * 2 * n_extremities; + { + heading_columns = 5; + } } + n_rows *= ll_count (&fctr->result_list) * DESCRIPTIVE_ROWS; n_rows += heading_rows; - heading_columns += 2; n_cols = heading_columns + 2; - tbl = tab_create (n_cols,n_rows,0); + tbl = tab_create (n_cols, n_rows, 0); tab_headers (tbl, heading_columns, 0, heading_rows, 0); tab_dim (tbl, tab_natural_dimensions); - /* Outline the box, No internal lines*/ + /* Outline the box */ tab_box (tbl, TAL_2, TAL_2, -1, -1, 0, 0, n_cols - 1, n_rows - 1); - tab_hline (tbl, TAL_2, 0, n_cols - 1, heading_rows ); - tab_title (tbl, _ ("Extreme Values")); + tab_hline (tbl, TAL_2, 0, n_cols - 1, heading_rows ); + tab_hline (tbl, TAL_2, 1, n_cols - 1, heading_rows ); - tab_vline (tbl, TAL_2, n_cols - 2, 0, n_rows -1); - tab_vline (tbl, TAL_1, n_cols - 1, 0, n_rows -1); + tab_vline (tbl, TAL_1, n_cols - 1, 0, n_rows - 1); - if ( fctr ) - { - tab_text (tbl, 1, heading_rows - 1, TAB_CENTER | TAT_TITLE, - var_to_string (fctr->indep_var[0])); - if ( fctr->indep_var[1] ) - tab_text (tbl, 2, heading_rows - 1, TAB_CENTER | TAT_TITLE, - var_to_string (fctr->indep_var[1])); - } + if ( fctr->indep_var[0]) + tab_text (tbl, 1, 0, TAT_TITLE, var_to_string (fctr->indep_var[0])); - tab_text (tbl, n_cols - 1, 0, TAB_CENTER | TAT_TITLE, _ ("Value")); - tab_text (tbl, n_cols - 2, 0, TAB_CENTER | TAT_TITLE, _ ("Case Number")); + if ( fctr->indep_var[1]) + tab_text (tbl, 2, 0, TAT_TITLE, var_to_string (fctr->indep_var[1])); - for ( i = 0 ; i < n_dep_var ; ++i ) + for (v = 0 ; v < n_dep_var ; ++v ) { + struct ll *ll; + int i = 0; - if ( i > 0 ) - tab_hline (tbl, TAL_1, 0, n_cols -1 , - i * 2 * n_extremities * n_factors + heading_rows); + const int row_var_start = + v * DESCRIPTIVE_ROWS * ll_count(&fctr->result_list); - tab_text (tbl, 0, - i * 2 * n_extremities * n_factors + heading_rows, + tab_text (tbl, + 0, + heading_rows + row_var_start, TAB_LEFT | TAT_TITLE, - var_to_string (dependent_var[i]) + var_to_string (dependent_var[v]) ); + for (ll = ll_head (&fctr->result_list); + ll != ll_null (&fctr->result_list); i++, ll = ll_next (ll)) + { + const struct factor_result *result = + ll_data (ll, struct factor_result, ll); - if ( !fctr ) - populate_extremes (tbl, heading_columns - 2, - i * 2 * n_extremities * n_factors + heading_rows, - n_extremities, &totals[i]); + const double t = + gsl_cdf_tdist_Qinv ((1 - cmd.n_cinterval[0] / 100.0) / 2.0, + result->metrics[v].n - 1); - else - { - struct factor_statistics **fs = fctr->fs; - int count = 0 ; - const union value *prev = NULL; + if ( i > 0 || v > 0 ) + { + const int left_col = (i == 0) ? 0 : 1; + tab_hline (tbl, TAL_1, left_col, n_cols - 1, + heading_rows + row_var_start + i * DESCRIPTIVE_ROWS); + } - while (*fs) + if ( fctr->indep_var[0]) { - const int row = heading_rows + ( 2 * n_extremities ) * - ( ( i * n_factors ) + count ); + struct string vstr; + ds_init_empty (&vstr); + var_append_value_name (fctr->indep_var[0], + result->value[0], &vstr); + + tab_text (tbl, 1, + heading_rows + row_var_start + i * DESCRIPTIVE_ROWS, + TAB_LEFT, + ds_cstr (&vstr) + ); + ds_destroy (&vstr); + } - if ( !prev || 0 != compare_values (prev, (*fs)->id[0], - var_get_width (fctr->indep_var[0]))) - { - struct string vstr; - ds_init_empty (&vstr); - var_append_value_name (fctr->indep_var[0], - (*fs)->id[0], &vstr); - if ( count > 0 ) - tab_hline (tbl, TAL_1, 1, n_cols - 1, row); + tab_text (tbl, n_cols - 4, + heading_rows + row_var_start + i * DESCRIPTIVE_ROWS, + TAB_LEFT, + _("Mean")); + + tab_text (tbl, n_cols - 4, + heading_rows + row_var_start + 1 + i * DESCRIPTIVE_ROWS, + TAB_LEFT | TAT_PRINTF, + _("%g%% Confidence Interval for Mean"), + cmd.n_cinterval[0]); + + tab_text (tbl, n_cols - 3, + heading_rows + row_var_start + 1 + i * DESCRIPTIVE_ROWS, + TAB_LEFT, + _("Lower Bound")); + + tab_text (tbl, n_cols - 3, + heading_rows + row_var_start + 2 + i * DESCRIPTIVE_ROWS, + TAB_LEFT, + _("Upper Bound")); + + tab_text (tbl, n_cols - 4, + heading_rows + row_var_start + 3 + i * DESCRIPTIVE_ROWS, + TAB_LEFT | TAT_PRINTF, + _("5%% Trimmed Mean")); + + tab_text (tbl, n_cols - 4, + heading_rows + row_var_start + 4 + i * DESCRIPTIVE_ROWS, + TAB_LEFT, + _("Median")); + + tab_text (tbl, n_cols - 4, + heading_rows + row_var_start + 5 + i * DESCRIPTIVE_ROWS, + TAB_LEFT, + _("Variance")); + + tab_text (tbl, n_cols - 4, + heading_rows + row_var_start + 6 + i * DESCRIPTIVE_ROWS, + TAB_LEFT, + _("Std. Deviation")); + + tab_text (tbl, n_cols - 4, + heading_rows + row_var_start + 7 + i * DESCRIPTIVE_ROWS, + TAB_LEFT, + _("Minimum")); + + tab_text (tbl, n_cols - 4, + heading_rows + row_var_start + 8 + i * DESCRIPTIVE_ROWS, + TAB_LEFT, + _("Maximum")); + + tab_text (tbl, n_cols - 4, + heading_rows + row_var_start + 9 + i * DESCRIPTIVE_ROWS, + TAB_LEFT, + _("Range")); + + tab_text (tbl, n_cols - 4, + heading_rows + row_var_start + 10 + i * DESCRIPTIVE_ROWS, + TAB_LEFT, + _("Interquartile Range")); + + + tab_text (tbl, n_cols - 4, + heading_rows + row_var_start + 11 + i * DESCRIPTIVE_ROWS, + TAB_LEFT, + _("Skewness")); + + tab_text (tbl, n_cols - 4, + heading_rows + row_var_start + 12 + i * DESCRIPTIVE_ROWS, + TAB_LEFT, + _("Kurtosis")); + + + /* Now the statistics ... */ + + tab_float (tbl, n_cols - 2, + heading_rows + row_var_start + i * DESCRIPTIVE_ROWS, + TAB_CENTER, + result->metrics[v].mean, + 8, 2); + + tab_float (tbl, n_cols - 1, + heading_rows + row_var_start + i * DESCRIPTIVE_ROWS, + TAB_CENTER, + result->metrics[v].se_mean, + 8, 3); + + + tab_float (tbl, n_cols - 2, + heading_rows + row_var_start + 1 + i * DESCRIPTIVE_ROWS, + TAB_CENTER, + result->metrics[v].mean - t * + result->metrics[v].se_mean, + 8, 3); + + tab_float (tbl, n_cols - 2, + heading_rows + row_var_start + 2 + i * DESCRIPTIVE_ROWS, + TAB_CENTER, + result->metrics[v].mean + t * + result->metrics[v].se_mean, + 8, 3); + + + tab_float (tbl, n_cols - 2, + heading_rows + row_var_start + 3 + i * DESCRIPTIVE_ROWS, + TAB_CENTER, + trimmed_mean_calculate ((struct trimmed_mean *) result->metrics[v].trimmed_mean), + 8, 2); + + + tab_float (tbl, n_cols - 2, + heading_rows + row_var_start + 4 + i * DESCRIPTIVE_ROWS, + TAB_CENTER, + percentile_calculate (result->metrics[v].quartiles[1], percentile_algorithm), + 8, 2); + + + tab_float (tbl, n_cols - 2, + heading_rows + row_var_start + 5 + i * DESCRIPTIVE_ROWS, + TAB_CENTER, + result->metrics[v].variance, + 8, 3); + + tab_float (tbl, n_cols - 2, + heading_rows + row_var_start + 6 + i * DESCRIPTIVE_ROWS, + TAB_CENTER, + sqrt (result->metrics[v].variance), + 8, 3); + + tab_float (tbl, n_cols - 2, + heading_rows + row_var_start + 10 + i * DESCRIPTIVE_ROWS, + TAB_CENTER, + percentile_calculate (result->metrics[v].quartiles[2], + percentile_algorithm) - + percentile_calculate (result->metrics[v].quartiles[0], + percentile_algorithm), + 8, 2); + + + tab_float (tbl, n_cols - 2, + heading_rows + row_var_start + 11 + i * DESCRIPTIVE_ROWS, + TAB_CENTER, + result->metrics[v].skewness, + 8, 3); + + tab_float (tbl, n_cols - 2, + heading_rows + row_var_start + 12 + i * DESCRIPTIVE_ROWS, + TAB_CENTER, + result->metrics[v].kurtosis, + 8, 3); + + tab_float (tbl, n_cols - 1, + heading_rows + row_var_start + 11 + i * DESCRIPTIVE_ROWS, + TAB_CENTER, + calc_seskew (result->metrics[v].n), + 8, 3); + + tab_float (tbl, n_cols - 1, + heading_rows + row_var_start + 12 + i * DESCRIPTIVE_ROWS, + TAB_CENTER, + calc_sekurt (result->metrics[v].n), + 8, 3); + + { + struct extremum *minimum, *maximum ; + + struct ll *max_ll = ll_head (extrema_list (result->metrics[v].maxima)); + struct ll *min_ll = ll_head (extrema_list (result->metrics[v].minima)); + + maximum = ll_data (max_ll, struct extremum, ll); + minimum = ll_data (min_ll, struct extremum, ll); + + tab_float (tbl, n_cols - 2, + heading_rows + row_var_start + 7 + i * DESCRIPTIVE_ROWS, + TAB_CENTER, + minimum->value, + 8, 3); + + tab_float (tbl, n_cols - 2, + heading_rows + row_var_start + 8 + i * DESCRIPTIVE_ROWS, + TAB_CENTER, + maximum->value, + 8, 3); + + tab_float (tbl, n_cols - 2, + heading_rows + row_var_start + 9 + i * DESCRIPTIVE_ROWS, + TAB_CENTER, + maximum->value - minimum->value, + 8, 3); + } + } + } - tab_text (tbl, - 1, row, - TAB_LEFT | TAT_TITLE, - ds_cstr (&vstr) - ); + tab_vline (tbl, TAL_2, heading_columns, 0, n_rows - 1); - ds_destroy (&vstr); - } + tab_title (tbl, _("Descriptives")); - prev = (*fs)->id[0]; + tab_text (tbl, n_cols - 2, 0, TAB_CENTER | TAT_TITLE, + _("Statistic")); - if (fctr->indep_var[1] && count > 0 ) - tab_hline (tbl, TAL_1, 2, n_cols - 1, row); + tab_text (tbl, n_cols - 1, 0, TAB_CENTER | TAT_TITLE, + _("Std. Error")); - if ( fctr->indep_var[1]) - { - struct string vstr; - ds_init_empty (&vstr); - var_append_value_name (fctr->indep_var[1], (*fs)->id[1], &vstr); + tab_submit (tbl); +} - tab_text (tbl, 2, row, - TAB_LEFT | TAT_TITLE, - ds_cstr (&vstr) - ); - ds_destroy (&vstr); - } - populate_extremes (tbl, heading_columns - 2, - row, n_extremities, - & (*fs)->m[i]); +static void +show_extremes (const struct variable **dependent_var, + int n_dep_var, + const struct xfactor *fctr) +{ + int v; + int heading_columns = 3; + int n_cols; + const int heading_rows = 1; + struct tab_table *tbl; + + int n_rows ; + n_rows = n_dep_var; - count++ ; - fs++; - } + assert (fctr); + + if ( fctr->indep_var[0] ) + { + heading_columns = 4; + + if ( fctr->indep_var[1] ) + { + heading_columns = 5; } } - tab_submit (tbl); -} + n_rows *= ll_count (&fctr->result_list) * cmd.st_n * 2; + n_rows += heading_rows; + n_cols = heading_columns + 2; + tbl = tab_create (n_cols, n_rows, 0); + tab_headers (tbl, heading_columns, 0, heading_rows, 0); -/* Fill in the extremities table */ -void -populate_extremes (struct tab_table *t, - int col, int row, int n, const struct metrics *m) -{ - int extremity; - int idx=0; + tab_dim (tbl, tab_natural_dimensions); + /* Outline the box */ + tab_box (tbl, + TAL_2, TAL_2, + -1, -1, + 0, 0, + n_cols - 1, n_rows - 1); - tab_text (t, col, row, - TAB_RIGHT | TAT_TITLE , - _ ("Highest") - ); - tab_text (t, col, row + n , - TAB_RIGHT | TAT_TITLE , - _ ("Lowest") - ); + tab_hline (tbl, TAL_2, 0, n_cols - 1, heading_rows ); + tab_hline (tbl, TAL_2, 1, n_cols - 1, heading_rows ); + tab_vline (tbl, TAL_1, n_cols - 1, 0, n_rows - 1); + if ( fctr->indep_var[0]) + tab_text (tbl, 1, 0, TAT_TITLE, var_to_string (fctr->indep_var[0])); - tab_hline (t, TAL_1, col, col + 3, row + n ); + if ( fctr->indep_var[1]) + tab_text (tbl, 2, 0, TAT_TITLE, var_to_string (fctr->indep_var[1])); - for (extremity = 0; extremity < n ; ++extremity ) + for (v = 0 ; v < n_dep_var ; ++v ) { - /* Highest */ - tab_float (t, col + 1, row + extremity, - TAB_RIGHT, - extremity + 1, 8, 0); + struct ll *ll; + int i = 0; + const int row_var_start = v * cmd.st_n * 2 * ll_count(&fctr->result_list); + tab_text (tbl, + 0, + heading_rows + row_var_start, + TAB_LEFT | TAT_TITLE, + var_to_string (dependent_var[v]) + ); - /* Lowest */ - tab_float (t, col + 1, row + extremity + n, - TAB_RIGHT, - extremity + 1, 8, 0); + for (ll = ll_head (&fctr->result_list); + ll != ll_null (&fctr->result_list); i++, ll = ll_next (ll)) + { + int e ; + struct ll *min_ll; + struct ll *max_ll; + const int row_result_start = i * cmd.st_n * 2; - } + const struct factor_result *result = + ll_data (ll, struct factor_result, ll); + if (i > 0 || v > 0) + tab_hline (tbl, TAL_1, 1, n_cols - 1, + heading_rows + row_var_start + row_result_start); - /* Lowest */ - for (idx = 0, extremity = 0; extremity < n && idx < m->n_data ; ++idx ) - { - int j; - const struct weighted_value *wv = m->wvp[idx]; - struct case_node *cn = wv->case_nos; + tab_hline (tbl, TAL_1, heading_columns - 2, n_cols - 1, + heading_rows + row_var_start + row_result_start + cmd.st_n); + for ( e = 1; e <= cmd.st_n; ++e ) + { + tab_text (tbl, n_cols - 3, + heading_rows + row_var_start + row_result_start + e - 1, + TAB_RIGHT | TAT_PRINTF, + _("%d"), e); + + tab_text (tbl, n_cols - 3, + heading_rows + row_var_start + row_result_start + cmd.st_n + e - 1, + TAB_RIGHT | TAT_PRINTF, + _("%d"), e); + } - for (j = 0 ; j < wv->w ; ++j ) - { - if ( extremity + j >= n ) - break ; - tab_float (t, col + 3, row + extremity + j + n, - TAB_RIGHT, - wv->v.f, 8, 2); + min_ll = ll_head (extrema_list (result->metrics[v].minima)); + for (e = 0; e < cmd.st_n;) + { + struct extremum *minimum = ll_data (min_ll, struct extremum, ll); + double weight = minimum->weight; - tab_float (t, col + 2, row + extremity + j + n, - TAB_RIGHT, - cn->num, 8, 0); + while (weight-- > 0 && e < cmd.st_n) + { + tab_float (tbl, n_cols - 1, + heading_rows + row_var_start + row_result_start + cmd.st_n + e, + TAB_RIGHT, + minimum->value, + 8, 2); + + + tab_float (tbl, n_cols - 2, + heading_rows + row_var_start + row_result_start + cmd.st_n + e, + TAB_RIGHT, + minimum->location, + 8, 0); + ++e; + } - if ( cn->next ) - cn = cn->next; + min_ll = ll_next (min_ll); + } - } - extremity += wv->w ; - } + max_ll = ll_head (extrema_list (result->metrics[v].maxima)); + for (e = 0; e < cmd.st_n;) + { + struct extremum *maximum = ll_data (max_ll, struct extremum, ll); + double weight = maximum->weight; + while (weight-- > 0 && e < cmd.st_n) + { + tab_float (tbl, n_cols - 1, + heading_rows + row_var_start + row_result_start + e, + TAB_RIGHT, + maximum->value, + 8, 2); + + + tab_float (tbl, n_cols - 2, + heading_rows + row_var_start + row_result_start + e, + TAB_RIGHT, + maximum->location, + 8, 0); + ++e; + } - /* Highest */ - for (idx = m->n_data - 1, extremity = 0; extremity < n && idx >= 0; --idx ) - { - int j; - const struct weighted_value *wv = m->wvp[idx]; - struct case_node *cn = wv->case_nos; + max_ll = ll_next (max_ll); + } - for (j = 0 ; j < wv->w ; ++j ) - { - if ( extremity + j >= n ) - break ; - tab_float (t, col + 3, row + extremity + j, + if ( fctr->indep_var[0]) + { + struct string vstr; + ds_init_empty (&vstr); + var_append_value_name (fctr->indep_var[0], + result->value[0], &vstr); + + tab_text (tbl, 1, + heading_rows + row_var_start + row_result_start, + TAB_LEFT, + ds_cstr (&vstr) + ); + + ds_destroy (&vstr); + } + + + tab_text (tbl, n_cols - 4, + heading_rows + row_var_start + row_result_start, TAB_RIGHT, - wv->v.f, 8, 2); + _("Highest")); - tab_float (t, col + 2, row + extremity + j, + tab_text (tbl, n_cols - 4, + heading_rows + row_var_start + row_result_start + cmd.st_n, TAB_RIGHT, - cn->num, 8, 0); + _("Lowest")); + } + } - if ( cn->next ) - cn = cn->next; + tab_vline (tbl, TAL_2, heading_columns, 0, n_rows - 1); - } - extremity += wv->w ; - } + tab_title (tbl, _("Extreme Values")); + + + tab_text (tbl, n_cols - 2, 0, TAB_CENTER | TAT_TITLE, + _("Case Number")); + + + tab_text (tbl, n_cols - 1, 0, TAB_CENTER | TAT_TITLE, + _("Value")); + + tab_submit (tbl); } +#define PERCENTILE_ROWS 2 -/* Show the descriptives table */ -void -show_descriptives (const struct variable **dependent_var, +static void +show_percentiles (const struct variable **dependent_var, int n_dep_var, - struct factor *fctr) + const struct xfactor *fctr) { int i; - int heading_columns ; + int v; + int heading_columns = 2; int n_cols; - const int n_stat_rows = 13; - - const int heading_rows = 1; - + const int n_percentiles = subc_list_double_count (&percentile_list); + const int heading_rows = 2; struct tab_table *tbl; - int n_factors = 1; int n_rows ; + n_rows = n_dep_var; - if ( fctr ) - { - heading_columns = 4; - n_factors = hsh_count (fctr->fstats); - - n_rows = n_dep_var * n_stat_rows * n_factors; + assert (fctr); - if ( fctr->indep_var[1] ) - heading_columns = 5; - } - else + if ( fctr->indep_var[0] ) { heading_columns = 3; - n_rows = n_dep_var * n_stat_rows; + + if ( fctr->indep_var[1] ) + { + heading_columns = 4; + } } + n_rows *= ll_count (&fctr->result_list) * PERCENTILE_ROWS; n_rows += heading_rows; - n_cols = heading_columns + 2; - + n_cols = heading_columns + n_percentiles; tbl = tab_create (n_cols, n_rows, 0); - - tab_headers (tbl, heading_columns + 1, 0, heading_rows, 0); + tab_headers (tbl, heading_columns, 0, heading_rows, 0); tab_dim (tbl, tab_natural_dimensions); - /* Outline the box and have no internal lines*/ + /* Outline the box */ tab_box (tbl, TAL_2, TAL_2, -1, -1, 0, 0, n_cols - 1, n_rows - 1); - tab_hline (tbl, TAL_2, 0, n_cols - 1, heading_rows ); - - tab_vline (tbl, TAL_1, heading_columns, 0, n_rows - 1); - tab_vline (tbl, TAL_2, n_cols - 2, 0, n_rows - 1); - tab_vline (tbl, TAL_1, n_cols - 1, 0, n_rows - 1); - tab_text (tbl, n_cols - 2, 0, TAB_CENTER | TAT_TITLE, _ ("Statistic")); - tab_text (tbl, n_cols - 1, 0, TAB_CENTER | TAT_TITLE, _ ("Std. Error")); + tab_hline (tbl, TAL_2, 0, n_cols - 1, heading_rows ); + tab_hline (tbl, TAL_2, 1, n_cols - 1, heading_rows ); - tab_title (tbl, _ ("Descriptives")); + if ( fctr->indep_var[0]) + tab_text (tbl, 1, 1, TAT_TITLE, var_to_string (fctr->indep_var[0])); + if ( fctr->indep_var[1]) + tab_text (tbl, 2, 1, TAT_TITLE, var_to_string (fctr->indep_var[1])); - for ( i = 0 ; i < n_dep_var ; ++i ) + for (v = 0 ; v < n_dep_var ; ++v ) { - const int row = heading_rows + i * n_stat_rows * n_factors ; + double hinges[3]; + struct ll *ll; + int i = 0; - if ( i > 0 ) - tab_hline (tbl, TAL_1, 0, n_cols - 1, row ); + const int row_var_start = + v * PERCENTILE_ROWS * ll_count(&fctr->result_list); - tab_text (tbl, 0, - i * n_stat_rows * n_factors + heading_rows, + tab_text (tbl, + 0, + heading_rows + row_var_start, TAB_LEFT | TAT_TITLE, - var_to_string (dependent_var[i]) + var_to_string (dependent_var[v]) ); - - if ( fctr ) + for (ll = ll_head (&fctr->result_list); + ll != ll_null (&fctr->result_list); i++, ll = ll_next (ll)) { - const union value *prev = NULL; + int j; + const struct factor_result *result = + ll_data (ll, struct factor_result, ll); - struct factor_statistics **fs = fctr->fs; - int count = 0; + if ( i > 0 || v > 0 ) + { + const int left_col = (i == 0) ? 0 : 1; + tab_hline (tbl, TAL_1, left_col, n_cols - 1, + heading_rows + row_var_start + i * PERCENTILE_ROWS); + } - tab_text (tbl, 1, heading_rows - 1, TAB_CENTER | TAT_TITLE, - var_to_string (fctr->indep_var[0])); + if ( fctr->indep_var[0]) + { + struct string vstr; + ds_init_empty (&vstr); + var_append_value_name (fctr->indep_var[0], + result->value[0], &vstr); + + tab_text (tbl, 1, + heading_rows + row_var_start + i * PERCENTILE_ROWS, + TAB_LEFT, + ds_cstr (&vstr) + ); + ds_destroy (&vstr); + } - if ( fctr->indep_var[1]) - tab_text (tbl, 2, heading_rows - 1, TAB_CENTER | TAT_TITLE, - var_to_string (fctr->indep_var[1])); - while ( *fs ) - { - const int row = heading_rows + n_stat_rows * - ( ( i * n_factors ) + count ); + tab_text (tbl, n_cols - n_percentiles - 1, + heading_rows + row_var_start + i * PERCENTILE_ROWS, + TAB_LEFT, + ptile_alg_desc [percentile_algorithm]); - if ( !prev || 0 != compare_values (prev, (*fs)->id[0], - var_get_width (fctr->indep_var[0]))) - { - struct string vstr; - ds_init_empty (&vstr); - var_append_value_name (fctr->indep_var[0], - (*fs)->id[0], &vstr); - - if ( count > 0 ) - tab_hline (tbl, TAL_1, 1, n_cols - 1, row); - - tab_text (tbl, - 1, row, - TAB_LEFT | TAT_TITLE, - ds_cstr (&vstr) - ); - - ds_destroy (&vstr); - } - - prev = (*fs)->id[0]; - - if (fctr->indep_var[1] && count > 0 ) - tab_hline (tbl, TAL_1, 2, n_cols - 1, row); - - if ( fctr->indep_var[1]) - { - struct string vstr; - ds_init_empty (&vstr); - var_append_value_name (fctr->indep_var[1], (*fs)->id[1], &vstr); - - tab_text (tbl, 2, row, - TAB_LEFT | TAT_TITLE, - ds_cstr (&vstr) - ); - - ds_destroy (&vstr); - } - - populate_descriptives (tbl, heading_columns - 2, - row, & (*fs)->m[i]); - - count++ ; - fs++; - } - - } - - else - { - - populate_descriptives (tbl, heading_columns - 2, - i * n_stat_rows * n_factors + heading_rows, - &totals[i]); - } - } - - tab_submit (tbl); - -} - - -/* Fill in the descriptives data */ -void -populate_descriptives (struct tab_table *tbl, int col, int row, - const struct metrics *m) -{ - const double t = gsl_cdf_tdist_Qinv ((1 - cmd.n_cinterval[0] / 100.0)/2.0, - m->n -1); - - tab_text (tbl, col, - row, - TAB_LEFT | TAT_TITLE, - _ ("Mean")); - - tab_float (tbl, col + 2, - row, - TAB_CENTER, - m->mean, - 8,2); - - tab_float (tbl, col + 3, - row, - TAB_CENTER, - m->se_mean, - 8,3); - - - tab_text (tbl, col, - row + 1, - TAB_LEFT | TAT_TITLE | TAT_PRINTF, - _ ("%g%% Confidence Interval for Mean"), cmd.n_cinterval[0]); - - - tab_text (tbl, col + 1, - row + 1, - TAB_LEFT | TAT_TITLE, - _ ("Lower Bound")); - - tab_float (tbl, col + 2, - row + 1, - TAB_CENTER, - m->mean - t * m->se_mean, - 8,3); - - tab_text (tbl, col + 1, - row + 2, - TAB_LEFT | TAT_TITLE, - _ ("Upper Bound")); - - - tab_float (tbl, col + 2, - row + 2, - TAB_CENTER, - m->mean + t * m->se_mean, - 8,3); - - tab_text (tbl, col, - row + 3, - TAB_LEFT | TAT_TITLE | TAT_PRINTF, - _ ("5%% Trimmed Mean")); - - tab_float (tbl, col + 2, - row + 3, - TAB_CENTER, - m->trimmed_mean, - 8,2); - - tab_text (tbl, col, - row + 4, - TAB_LEFT | TAT_TITLE, - _ ("Median")); - - { - struct percentile *p; - double d = 50; - - p = hsh_find (m->ptile_hash, &d); - - assert (p); - - - tab_float (tbl, col + 2, - row + 4, - TAB_CENTER, - p->v, - 8, 2); - } - - - tab_text (tbl, col, - row + 5, - TAB_LEFT | TAT_TITLE, - _ ("Variance")); - - tab_float (tbl, col + 2, - row + 5, - TAB_CENTER, - m->var, - 8,3); - - - tab_text (tbl, col, - row + 6, - TAB_LEFT | TAT_TITLE, - _ ("Std. Deviation")); - - - tab_float (tbl, col + 2, - row + 6, - TAB_CENTER, - m->stddev, - 8,3); - - - tab_text (tbl, col, - row + 7, - TAB_LEFT | TAT_TITLE, - _ ("Minimum")); - - tab_float (tbl, col + 2, - row + 7, - TAB_CENTER, - m->min, - 8,3); - - tab_text (tbl, col, - row + 8, - TAB_LEFT | TAT_TITLE, - _ ("Maximum")); - - tab_float (tbl, col + 2, - row + 8, - TAB_CENTER, - m->max, - 8,3); - - - tab_text (tbl, col, - row + 9, - TAB_LEFT | TAT_TITLE, - _ ("Range")); - - - tab_float (tbl, col + 2, - row + 9, - TAB_CENTER, - m->max - m->min, - 8,3); - - tab_text (tbl, col, - row + 10, - TAB_LEFT | TAT_TITLE, - _ ("Interquartile Range")); - - { - struct percentile *p1; - struct percentile *p2; - - double d = 75; - p1 = hsh_find (m->ptile_hash, &d); - - d = 25; - p2 = hsh_find (m->ptile_hash, &d); - - assert (p1); - assert (p2); - - tab_float (tbl, col + 2, - row + 10, - TAB_CENTER, - p1->v - p2->v, - 8, 2); - } - - - - tab_text (tbl, col, - row + 11, - TAB_LEFT | TAT_TITLE, - _ ("Skewness")); - - - tab_float (tbl, col + 2, - row + 11, - TAB_CENTER, - m->skewness, - 8,3); - - /* stderr of skewness */ - tab_float (tbl, col + 3, - row + 11, - TAB_CENTER, - calc_seskew (m->n), - 8,3); - - - tab_text (tbl, col, - row + 12, - TAB_LEFT | TAT_TITLE, - _ ("Kurtosis")); - - - tab_float (tbl, col + 2, - row + 12, - TAB_CENTER, - m->kurtosis, - 8,3); - - /* stderr of kurtosis */ - tab_float (tbl, col + 3, - row + 12, - TAB_CENTER, - calc_sekurt (m->n), - 8,3); - - -} - - - -void -box_plot_variables (const struct factor *fctr, - const struct variable **vars, int n_vars, - const struct variable *id) -{ + tab_text (tbl, n_cols - n_percentiles - 1, + heading_rows + row_var_start + 1 + i * PERCENTILE_ROWS, + TAB_LEFT, + _("Tukey's Hinges")); - int i; - struct factor_statistics **fs ; - if ( ! fctr ) - { - box_plot_group (fctr, vars, n_vars, id); - return; - } + tab_vline (tbl, TAL_1, n_cols - n_percentiles -1, heading_rows, n_rows - 1); - for ( fs = fctr->fs ; *fs ; ++fs ) - { - struct string str; - double y_min = DBL_MAX; - double y_max = -DBL_MAX; - struct chart *ch = chart_create (); - ds_init_empty (&str); - factor_to_string (fctr, *fs, 0, &str ); + tukey_hinges_calculate ((struct tukey_hinges *) result->metrics[v].tukey_hinges, + hinges); - chart_write_title (ch, ds_cstr (&str)); - - for ( i = 0 ; i < n_vars ; ++i ) - { - y_max = MAX (y_max, (*fs)->m[i].max); - y_min = MIN (y_min, (*fs)->m[i].min); - } - - boxplot_draw_yscale (ch, y_max, y_min); - - for ( i = 0 ; i < n_vars ; ++i ) - { - - const double box_width = (ch->data_right - ch->data_left) - / (n_vars * 2.0 ) ; - - const double box_centre = ( i * 2 + 1) * box_width - + ch->data_left; - - boxplot_draw_boxplot (ch, - box_centre, box_width, - & (*fs)->m[i], - var_to_string (vars[i])); - - - } - - chart_submit (ch); - ds_destroy (&str); - } -} - - - -/* Do a box plot, grouping all factors into one plot ; - each dependent variable has its own plot. -*/ -void -box_plot_group (const struct factor *fctr, - const struct variable **vars, - int n_vars, - const struct variable *id UNUSED) -{ - - int i; - - for ( i = 0 ; i < n_vars ; ++i ) - { - struct factor_statistics **fs ; - struct chart *ch; - - ch = chart_create (); - - boxplot_draw_yscale (ch, totals[i].max, totals[i].min); - - if ( fctr ) - { - int n_factors = 0; - int f=0; - for ( fs = fctr->fs ; *fs ; ++fs ) - ++n_factors; - - chart_write_title (ch, _ ("Boxplot of %s vs. %s"), - var_to_string (vars[i]), var_to_string (fctr->indep_var[0]) ); - - for ( fs = fctr->fs ; *fs ; ++fs ) + for (j = 0; j < n_percentiles; ++j) { - struct string str; - const double box_width = (ch->data_right - ch->data_left) - / (n_factors * 2.0 ) ; - - const double box_centre = ( f++ * 2 + 1) * box_width - + ch->data_left; + double hinge = SYSMIS; + tab_float (tbl, n_cols - n_percentiles + j, + heading_rows + row_var_start + i * PERCENTILE_ROWS, + TAB_CENTER, + percentile_calculate (result->metrics[v].ptl[j], + percentile_algorithm), + 8, 2 + ); + + if ( result->metrics[v].ptl[j]->ptile == 0.5) + hinge = hinges[1]; + else if ( result->metrics[v].ptl[j]->ptile == 0.25) + hinge = hinges[0]; + else if ( result->metrics[v].ptl[j]->ptile == 0.75) + hinge = hinges[2]; + + if ( hinge != SYSMIS) + tab_float (tbl, n_cols - n_percentiles + j, + heading_rows + row_var_start + 1 + i * PERCENTILE_ROWS, + TAB_CENTER, + hinge, + 8, 2 + ); - ds_init_empty (&str); - factor_to_string_concise (fctr, *fs, &str); - - boxplot_draw_boxplot (ch, - box_centre, box_width, - & (*fs)->m[i], - ds_cstr (&str)); - ds_destroy (&str); } } - else if ( ch ) - { - const double box_width = (ch->data_right - ch->data_left) / 3.0; - const double box_centre = (ch->data_right + ch->data_left) / 2.0; - - chart_write_title (ch, _ ("Boxplot")); - - boxplot_draw_boxplot (ch, - box_centre, box_width, - &totals[i], - var_to_string (vars[i]) ); - - } - - chart_submit (ch); } -} - - -/* Plot the normal and detrended normal plots for m - Label the plots with factorname */ -void -np_plot (const struct metrics *m, const char *factorname) -{ - int i; - double yfirst=0, ylast=0; - - /* Normal Plot */ - struct chart *np_chart; - - /* Detrended Normal Plot */ - struct chart *dnp_chart; - - /* The slope and intercept of the ideal normal probability line */ - const double slope = 1.0 / m->stddev; - const double intercept = - m->mean / m->stddev; - - /* Cowardly refuse to plot an empty data set */ - if ( m->n_data == 0 ) - return ; - - np_chart = chart_create (); - dnp_chart = chart_create (); - - if ( !np_chart || ! dnp_chart ) - return ; - - chart_write_title (np_chart, _ ("Normal Q-Q Plot of %s"), factorname); - chart_write_xlabel (np_chart, _ ("Observed Value")); - chart_write_ylabel (np_chart, _ ("Expected Normal")); - - - chart_write_title (dnp_chart, _ ("Detrended Normal Q-Q Plot of %s"), - factorname); - chart_write_xlabel (dnp_chart, _ ("Observed Value")); - chart_write_ylabel (dnp_chart, _ ("Dev from Normal")); - - yfirst = gsl_cdf_ugaussian_Pinv (m->wvp[0]->rank / ( m->n + 1)); - ylast = gsl_cdf_ugaussian_Pinv (m->wvp[m->n_data-1]->rank / ( m->n + 1)); - - - { - /* Need to make sure that both the scatter plot and the ideal fit into the - plot */ - double x_lower = MIN (m->min, (yfirst - intercept) / slope) ; - double x_upper = MAX (m->max, (ylast - intercept) / slope) ; - double slack = (x_upper - x_lower) * 0.05 ; - chart_write_xscale (np_chart, x_lower - slack, x_upper + slack, 5); - - chart_write_xscale (dnp_chart, m->min, m->max, 5); - - } - - chart_write_yscale (np_chart, yfirst, ylast, 5); - - { - /* We have to cache the detrended data, beacause we need to - find its limits before we can plot it */ - double *d_data = xnmalloc (m->n_data, sizeof *d_data); - double d_max = -DBL_MAX; - double d_min = DBL_MAX; - for ( i = 0 ; i < m->n_data; ++i ) - { - const double ns = gsl_cdf_ugaussian_Pinv (m->wvp[i]->rank / ( m->n + 1)); - - chart_datum (np_chart, 0, m->wvp[i]->v.f, ns); - - d_data[i] = (m->wvp[i]->v.f - m->mean) / m->stddev - ns; - - if ( d_data[i] < d_min ) d_min = d_data[i]; - if ( d_data[i] > d_max ) d_max = d_data[i]; - } - chart_write_yscale (dnp_chart, d_min, d_max, 5); - - for ( i = 0 ; i < m->n_data; ++i ) - chart_datum (dnp_chart, 0, m->wvp[i]->v.f, d_data[i]); - - free (d_data); - } - - chart_line (np_chart, slope, intercept, yfirst, ylast , CHART_DIM_Y); - chart_line (dnp_chart, 0, 0, m->min, m->max , CHART_DIM_X); - - chart_submit (np_chart); - chart_submit (dnp_chart); -} - - - - -/* Show the percentiles */ -void -show_percentiles (const struct variable **dependent_var, - int n_dep_var, - struct factor *fctr) -{ - struct tab_table *tbl; - int i; - - int n_cols, n_rows; - int n_factors; - - struct hsh_table *ptiles ; + tab_vline (tbl, TAL_2, heading_columns, 0, n_rows - 1); - int n_heading_columns; - const int n_heading_rows = 2; - const int n_stat_rows = 2; + tab_title (tbl, _("Percentiles")); - int n_ptiles ; - if ( fctr ) + for (i = 0 ; i < n_percentiles; ++i ) { - struct factor_statistics **fs = fctr->fs ; - n_heading_columns = 3; - n_factors = hsh_count (fctr->fstats); - - ptiles = (*fs)->m[0].ptile_hash; + tab_text (tbl, n_cols - n_percentiles + i, 1, + TAB_CENTER | TAT_TITLE | TAT_PRINTF, + _("%g"), + subc_list_double_at (&percentile_list, i) + ); - if ( fctr->indep_var[1] ) - n_heading_columns = 4; - } - else - { - n_factors = 1; - n_heading_columns = 2; - ptiles = totals[0].ptile_hash; } - n_ptiles = hsh_count (ptiles); - - n_rows = n_heading_rows + n_dep_var * n_stat_rows * n_factors; - - n_cols = n_heading_columns + n_ptiles ; - - tbl = tab_create (n_cols, n_rows, 0); - - tab_headers (tbl, n_heading_columns + 1, 0, n_heading_rows, 0); - - tab_dim (tbl, tab_natural_dimensions); - - /* Outline the box and have no internal lines*/ - tab_box (tbl, - TAL_2, TAL_2, - -1, -1, - 0, 0, - n_cols - 1, n_rows - 1); - - tab_hline (tbl, TAL_2, 0, n_cols - 1, n_heading_rows ); - - tab_vline (tbl, TAL_2, n_heading_columns, 0, n_rows - 1); - - - tab_title (tbl, _ ("Percentiles")); - - - tab_hline (tbl, TAL_1, n_heading_columns, n_cols - 1, 1 ); - - - tab_box (tbl, - -1, -1, - -1, TAL_1, - 0, n_heading_rows, - n_heading_columns - 1, n_rows - 1); - + tab_joint_text (tbl, + n_cols - n_percentiles, 0, + n_cols - 1, 0, + TAB_CENTER | TAT_TITLE, + _("Percentiles")); + /* Vertical lines for the data only */ tab_box (tbl, -1, -1, -1, TAL_1, - n_heading_columns, n_heading_rows - 1, + n_cols - n_percentiles, 1, n_cols - 1, n_rows - 1); - tab_joint_text (tbl, n_heading_columns + 1, 0, - n_cols - 1 , 0, - TAB_CENTER | TAT_TITLE , - _ ("Percentiles")); - - - { - /* Put in the percentile break points as headings */ - - struct percentile **p = (struct percentile **) hsh_sort (ptiles); - - i = 0; - while ( (*p) ) - { - tab_float (tbl, n_heading_columns + i++ , 1, - TAB_CENTER, - (*p)->p, 8, 0); - - p++; - } - - } - - for ( i = 0 ; i < n_dep_var ; ++i ) - { - const int n_stat_rows = 2; - const int row = n_heading_rows + i * n_stat_rows * n_factors ; - - if ( i > 0 ) - tab_hline (tbl, TAL_1, 0, n_cols - 1, row ); - - tab_text (tbl, 0, - i * n_stat_rows * n_factors + n_heading_rows, - TAB_LEFT | TAT_TITLE, - var_to_string (dependent_var[i]) - ); - - if ( fctr ) - { - const union value *prev = NULL ; - struct factor_statistics **fs = fctr->fs; - int count = 0; - - tab_text (tbl, 1, n_heading_rows - 1, - TAB_CENTER | TAT_TITLE, - var_to_string (fctr->indep_var[0])); - - - if ( fctr->indep_var[1]) - tab_text (tbl, 2, n_heading_rows - 1, TAB_CENTER | TAT_TITLE, - var_to_string (fctr->indep_var[1])); - - while ( *fs ) - { - const int row = n_heading_rows + n_stat_rows * - ( ( i * n_factors ) + count ); - - - if ( !prev || 0 != compare_values (prev, (*fs)->id[0], - var_get_width (fctr->indep_var[0]))) - { - struct string vstr; - ds_init_empty (&vstr); - var_append_value_name (fctr->indep_var[0], - (*fs)->id[0], &vstr); - - - if ( count > 0 ) - tab_hline (tbl, TAL_1, 1, n_cols - 1, row); - - tab_text (tbl, - 1, row, - TAB_LEFT | TAT_TITLE, - ds_cstr (&vstr) - ); - - ds_destroy (&vstr); - } - - prev = (*fs)->id[0]; - - if (fctr->indep_var[1] && count > 0 ) - tab_hline (tbl, TAL_1, 2, n_cols - 1, row); - - if ( fctr->indep_var[1]) - { - struct string vstr; - ds_init_empty (&vstr); - var_append_value_name (fctr->indep_var[1], (*fs)->id[1], &vstr); - - tab_text (tbl, 2, row, - TAB_LEFT | TAT_TITLE, - ds_cstr (&vstr) - ); - - ds_destroy (&vstr); - } - - - populate_percentiles (tbl, n_heading_columns - 1, - row, & (*fs)->m[i]); - - - count++ ; - fs++; - } - - - } - else - { - populate_percentiles (tbl, n_heading_columns - 1, - i * n_stat_rows * n_factors + n_heading_rows, - &totals[i]); - } - - - } + tab_hline (tbl, TAL_1, n_cols - n_percentiles, n_cols - 1, 1); tab_submit (tbl); - - } - - -void -populate_percentiles (struct tab_table *tbl, int col, int row, - const struct metrics *m) +static void +factor_to_string_concise (const struct xfactor *fctr, + const struct factor_result *result, + struct string *str + ) { - int i; - - struct percentile **p = (struct percentile **) hsh_sort (m->ptile_hash); - - tab_text (tbl, - col, row + 1, - TAB_LEFT | TAT_TITLE, - _ ("Tukey\'s Hinges") - ); - - tab_text (tbl, - col, row, - TAB_LEFT | TAT_TITLE, - ptile_alg_desc[m->ptile_alg] - ); - - - i = 0; - while ( (*p) ) + if (fctr->indep_var[0]) { - tab_float (tbl, col + i + 1 , row, - TAB_CENTER, - (*p)->v, 8, 2); - if ( (*p)->p == 25 ) - tab_float (tbl, col + i + 1 , row + 1, - TAB_CENTER, - m->hinge[0], 8, 2); - - if ( (*p)->p == 50 ) - tab_float (tbl, col + i + 1 , row + 1, - TAB_CENTER, - m->hinge[1], 8, 2); - - if ( (*p)->p == 75 ) - tab_float (tbl, col + i + 1 , row + 1, - TAB_CENTER, - m->hinge[2], 8, 2); + var_append_value_name (fctr->indep_var[0], result->value[0], str); + if ( fctr->indep_var[1] ) + { + ds_put_cstr (str, ","); - i++; + var_append_value_name (fctr->indep_var[1], result->value[1], str); - p++; + ds_put_cstr (str, ")"); + } } - } + static void -factor_to_string (const struct factor *fctr, - const struct factor_statistics *fs, - const struct variable *var, +factor_to_string (const struct xfactor *fctr, + const struct factor_result *result, struct string *str ) { - if (var) - ds_put_format (str, "%s (",var_to_string (var) ); - - - ds_put_format (str, "%s = ", - var_to_string (fctr->indep_var[0])); + if (fctr->indep_var[0]) + { + ds_put_format (str, "(%s = ", var_get_name (fctr->indep_var[0])); - var_append_value_name (fctr->indep_var[0], fs->id[0], str); + var_append_value_name (fctr->indep_var[0], result->value[0], str); - if ( fctr->indep_var[1] ) - { - ds_put_format (str, "; %s = )", - var_to_string (fctr->indep_var[1])); + if ( fctr->indep_var[1] ) + { + ds_put_cstr (str, ","); + ds_put_format (str, "%s = ", var_get_name (fctr->indep_var[1])); - var_append_value_name (fctr->indep_var[1], fs->id[1], str); - } - else - { - if ( var ) - ds_put_cstr (str, ")"); + var_append_value_name (fctr->indep_var[1], result->value[1], str); + } + ds_put_cstr (str, ")"); } } -static void -factor_to_string_concise (const struct factor *fctr, - const struct factor_statistics *fs, - struct string *str - ) - -{ - var_append_value_name (fctr->indep_var[0], fs->id[0], str); - - if ( fctr->indep_var[1] ) - { - ds_put_cstr (str, ","); - - var_append_value_name (fctr->indep_var[1],fs->id[1], str); - ds_put_cstr (str, ")"); - } -} /* Local Variables: diff --git a/src/language/stats/frequencies.q b/src/language/stats/frequencies.q index 4f26f93d..94b2bcf9 100644 --- a/src/language/stats/frequencies.q +++ b/src/language/stats/frequencies.q @@ -268,7 +268,7 @@ static hsh_compare_func compare_freq_numeric_d, compare_freq_alpha_d; static void do_piechart(const struct variable *var, const struct freq_tab *frq_tab); -gsl_histogram * +struct histogram * freq_tab_to_hist(const struct freq_tab *ft, const struct variable *var); @@ -606,31 +606,26 @@ postcalc (void) if ( chart == GFT_HIST) { double d[frq_n_stats]; - struct normal_curve norm; - gsl_histogram *hist ; - - - norm.N = vf->tab.valid_cases; + struct histogram *hist ; calc_stats (v, d); - norm.mean = d[frq_mean]; - norm.stddev = d[frq_stddev]; - hist = freq_tab_to_hist(ft,v); + hist = freq_tab_to_hist (ft,v); - histogram_plot(hist, var_to_string(v), &norm, normal); + histogram_plot_n (hist, var_to_string(v), + vf->tab.valid_cases, + d[frq_mean], + d[frq_stddev], + normal); - gsl_histogram_free(hist); + statistic_destroy ((struct statistic *)hist); } - if ( chart == GFT_PIE) { do_piechart(v_variables[i], ft); } - - cleanup_freq_tab (v); } @@ -1437,14 +1432,14 @@ dump_statistics (const struct variable *v, int show_varname) /* Create a gsl_histogram from a freq_tab */ -gsl_histogram * -freq_tab_to_hist(const struct freq_tab *ft, const struct variable *var) +struct histogram * +freq_tab_to_hist (const struct freq_tab *ft, const struct variable *var) { int i; double x_min = DBL_MAX; double x_max = -DBL_MAX; - gsl_histogram *hist; + struct statistic *hist; const double bins = 11; struct hsh_iterator hi; @@ -1461,15 +1456,15 @@ freq_tab_to_hist(const struct freq_tab *ft, const struct variable *var) if ( frq->value[0].f > x_max ) x_max = frq->value[0].f ; } - hist = histogram_create(bins, x_min, x_max); + hist = histogram_create (bins, x_min, x_max); for( i = 0 ; i < ft->n_valid ; ++i ) { frq = &ft->valid[i]; - gsl_histogram_accumulate(hist, frq->value[0].f, frq->count); + histogram_add ((struct histogram *)hist, frq->value[0].f, frq->count); } - return hist; + return (struct histogram *)hist; } diff --git a/src/libpspp/misc.h b/src/libpspp/misc.h index e33d588f..3b025157 100644 --- a/src/libpspp/misc.h +++ b/src/libpspp/misc.h @@ -14,8 +14,8 @@ You should have received a copy of the GNU General Public License along with this program. If not, see . */ -#if !math_misc_h -#define math_misc_h 1 +#if !libpspp_misc_h +#define libpspp_misc_h 1 #include #include @@ -60,4 +60,21 @@ pow4 (double x) return y; } -#endif /* math/misc.h */ +/* Set *DEST to the lower of *DEST and SRC */ +static inline void +minimize (double *dest, double src) +{ + if (src < *dest) + *dest = src; +} + + +/* Set *DEST to the greater of *DEST and SRC */ +static inline void +maximize (double *dest, double src) +{ + if (src > *dest) + *dest = src; +} + +#endif /* libpspp/misc.h */ diff --git a/src/math/automake.mk b/src/math/automake.mk index 107a985d..94a3a0a8 100644 --- a/src/math/automake.mk +++ b/src/math/automake.mk @@ -5,29 +5,26 @@ include $(top_srcdir)/src/math/ts/automake.mk noinst_LIBRARIES += src/math/libpspp_math.a src_math_libpspp_math_a_SOURCES = \ - src/math/factor-stats.c \ - src/math/factor-stats.h \ - src/math/chart-geometry.c \ - src/math/chart-geometry.h \ - src/math/coefficient.c \ - src/math/coefficient.h \ - src/math/covariance-matrix.c \ - src/math/covariance-matrix.h \ + src/math/box-whisker.c src/math/box-whiske.h \ + src/math/chart-geometry.c src/math/chart-geometry.h \ + src/math/coefficient.c src/math/coefficient.h \ + src/math/covariance-matrix.c src/math/covariance-matrix.h \ + src/math/extrema.c src/math/extrema.h \ src/math/group.c src/math/group.h \ src/math/group-proc.h \ src/math/histogram.c src/math/histogram.h \ - src/math/interaction.c \ - src/math/interaction.h \ - src/math/levene.c \ - src/math/levene.h \ - src/math/linreg.c \ - src/math/linreg.h \ - src/math/merge.c \ - src/math/merge.h \ + src/math/interaction.c src/math/interaction.h \ + src/math/levene.c src/math/levene.h \ + src/math/linreg.c src/math/linreg.h \ + src/math/merge.c src/math/merge.h \ src/math/moments.c src/math/moments.h \ + src/math/np.c src/math/np.h \ + src/math/order-stats.c src/math/order-stats.h \ src/math/percentiles.c src/math/percentiles.h \ src/math/design-matrix.c src/math/design-matrix.h \ src/math/random.c src/math/random.h \ - src/math/sort.c src/math/sort.h + src/math/sort.c src/math/sort.h \ + src/math/trimmed-mean.c src/math/trimmed-mean.h \ + src/math/tukey-hinges.c src/math/tukey-hinges.h EXTRA_DIST += src/math/OChangeLog diff --git a/src/math/factor-stats.c b/src/math/factor-stats.c deleted file mode 100644 index a97d7f09..00000000 --- a/src/math/factor-stats.c +++ /dev/null @@ -1,328 +0,0 @@ -/* PSPP - a program for statistical analysis. - Copyright (C) 2004 Free Software Foundation, Inc. - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . */ - -#include -#include "factor-stats.h" -#include -#include -#include -#include -#include "moments.h" -#include "percentiles.h" - -#include -#include -#include -#include -#include "histogram.h" - -#include "xalloc.h" - -void -metrics_precalc(struct metrics *m) -{ - assert (m) ; - - m->n_missing = 0; - - m->min = DBL_MAX; - m->max = -DBL_MAX; - - m->histogram = 0; - - m->moments = moments1_create(MOMENT_KURTOSIS); - - m->ordered_data = hsh_create(20, - (hsh_compare_func *) compare_values, - (hsh_hash_func *) hash_value, - (hsh_free_func *) weighted_value_free, - (void *) 0); -} - - -/* Include val in the calculation for the metrics. - If val is null, then treat it as MISSING -*/ -void -metrics_calc (struct metrics *fs, const union value *val, - double weight, int case_no) -{ - struct weighted_value **wv; - double x; - - if ( ! val ) - { - fs->n_missing += weight; - return ; - } - - x = val->f; - - moments1_add(fs->moments, x, weight); - - if ( x < fs->min) fs->min = x; - if ( x > fs->max) fs->max = x; - - wv = (struct weighted_value **) hsh_probe (fs->ordered_data,(void *) val ); - - if ( *wv ) - { - /* If this value has already been seen, then simply - increase its weight and push a new case number */ - - struct case_node *cn; - - assert( (*wv)->v.f == val->f ); - (*wv)->w += weight; - - cn = xmalloc ( sizeof *cn); - cn->next = (*wv)->case_nos ; - cn->num = case_no; - - (*wv)->case_nos = cn; - } - else - { - struct case_node *cn; - - *wv = weighted_value_create(); - (*wv)->v = *val; - (*wv)->w = weight; - - cn = xmalloc (sizeof *cn); - cn->next=0; - cn->num = case_no; - (*wv)->case_nos = cn; - - } - -} - -void -metrics_postcalc(struct metrics *m) -{ - double cc = 0.0; - double tc ; - int k1, k2 ; - int i; - int j = 1; - - moments1_calculate (m->moments, &m->n, &m->mean, &m->var, - &m->skewness, &m->kurtosis); - - moments1_destroy (m->moments); - - - m->stddev = sqrt(m->var); - - /* FIXME: Check this is correct ??? - Shouldn't we use the sample variance ??? */ - m->se_mean = sqrt (m->var / m->n) ; - - - - m->wvp = (struct weighted_value **) hsh_sort(m->ordered_data); - m->n_data = hsh_count(m->ordered_data); - - /* Trimmed mean calculation */ - if ( m->n_data <= 1 ) - { - m->trimmed_mean = m->mean; - return; - } - - m->histogram = histogram_create(10, m->min, m->max); - - for ( i = 0 ; i < m->n_data ; ++i ) - { - struct weighted_value **wv = (m->wvp) ; - gsl_histogram_accumulate(m->histogram, wv[i]->v.f, wv[i]->w); - } - - tc = m->n * 0.05 ; - k1 = -1; - k2 = -1; - - for ( i = 0 ; i < m->n_data ; ++i ) - { - cc += m->wvp[i]->w; - m->wvp[i]->cc = cc; - - m->wvp[i]->rank = j + (m->wvp[i]->w - 1) / 2.0 ; - - j += m->wvp[i]->w; - - if ( cc < tc ) - k1 = i; - } - - - - k2 = m->n_data; - for ( i = m->n_data -1 ; i >= 0; --i ) - { - if ( tc > m->n - m->wvp[i]->cc) - k2 = i; - } - - - /* Calculate the percentiles */ - ptiles (m->ptile_hash, (const struct weighted_value **) m->wvp, - m->n_data, m->n, m->ptile_alg); - - tukey_hinges ((const struct weighted_value **) m->wvp, - m->n_data, m->n, m->hinge); - - /* Special case here */ - if ( k1 + 1 == k2 ) - { - m->trimmed_mean = m->wvp[k2]->v.f; - return; - } - - m->trimmed_mean = 0; - for ( i = k1 + 2 ; i <= k2 - 1 ; ++i ) - { - m->trimmed_mean += m->wvp[i]->v.f * m->wvp[i]->w; - } - - - m->trimmed_mean += (m->n - m->wvp[k2 - 1]->cc - tc) * m->wvp[k2]->v.f ; - m->trimmed_mean += (m->wvp[k1 + 1]->cc - tc) * m->wvp[k1 + 1]->v.f ; - m->trimmed_mean /= 0.9 * m->n ; - - -} - - -struct weighted_value * -weighted_value_create(void) -{ - struct weighted_value *wv; - wv = xmalloc (sizeof *wv); - - wv->cc = 0; - wv->case_nos = 0; - - return wv; -} - -void -weighted_value_free(struct weighted_value *wv) -{ - struct case_node *cn ; - - if ( !wv ) - return ; - - cn = wv->case_nos; - - while(cn) - { - struct case_node *next = cn->next; - - free(cn); - cn = next; - } - - free(wv); - -} - - - - - -/* Create a factor statistics object with for N dependent vars - and ID0 and ID1 as the values of the independent variable */ -struct factor_statistics * -create_factor_statistics (int n, - union value *id0, - union value *id1) -{ - struct factor_statistics *f; - - f = xmalloc (sizeof *f); - - f->id[0] = id0; - f->id[1] = id1; - f->m = xnmalloc (n, sizeof *f->m); - memset (f->m, 0, sizeof(struct metrics) * n); - f->n_var = n; - - return f; -} - -void -metrics_destroy(struct metrics *m) -{ - hsh_destroy(m->ordered_data); - hsh_destroy(m->ptile_hash); - if ( m-> histogram ) - gsl_histogram_free(m->histogram); -} - -void -factor_statistics_free(struct factor_statistics *f) -{ - - int i; - free (f->id[0]); - free (f->id[1]); - for ( i = 0 ; i < f->n_var; ++i ) - metrics_destroy(&f->m[i]); - free(f->m) ; - free(f); -} - - -int -factor_statistics_compare(const struct factor_statistics *f0, - const struct factor_statistics *f1, int width) -{ - - int cmp0; - - assert(f0); - assert(f1); - - cmp0 = compare_values(f0->id[0], f1->id[0], width); - - if ( cmp0 != 0 ) - return cmp0; - - - if ( ( f0->id[1]->f == SYSMIS ) && (f1->id[1]->f != SYSMIS) ) - return 1; - - if ( ( f0->id[1]->f != SYSMIS ) && (f1->id[1]->f == SYSMIS) ) - return -1; - - return compare_values (f0->id[1], f1->id[1], width); -} - -unsigned int -factor_statistics_hash (const struct factor_statistics *f, int width) -{ - unsigned int h; - - h = hash_value (f->id[0], width); - - if ( f->id[1]->f != SYSMIS ) - h += hash_value(f->id[1], width); - - return h; -} diff --git a/src/math/factor-stats.h b/src/math/factor-stats.h deleted file mode 100644 index 3c1c7f91..00000000 --- a/src/math/factor-stats.h +++ /dev/null @@ -1,162 +0,0 @@ -/* PSPP - a program for statistical analysis. - Copyright (C) 2004 Free Software Foundation, Inc. - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . */ - -#ifndef FACTOR_STATS -#define FACTOR_STATS - - -/* FIXME: These things should probably be amalgamated with the - group_statistics struct */ - -#include -#include -#include -#include -#include "percentiles.h" - - -struct moments1; - -struct metrics -{ - double n; - - double n_missing; - - double min; - - double max; - - double mean; - - double se_mean; - - double var; - - double stddev; - - struct moments1 *moments; - - gsl_histogram *histogram; - - double skewness; - double kurtosis; - - double trimmed_mean; - - /* A hash of data for this factor. */ - struct hsh_table *ordered_data; - - /* A Pointer to this hash table AFTER it has been SORTED and crunched */ - struct weighted_value **wvp; - - /* The number of values in the above array - (if all the weights are 1, then this will - be the same as n) */ - int n_data; - - /* Percentile stuff */ - - /* A hash of struct percentiles */ - struct hsh_table *ptile_hash; - - /* Algorithm to be used for calculating percentiles */ - enum pc_alg ptile_alg; - - /* Tukey's Hinges */ - double hinge[3]; - -}; - - -struct metrics * metrics_create(void); - -void metrics_precalc(struct metrics *m); - -void metrics_calc(struct metrics *m, const union value *f, double weight, - int case_no); - -void metrics_postcalc(struct metrics *m); - -void metrics_destroy(struct metrics *m); - - - -/* Linked list of case nos */ -struct case_node -{ - int num; - struct case_node *next; -}; - -struct weighted_value -{ - union value v; - - /* The weight */ - double w; - - /* The cumulative weight */ - double cc; - - /* The rank */ - double rank; - - /* Linked list of cases nos which have this value */ - struct case_node *case_nos; - -}; - - -struct weighted_value *weighted_value_create(void); - -void weighted_value_free(struct weighted_value *wv); - - - -struct factor_statistics { - - /* The values of the independent variables */ - union value *id[2]; - - /* The an array stats for this factor, one for each dependent var */ - struct metrics *m; - - /* The number of dependent variables */ - int n_var; -}; - - -/* Create a factor statistics object with for N dependent vars - and ID as the value of the independent variable */ -struct factor_statistics * create_factor_statistics (int n, - union value *id0, - union value *id1); - - -void factor_statistics_free(struct factor_statistics *f); - - -/* Compare f0 and f1. - width is the width of the independent variable */ -int -factor_statistics_compare(const struct factor_statistics *f0, - const struct factor_statistics *f1, int width); - -unsigned int -factor_statistics_hash(const struct factor_statistics *f, int width); - -#endif diff --git a/src/math/histogram.c b/src/math/histogram.c index 7b875d40..67079398 100644 --- a/src/math/histogram.c +++ b/src/math/histogram.c @@ -1,5 +1,5 @@ /* PSPP - a program for statistical analysis. - Copyright (C) 2004 Free Software Foundation, Inc. + Copyright (C) 2004, 2008 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -15,36 +15,69 @@ along with this program. If not, see . */ #include -#include -#include -#include #include "histogram.h" + +#include +#include + +#include #include "chart-geometry.h" +#include + + +void +histogram_add (struct histogram *h, double y, double c) +{ + ((struct statistic *)h)->accumulate ((struct statistic *) h, NULL, c, 0, y); +} + -gsl_histogram * -histogram_create(double bins, double x_min, double x_max) +static void +acc (struct statistic *s, const struct ccase *cx UNUSED, double c, double cc UNUSED, double y) { - int n; - double bin_width ; - double bin_width_2 ; + struct histogram *hist = (struct histogram *) s; + + gsl_histogram_accumulate (hist->gsl_hist, y, c); +} + + +static void +destroy (struct statistic *s) +{ + struct histogram *h = (struct histogram *) s; + gsl_histogram_free (h->gsl_hist); + free (s); +} + + +struct statistic * +histogram_create (int bins, double min, double max) +{ + struct histogram *h = xmalloc (sizeof *h); + struct statistic *stat = (struct statistic *) h; double upper_limit, lower_limit; - gsl_histogram *hist = gsl_histogram_alloc(bins); + double bin_width = chart_rounded_tick ((max - min) / (double) bins); + double bin_width_2 = bin_width / 2.0; - bin_width = chart_rounded_tick((x_max - x_min)/ bins); - bin_width_2 = bin_width / 2.0; + int n = ceil (max / (bin_width_2) ) ; + + assert (max > min); - n = ceil( x_max / (bin_width_2) ) ; if ( ! (n % 2 ) ) n++; upper_limit = n * bin_width_2; - n = floor( x_min / (bin_width_2) ) ; + n = floor (min / (bin_width_2) ) ; if ( ! (n % 2 ) ) n--; lower_limit = n * bin_width_2; - gsl_histogram_set_ranges_uniform(hist, lower_limit, upper_limit); + h->gsl_hist = gsl_histogram_alloc (bins); + gsl_histogram_set_ranges_uniform (h->gsl_hist, lower_limit, upper_limit); + + stat->accumulate = acc; + stat->destroy = destroy; - return hist; + return stat; } diff --git a/src/math/histogram.h b/src/math/histogram.h index e4c7819f..b2b204ee 100644 --- a/src/math/histogram.h +++ b/src/math/histogram.h @@ -1,5 +1,5 @@ /* PSPP - a program for statistical analysis. - Copyright (C) 2004 Free Software Foundation, Inc. + Copyright (C) 2008 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -14,11 +14,25 @@ You should have received a copy of the GNU General Public License along with this program. If not, see . */ -#ifndef HISTOGRAM_H -#define HISTOGRAM_H +#ifndef __NEW_HISTOGRAM_H__ +#define __NEW_HISTOGRAM_H__ + +#include + +#include "statistic.h" #include -gsl_histogram * histogram_create(double bins, double x_min, double x_max); + +struct histogram +{ + struct statistic parent; + gsl_histogram *gsl_hist; +}; + +struct statistic * histogram_create (int bins, double max, double min); + +void histogram_add (struct histogram *h, double y, double c); + #endif diff --git a/src/math/percentiles.c b/src/math/percentiles.c index aa7eead6..bf99de16 100644 --- a/src/math/percentiles.c +++ b/src/math/percentiles.c @@ -1,5 +1,5 @@ /* PSPP - a program for statistical analysis. - Copyright (C) 2004 Free Software Foundation, Inc. + Copyright (C) 2008 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -15,25 +15,19 @@ along with this program. If not, see . */ #include -#include -#include -#include -#include "factor-stats.h" #include "percentiles.h" -#include +#include -#include "minmax.h" #include "gettext.h" #define _(msgid) gettext (msgid) #define N_(msgid) msgid -struct ptile_params -{ - double g1, g1_star; - double g2, g2_star; - int k1, k2; -}; +#include +#include +#include +#include +#include const char *const ptile_alg_desc[] = { @@ -47,380 +41,151 @@ const char *const ptile_alg_desc[] = { - -/* Individual Percentile algorithms */ - -/* Closest observation to tc1 */ -double ptile_round(const struct weighted_value **wv, - const struct ptile_params *par); - - -/* Weighted average at y_tc2 */ -double ptile_haverage(const struct weighted_value **wv, - const struct ptile_params *par); - - -/* Weighted average at y_tc1 */ -double ptile_waverage(const struct weighted_value **wv, - const struct ptile_params *par); - - -/* Empirical distribution function */ -double ptile_empirical(const struct weighted_value **wv, - const struct ptile_params *par); - - -/* Empirical distribution function with averaging*/ -double ptile_aempirical(const struct weighted_value **wv, - const struct ptile_params *par); - - - - -/* Closest observation to tc1 */ double -ptile_round(const struct weighted_value **wv, - const struct ptile_params *par) +percentile_calculate (const struct percentile *ptl, enum pc_alg alg) { - double x; - double a=0; + struct percentile *mutable = (struct percentile *) ptl; + const struct order_stats *os = &ptl->parent; - if ( par->k1 >= 0 ) - a = wv[par->k1]->v.f; + assert (os->cc == ptl->w); - if ( wv[par->k1 + 1]->w >= 1 ) - { - if ( par->g1_star < 0.5 ) - x = a; - else - x = wv[par->k1 + 1]->v.f; - } - else - { - if ( par->g1 < 0.5 ) - x = a; - else - x = wv[par->k1 + 1]->v.f; - - } - - return x; -} + if ( ptl->g1 == SYSMIS) + mutable->g1 = (os->k[0].tc - os->k[0].cc) / os->k[0].c_p1; -/* Weighted average at y_tc2 */ -double -ptile_haverage(const struct weighted_value **wv, - const struct ptile_params *par) -{ - - double a=0; + if ( ptl->g1_star == SYSMIS) + mutable->g1_star = os->k[0].tc - os->k[0].cc; - if ( par->g2_star >= 1.0 ) - return wv[par->k2 + 1]->v.f ; - - /* Special case for k2 + 1 >= n_data - (actually it's not a special case, but just avoids indexing errors ) - */ - if ( par->g2_star == 0 ) + if ( ptl->g2 == SYSMIS) { - assert(par->g2 == 0 ); - return wv[par->k2]->v.f; - } - - /* Ditto for k2 < 0 */ - if ( par->k2 >= 0 ) - { - a = wv[par->k2]->v.f; + if ( os->k[1].c == 0 ) + mutable->g2 = os->k[1].tc / os->k[1].c_p1; + else if ( os->k[1].c_p1 == 0 ) + mutable->g2 = 0; + else + mutable->g2 = (os->k[1].tc - os->k[1].cc) / os->k[1].c_p1; } - if ( wv[par->k2 + 1]->w >= 1.0 ) - return ( (1 - par->g2_star) * a + - par->g2_star * wv[par->k2 + 1]->v.f); - else - return ( (1 - par->g2) * a + - par->g2 * wv[par->k2 + 1]->v.f); - -} - - - -/* Weighted average at y_tc1 */ -double -ptile_waverage(const struct weighted_value **wv, - const struct ptile_params *par) -{ - double a=0; - - if ( par->g1_star >= 1.0 ) - return wv[par->k1 + 1]->v.f ; - - if ( par->k1 >= 0 ) + if ( ptl->g2_star == SYSMIS) { - a = wv[par->k1]->v.f; + if ( os->k[1].c == 0 ) + mutable->g2_star = os->k[1].tc; + else if ( os->k[1].c_p1 == 0 ) + mutable->g2_star = 0; + else + mutable->g2_star = os->k[1].tc - os->k[1].cc; } - if ( wv[par->k1 + 1]->w >= 1.0 ) - return ( (1 - par->g1_star) * a + - par->g1_star * wv[par->k1 + 1]->v.f); - else - return ( (1 - par->g1) * a + - par->g1 * wv[par->k1 + 1]->v.f); -} - - -/* Empirical distribution function */ -double -ptile_empirical(const struct weighted_value **wv, - const struct ptile_params *par) -{ - if ( par->g1_star > 0 ) - return wv[par->k1 + 1]->v.f; - else - return wv[par->k1]->v.f; -} - - - -/* Empirical distribution function with averageing */ -double -ptile_aempirical(const struct weighted_value **wv, - const struct ptile_params *par) -{ - if ( par->g1_star > 0 ) - return wv[par->k1 + 1]->v.f; - else - return (wv[par->k1]->v.f + wv[par->k1 + 1]->v.f ) / 2.0 ; -} - - - -/* Compute the percentile p */ -double ptile(double p, - const struct weighted_value **wv, - int n_data, - double w, - enum pc_alg algorithm); - - - -double -ptile(double p, - const struct weighted_value **wv, - int n_data, - double w, - enum pc_alg algorithm) -{ - int i; - double tc1, tc2; - double result; - - struct ptile_params pp; - - assert( p <= 1.0); - - tc1 = w * p ; - tc2 = (w + 1) * p ; - - pp.k1 = -1; - pp.k2 = -1; - - for ( i = 0 ; i < n_data ; ++i ) + switch (alg) { - if ( wv[i]->cc <= tc1 ) - pp.k1 = i; - - if ( wv[i]->cc <= tc2 ) - pp.k2 = i; + case PC_WAVERAGE: + if ( ptl->g1_star >= 1.0) + return os->k[0].y_p1; + else + { + double a = ( os->k[0].y == SYSMIS ) ? 0 : os->k[0].y; - } + if (os->k[0].c_p1 >= 1.0) + return (1 - ptl->g1_star) * a + ptl->g1_star * os->k[0].y_p1; + else + return (1 - ptl->g1) * a + ptl->g1 * os->k[0].y_p1; + } + break; + case PC_ROUND: + { + double a = ( os->k[0].y == SYSMIS ) ? 0 : os->k[0].y; - if ( pp.k1 >= 0 ) - { - pp.g1 = ( tc1 - wv[pp.k1]->cc ) / wv[pp.k1 + 1]->w; - pp.g1_star = tc1 - wv[pp.k1]->cc ; - } - else - { - pp.g1 = tc1 / wv[pp.k1 + 1]->w; - pp.g1_star = tc1 ; - } + if (os->k[0].c_p1 >= 1.0) + return (ptl->g1_star < 0.5) ? a : os->k[0].y_p1; + else + return (ptl->g1 < 0.5) ? a : os->k[0].y_p1; + } + break; + case PC_EMPIRICAL: + if ( ptl->g1_star == 0 ) + return os->k[0].y; + else + return os->k[0].y_p1; + break; - if ( pp.k2 + 1 >= n_data ) - { - pp.g2 = 0 ; - pp.g2_star = 0; - } - else - { - if ( pp.k2 >= 0 ) + case PC_HAVERAGE: + if ( ptl->g2_star >= 1.0) { - pp.g2 = ( tc2 - wv[pp.k2]->cc ) / wv[pp.k2 + 1]->w; - pp.g2_star = tc2 - wv[pp.k2]->cc ; + return os->k[1].y_p1; } else { - pp.g2 = tc2 / wv[pp.k2 + 1]->w; - pp.g2_star = tc2 ; + double a = ( os->k[1].y == SYSMIS ) ? 0 : os->k[1].y; + + if ( os->k[1].c_p1 >= 1.0) + { + if ( ptl->g2_star == 0) + return os->k[1].y; + + return (1 - ptl->g2_star) * a + ptl->g2_star * os->k[1].y_p1; + } + else + { + return (1 - ptl->g2) * a + ptl->g2 * os->k[1].y_p1; + } } - } - switch ( algorithm ) - { - case PC_HAVERAGE: - result = ptile_haverage(wv, &pp); - break; - case PC_WAVERAGE: - result = ptile_waverage(wv, &pp); - break; - case PC_ROUND: - result = ptile_round(wv, &pp); - break; - case PC_EMPIRICAL: - result = ptile_empirical(wv, &pp); break; + case PC_AEMPIRICAL: - result = ptile_aempirical(wv, &pp); + if ( ptl->g1_star == 0 ) + return (os->k[0].y + os->k[0].y_p1)/ 2.0; + else + return os->k[0].y_p1; break; + default: - result = SYSMIS; + NOT_REACHED (); + break; } - return result; + NOT_REACHED (); + + return SYSMIS; } -/* - Calculate the values of the percentiles in pc_hash. - wv is a sorted array of weighted values of the data set. -*/ -void -ptiles(struct hsh_table *pc_hash, - const struct weighted_value **wv, - int n_data, - double w, - enum pc_alg algorithm) +static void +destroy (struct statistic *stat) { - struct hsh_iterator hi; - struct percentile *p; - - if ( !pc_hash ) - return ; - for ( p = hsh_first(pc_hash, &hi); - p != 0 ; - p = hsh_next(pc_hash, &hi)) - { - p->v = ptile(p->p/100.0 , wv, n_data, w, algorithm); - } - + struct order_stats *os = (struct order_stats *) stat; + free (os->k); + free (os); } -/* Calculate Tukey's Hinges */ -void -tukey_hinges(const struct weighted_value **wv, - int n_data, - double w, - double hinge[3] - ) +struct order_stats * +percentile_create (double p, double W) { - int i; - double c_star = DBL_MAX; - double d; - double l[3]; - int h[3]; - double a, a_star; - - for ( i = 0 ; i < n_data ; ++i ) - { - c_star = MIN(c_star, wv[i]->w); - } - - if ( c_star > 1 ) c_star = 1; - - d = floor((w/c_star + 3 ) / 2.0)/ 2.0; - - l[0] = d*c_star; - l[1] = w/2.0 + c_star/2.0; - l[2] = w + c_star - d*c_star; - - h[0]=-1; - h[1]=-1; - h[2]=-1; - - for ( i = 0 ; i < n_data ; ++i ) - { - if ( l[0] >= wv[i]->cc ) h[0] = i ; - if ( l[1] >= wv[i]->cc ) h[1] = i ; - if ( l[2] >= wv[i]->cc ) h[2] = i ; - } - - for ( i = 0 ; i < 3 ; i++ ) - { - - if ( h[i] >= 0 ) - a_star = l[i] - wv[h[i]]->cc ; - else - a_star = l[i]; - - if ( h[i] + 1 >= n_data ) - { - assert( a_star < 1 ) ; - hinge[i] = (1 - a_star) * wv[h[i]]->v.f; - continue; - } - else - { - a = a_star / ( wv[h[i] + 1]->cc ) ; - } - - if ( a_star >= 1.0 ) - { - hinge[i] = wv[h[i] + 1]->v.f ; - continue; - } - - if ( wv[h[i] + 1]->w >= 1) - { - hinge[i] = ( 1 - a_star) * wv[h[i]]->v.f - + a_star * wv[h[i] + 1]->v.f; - - continue; - } + struct percentile *ptl = xzalloc (sizeof (*ptl)); + struct order_stats *os = (struct order_stats *) ptl; + struct statistic *stat = (struct statistic *) ptl; - hinge[i] = (1 - a) * wv[h[i]]->v.f + a * wv[h[i] + 1]->v.f; + assert (p >= 0); + assert (p <= 1.0); - } - - assert(hinge[0] <= hinge[1]); - assert(hinge[1] <= hinge[2]); + ptl->ptile = p; + ptl->w = W; -} + os->n_k = 2; + os->k = xcalloc (sizeof (*os->k), 2); + os->k[0].tc = W * p; + os->k[1].tc = (W + 1.0) * p; + ptl->g1 = ptl->g1_star = SYSMIS; + ptl->g2 = ptl->g2_star = SYSMIS; -int -ptile_compare(const struct percentile *p1, - const struct percentile *p2, - void *aux UNUSED) -{ - - int cmp; + os->k[1].y_p1 = os->k[1].y = SYSMIS; + os->k[0].y_p1 = os->k[0].y = SYSMIS; - if ( p1->p == p2->p) - cmp = 0 ; - else if (p1->p < p2->p) - cmp = -1 ; - else - cmp = +1; + stat->destroy = destroy; - return cmp; + return os; } -unsigned -ptile_hash(const struct percentile *p, void *aux UNUSED) -{ - return hsh_hash_double(p->p); -} - - diff --git a/src/math/percentiles.h b/src/math/percentiles.h index 9e0eb47a..0dd09820 100644 --- a/src/math/percentiles.h +++ b/src/math/percentiles.h @@ -1,5 +1,5 @@ /* PSPP - a program for statistical analysis. - Copyright (C) 2004 Free Software Foundation, Inc. + Copyright (C) 2004, 2008 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -14,13 +14,12 @@ You should have received a copy of the GNU General Public License along with this program. If not, see . */ -#ifndef PERCENTILES_H -#define PERCENTILES_H +#ifndef __PERCENTILES_H__ +#define __PERCENTILES_H__ +#include -#include - -struct weighted_value ; +#include "order-stats.h" /* The algorithm used to calculate percentiles */ enum pc_alg { @@ -32,48 +31,33 @@ enum pc_alg { PC_AEMPIRICAL } ; - - extern const char *const ptile_alg_desc[]; +struct percentile +{ + struct order_stats parent; + double ptile; + double w; -struct percentile { + /* Mutable */ + double g1; + double g1_star; - /* The break point of the percentile */ - double p; - - /* The value of the percentile */ - double v; + double g2; + double g2_star; }; - -/* Calculate the percentiles of the break points in pc_bp, - placing the values in pc_val. - wv is a sorted array of weighted values of the data set. +/* Create the Pth percentile. + W is the total sum of weights in the data set */ -void ptiles(struct hsh_table *pc_hash, - const struct weighted_value **wv, - int n_data, - double w, - enum pc_alg algorithm); - - -/* Calculate Tukey's Hinges and the Whiskers for the box plot*/ -void tukey_hinges(const struct weighted_value **wv, - int n_data, - double w, - double hinges[3]); - - +struct order_stats *percentile_create (double p, double W); -/* Hash utility functions */ -int ptile_compare(const struct percentile *p1, - const struct percentile *p2, - void *aux); +/* Return the value of the percentile */ +double percentile_calculate (const struct percentile *ptl, enum pc_alg alg); -unsigned ptile_hash(const struct percentile *p, void *aux); +void percentile_dump (const struct percentile *ptl); #endif diff --git a/src/output/charts/box-whisker.c b/src/output/charts/box-whisker.c index d4a5ccab..4878221d 100644 --- a/src/output/charts/box-whisker.c +++ b/src/output/charts/box-whisker.c @@ -1,5 +1,5 @@ /* PSPP - a program for statistical analysis. - Copyright (C) 2004 Free Software Foundation, Inc. + Copyright (C) 2004, 2008 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -26,193 +26,133 @@ #include #include -#include - - - +#include /* Draw a box-and-whiskers plot */ -/* Draw an outlier on the plot CH +/* Draw an OUTLIER on the plot CH * at CENTRELINE - * The outlier is in (*wvp)[idx] - * If EXTREME is non zero, then consider it to be an extreme - * value */ -void -draw_outlier(struct chart *ch, double centreline, - struct weighted_value **wvp, - int idx, - short extreme); - - -void -draw_outlier(struct chart *ch, double centreline, - struct weighted_value **wvp, - int idx, - short extreme - ) +static void +draw_case (struct chart *ch, double centreline, + const struct outlier *outlier) { - char label[10]; #define MARKER_CIRCLE 4 #define MARKER_STAR 3 pl_fmarker_r(ch->lp, centreline, - ch->data_bottom + - (wvp[idx]->v.f - ch->y_min ) * ch->ordinate_scale, - extreme?MARKER_STAR:MARKER_CIRCLE, + ch->data_bottom + (outlier->value - ch->y_min) * ch->ordinate_scale, + outlier->extreme ? MARKER_STAR : MARKER_CIRCLE, 20); pl_moverel_r(ch->lp, 10,0); - snprintf(label, 10, "%d", wvp[idx]->case_nos->num); - - pl_alabel_r(ch->lp, 'l', 'c', label); - + pl_alabel_r(ch->lp, 'l', 'c', ds_cstr (&outlier->label)); } void -boxplot_draw_boxplot(struct chart *ch, - double box_centre, - double box_width, - struct metrics *m, - const char *name) +boxplot_draw_boxplot (struct chart *ch, + double box_centre, + double box_width, + const struct box_whisker *bw, + const char *name) { double whisker[2]; - int i; - - const double *hinge = m->hinge; - struct weighted_value **wvp = m->wvp; - const int n_data = m->n_data; - - const double step = (hinge[2] - hinge[0]) * 1.5; + double hinge[3]; + const struct ll_list *outliers; const double box_left = box_centre - box_width / 2.0; const double box_right = box_centre + box_width / 2.0; + double box_bottom ; + double box_top ; + double bottom_whisker ; + double top_whisker ; - const double box_bottom = - ch->data_bottom + ( hinge[0] - ch->y_min ) * ch->ordinate_scale; - - - const double box_top = - ch->data_bottom + ( hinge[2] - ch->y_min ) * ch->ordinate_scale; - - assert(m); + box_whisker_whiskers (bw, whisker); + box_whisker_hinges (bw, hinge); - /* Can't really draw a boxplot if there's no data */ - if ( n_data == 0 ) - return ; + box_bottom = ch->data_bottom + (hinge[0] - ch->y_min ) * ch->ordinate_scale; - whisker[1] = hinge[2]; - whisker[0] = wvp[0]->v.f; + box_top = ch->data_bottom + (hinge[2] - ch->y_min ) * ch->ordinate_scale; - for ( i = 0 ; i < n_data ; ++i ) - { - if ( hinge[2] + step > wvp[i]->v.f) - whisker[1] = wvp[i]->v.f; - - if ( hinge[0] - step > wvp[i]->v.f) - whisker[0] = wvp[i]->v.f; - - } - - { - const double bottom_whisker = - ch->data_bottom + ( whisker[0] - ch->y_min ) * ch->ordinate_scale; - - const double top_whisker = - ch->data_bottom + ( whisker[1] - ch->y_min ) * ch->ordinate_scale; + bottom_whisker = ch->data_bottom + (whisker[0] - ch->y_min) * + ch->ordinate_scale; + top_whisker = ch->data_bottom + (whisker[1] - ch->y_min) * ch->ordinate_scale; pl_savestate_r(ch->lp); - /* Draw the box */ - pl_savestate_r(ch->lp); - pl_fillcolorname_r(ch->lp,ch->fill_colour); - pl_filltype_r(ch->lp,1); - pl_fbox_r(ch->lp, + pl_savestate_r (ch->lp); + pl_fillcolorname_r (ch->lp, ch->fill_colour); + pl_filltype_r (ch->lp,1); + pl_fbox_r (ch->lp, box_left, box_bottom, box_right, box_top); - pl_restorestate_r(ch->lp); - - + pl_restorestate_r (ch->lp); /* Draw the median */ - pl_savestate_r(ch->lp); - pl_linewidth_r(ch->lp,5); - pl_fline_r(ch->lp, + pl_savestate_r (ch->lp); + pl_linewidth_r (ch->lp, 5); + pl_fline_r (ch->lp, box_left, - ch->data_bottom + ( hinge[1] - ch->y_min ) * ch->ordinate_scale, + ch->data_bottom + (hinge[1] - ch->y_min) * ch->ordinate_scale, box_right, - ch->data_bottom + ( hinge[1] - ch->y_min ) * ch->ordinate_scale); - pl_restorestate_r(ch->lp); - + ch->data_bottom + (hinge[1] - ch->y_min) * ch->ordinate_scale); + pl_restorestate_r (ch->lp); /* Draw the bottom whisker */ - pl_fline_r(ch->lp, + pl_fline_r (ch->lp, box_left, bottom_whisker, box_right, bottom_whisker); /* Draw top whisker */ - pl_fline_r(ch->lp, + pl_fline_r (ch->lp, box_left, top_whisker, box_right, top_whisker); - /* Draw centre line. (bottom half) */ - pl_fline_r(ch->lp, + pl_fline_r (ch->lp, box_centre, bottom_whisker, box_centre, box_bottom); /* (top half) */ - pl_fline_r(ch->lp, + pl_fline_r (ch->lp, box_centre, top_whisker, box_centre, box_top); - } - /* Draw outliers */ - for ( i = 0 ; i < n_data ; ++i ) + outliers = box_whisker_outliers (bw); + for (struct ll *ll = ll_head (outliers); + ll != ll_null (outliers); ll = ll_next (ll)) { - if ( wvp[i]->v.f >= hinge[2] + step ) - draw_outlier(ch, box_centre, wvp, i, - ( wvp[i]->v.f > hinge[2] + 2 * step ) - ); - - if ( wvp[i]->v.f <= hinge[0] - step ) - draw_outlier(ch, box_centre, wvp, i, - ( wvp[i]->v.f < hinge[0] - 2 * step ) - ); + const struct outlier *outlier = ll_data (ll, struct outlier, ll); + draw_case (ch, box_centre, outlier); } - /* Draw tick mark on x axis */ draw_tick(ch, TICK_ABSCISSA, box_centre - ch->data_left, name); pl_restorestate_r(ch->lp); - } - - void -boxplot_draw_yscale(struct chart *ch , double y_max, double y_min) +boxplot_draw_yscale (struct chart *ch, double y_max, double y_min) { double y_tick; double d; @@ -223,7 +163,7 @@ boxplot_draw_yscale(struct chart *ch , double y_max, double y_min) ch->y_max = y_max; ch->y_min = y_min; - y_tick = chart_rounded_tick(fabs(ch->y_max - ch->y_min) / 5.0); + y_tick = chart_rounded_tick (fabs(ch->y_max - ch->y_min) / 5.0); ch->y_min = (ceil( ch->y_min / y_tick ) - 1.0 ) * y_tick; @@ -232,7 +172,6 @@ boxplot_draw_yscale(struct chart *ch , double y_max, double y_min) ch->ordinate_scale = fabs(ch->data_top - ch->data_bottom) / fabs(ch->y_max - ch->y_min) ; - /* Move to data bottom-left */ pl_move_r(ch->lp, ch->data_left, ch->data_bottom); @@ -241,5 +180,4 @@ boxplot_draw_yscale(struct chart *ch , double y_max, double y_min) { draw_tick (ch, TICK_ORDINATE, (d - ch->y_min ) * ch->ordinate_scale, "%g", d); } - } diff --git a/src/output/charts/box-whisker.h b/src/output/charts/box-whisker.h index 656d8d49..7b2c4b8f 100644 --- a/src/output/charts/box-whisker.h +++ b/src/output/charts/box-whisker.h @@ -18,28 +18,15 @@ #define BOX_WHISKER_H struct chart ; -struct weighted_value; -struct metrics; +struct box_whisker; -/* Draw an outlier on the plot CH - * at CENTRELINE - * The outlier is in (*wvp)[idx] - * If EXTREME is non zero, then consider it to be an extreme - * value - */ -void draw_outlier(struct chart *ch, double centreline, - struct weighted_value **wvp, - int idx, - short extreme); +void boxplot_draw_boxplot (struct chart *ch, + double box_centre, + double box_width, + const struct box_whisker *w, + const char *name); -void boxplot_draw_boxplot(struct chart *ch, - double box_centre, - double box_width, - struct metrics *m, - const char *name); - - -void boxplot_draw_yscale(struct chart *ch , double y_max, double y_min); +void boxplot_draw_yscale (struct chart *ch , double y_max, double y_min); #endif diff --git a/src/output/charts/plot-hist.c b/src/output/charts/plot-hist.c index 0f183208..e7dcd438 100644 --- a/src/output/charts/plot-hist.c +++ b/src/output/charts/plot-hist.c @@ -1,10 +1,10 @@ /* PSPP - a program for statistical analysis. - Copyright (C) 2004 Free Software Foundation, Inc. + Copyright (C) 2004 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. + (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -30,78 +30,79 @@ #include #include #include +#include +#include #include "gettext.h" #define _(msgid) gettext (msgid) /* Write the legend of the chart */ -void -histogram_write_legend(struct chart *ch, const struct normal_curve *norm) +static void +histogram_write_legend (struct chart *ch, double n, double mean, double stddev) { char buf[100]; - if ( !ch ) + + if (!ch) return ; - pl_savestate_r(ch->lp); + pl_savestate_r (ch->lp); - sprintf(buf,"N = %.2f",norm->N); - pl_move_r(ch->lp, ch->legend_left, ch->data_bottom); - pl_alabel_r(ch->lp,0,'b',buf); + sprintf (buf, "N = %.2f", n); + pl_move_r (ch->lp, ch->legend_left, ch->data_bottom); + pl_alabel_r (ch->lp, 0, 'b', buf); - sprintf(buf,"Mean = %.1f",norm->mean); - pl_fmove_r(ch->lp,ch->legend_left,ch->data_bottom + ch->font_size * 1.5); - pl_alabel_r(ch->lp,0,'b',buf); + sprintf (buf, "Mean = %.1f", mean); + pl_fmove_r (ch->lp,ch->legend_left,ch->data_bottom + ch->font_size * 1.5); + pl_alabel_r (ch->lp, 0, 'b', buf); - sprintf(buf,"Std. Dev = %.2f",norm->stddev); - pl_fmove_r(ch->lp,ch->legend_left,ch->data_bottom + ch->font_size * 1.5 * 2); - pl_alabel_r(ch->lp,0,'b',buf); + sprintf (buf, "Std. Dev = %.2f", stddev); + pl_fmove_r (ch->lp, ch->legend_left, ch->data_bottom + ch->font_size * 1.5 * 2); + pl_alabel_r (ch->lp, 0, 'b', buf); - pl_restorestate_r(ch->lp); + pl_restorestate_r (ch->lp); } -static void hist_draw_bar(struct chart *ch, const gsl_histogram *hist, int bar); +static void hist_draw_bar (struct chart *ch, const struct histogram *hist, int bar); static void -hist_draw_bar(struct chart *ch, const gsl_histogram *hist, int bar) +hist_draw_bar (struct chart *ch, const struct histogram *hist, int bar) { - if ( !ch ) + if (!ch) return ; - { double upper; double lower; double height; - const size_t bins = gsl_histogram_bins(hist); + const size_t bins = gsl_histogram_bins (hist->gsl_hist); const double x_pos = (ch->data_right - ch->data_left) * bar / (double) bins ; const double width = (ch->data_right - ch->data_left) / (double) bins ; + assert ( 0 == gsl_histogram_get_range (hist->gsl_hist, bar, &lower, &upper)); - assert ( 0 == gsl_histogram_get_range(hist, bar, &lower, &upper)); - - assert( upper >= lower); + assert ( upper >= lower); - height = gsl_histogram_get(hist, bar) * - (ch->data_top - ch->data_bottom) / gsl_histogram_max_val(hist); + height = gsl_histogram_get (hist->gsl_hist, bar) * + (ch->data_top - ch->data_bottom) / gsl_histogram_max_val (hist->gsl_hist); - pl_savestate_r(ch->lp); - pl_move_r(ch->lp,ch->data_left, ch->data_bottom); - pl_fillcolorname_r(ch->lp, ch->fill_colour); - pl_filltype_r(ch->lp,1); + pl_savestate_r (ch->lp); + pl_move_r (ch->lp,ch->data_left, ch->data_bottom); + pl_fillcolorname_r (ch->lp, ch->fill_colour); + pl_filltype_r (ch->lp,1); - pl_fboxrel_r(ch->lp, + pl_fboxrel_r (ch->lp, x_pos, 0, x_pos + width, height); - pl_restorestate_r(ch->lp); + pl_restorestate_r (ch->lp); { char buf[5]; - snprintf(buf,5,"%g",(upper + lower) / 2.0); - draw_tick(ch, TICK_ABSCISSA, + snprintf (buf,5,"%g", (upper + lower) / 2.0); + draw_tick (ch, TICK_ABSCISSA, x_pos + width / 2.0, buf); } } @@ -109,73 +110,85 @@ hist_draw_bar(struct chart *ch, const gsl_histogram *hist, int bar) +void +histogram_plot (const struct histogram *hist, + const char *label, + const struct moments1 *m) +{ + double mean, var, n; + + moments1_calculate (m, &n, &mean, &var, NULL, NULL); + + histogram_plot_n (hist, label, n, mean, sqrt(var), m); +} void -histogram_plot(const gsl_histogram *hist, - const char *factorname, - const struct normal_curve *norm, short show_normal) +histogram_plot_n (const struct histogram *hist, + const char *label, + double n, double mean, double stddev, + bool show_normal) { int i; int bins; - struct chart *ch; + struct chart *ch = chart_create (); - ch = chart_create(); - chart_write_title(ch, _("HISTOGRAM")); + chart_write_title (ch, _("HISTOGRAM")); - chart_write_ylabel(ch, _("Frequency")); - chart_write_xlabel(ch, factorname); + chart_write_ylabel (ch, _("Frequency")); + chart_write_xlabel (ch, label); if ( ! hist ) /* If this happens, probably all values are SYSMIS */ { - chart_submit(ch); - return ; + chart_submit (ch); + return; } else { - bins = gsl_histogram_bins(hist); + bins = gsl_histogram_bins (hist->gsl_hist); } - chart_write_yscale(ch, 0, gsl_histogram_max_val(hist), 5); + chart_write_yscale (ch, 0, gsl_histogram_max_val (hist->gsl_hist), 5); for ( i = 0 ; i < bins ; ++i ) - hist_draw_bar(ch, hist, i); + hist_draw_bar (ch, hist, i); - histogram_write_legend(ch, norm); + histogram_write_legend (ch, n, mean, stddev); - if ( show_normal ) - { - /* Draw the normal curve */ - - double d ; - double x_min, x_max, not_used ; - double abscissa_scale ; - double ordinate_scale ; - double range ; - - gsl_histogram_get_range(hist, 0, &x_min, ¬_used); - range = not_used - x_min; - gsl_histogram_get_range(hist, bins - 1, ¬_used, &x_max); - - abscissa_scale = (ch->data_right - ch->data_left) / (x_max - x_min); - ordinate_scale = (ch->data_top - ch->data_bottom) / - gsl_histogram_max_val(hist) ; - - pl_move_r(ch->lp, ch->data_left, ch->data_bottom); - for( d = ch->data_left; - d <= ch->data_right ; - d += (ch->data_right - ch->data_left) / 100.0) - { - const double x = (d - ch->data_left) / abscissa_scale + x_min ; - const double y = norm->N * range * - gsl_ran_gaussian_pdf(x - norm->mean, norm->stddev); - - pl_fcont_r(ch->lp, d, ch->data_bottom + y * ordinate_scale); - - } - pl_endpath_r(ch->lp); + if (show_normal) + { + /* Draw the normal curve */ + + double d ; + double x_min, x_max, not_used ; + double abscissa_scale ; + double ordinate_scale ; + double range ; + + gsl_histogram_get_range (hist->gsl_hist, 0, &x_min, ¬_used); + range = not_used - x_min; + gsl_histogram_get_range (hist->gsl_hist, bins - 1, ¬_used, &x_max); + + abscissa_scale = (ch->data_right - ch->data_left) / (x_max - x_min); + ordinate_scale = (ch->data_top - ch->data_bottom) / + gsl_histogram_max_val (hist->gsl_hist) ; + + pl_move_r (ch->lp, ch->data_left, ch->data_bottom); + for ( d = ch->data_left; + d <= ch->data_right ; + d += (ch->data_right - ch->data_left) / 100.0) + { + const double x = (d - ch->data_left) / abscissa_scale + x_min ; + const double y = n * range * + gsl_ran_gaussian_pdf (x - mean, stddev); + + pl_fcont_r (ch->lp, d, ch->data_bottom + y * ordinate_scale); + + } + pl_endpath_r (ch->lp); + } - } - chart_submit(ch); + chart_submit (ch); } + diff --git a/src/output/charts/plot-hist.h b/src/output/charts/plot-hist.h index c808cb4e..606206d5 100644 --- a/src/output/charts/plot-hist.h +++ b/src/output/charts/plot-hist.h @@ -17,23 +17,23 @@ #ifndef PLOT_HIST_H #define PLOT_HIST_H -#include +#include - -struct normal_curve -{ - double N ; - double mean ; - double stddev ; -}; struct chart; +struct moments1; +struct histogram; + +/* Plot M onto histogram HIST and label it with LABEL */ +void histogram_plot (const struct histogram *hist, + const char *label, const struct moments1 *m); -/* Write the legend of the chart */ -void histogram_write_legend(struct chart *ch, const struct normal_curve *norm); -void histogram_plot(const gsl_histogram *hist, - const char *factorname, - const struct normal_curve *norm, short show_normal); +/* A wrapper aroud histogram_plot. + Don't use this function. It's legacy only */ +void histogram_plot_n (const struct histogram *hist, + const char *label, + double n, double mean, double var, + bool show_normal); #endif diff --git a/tests/automake.mk b/tests/automake.mk index 6fb11d02..909dd7d4 100644 --- a/tests/automake.mk +++ b/tests/automake.mk @@ -107,6 +107,7 @@ dist_TESTS = \ tests/bugs/double-frequency.sh \ tests/bugs/empty-do-repeat.sh \ tests/bugs/get.sh \ + tests/bugs/examine-crash.sh \ tests/bugs/examine-1sample.sh \ tests/bugs/examine-missing.sh \ tests/bugs/examine-missing2.sh \ diff --git a/tests/bugs/examine-missing2.sh b/tests/bugs/examine-missing2.sh index 965b600e..97b19262 100755 --- a/tests/bugs/examine-missing2.sh +++ b/tests/bugs/examine-missing2.sh @@ -92,26 +92,26 @@ diff -b $TEMPDIR/pspp.list - <