You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
-02111-1307, USA. */
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+02110-1301, USA. */
#include <config.h>
#include <gsl/gsl_cdf.h>
#include "hash.h"
#include "casefile.h"
#include "factor_stats.h"
+#include "moments.h"
+#include "percentiles.h"
+
/* (headers) */
#include "chart.h"
/* (specification)
"EXAMINE" (xmn_):
- *variables=custom;
+ *^variables=custom;
+total=custom;
+nototal=custom;
+missing=miss:pairwise/!listwise,
rep:report/!noreport,
incl:include/!exclude;
+compare=cmp:variables/!groups;
+ +percentiles=custom;
+ +id=var;
+plot[plt_]=stemleaf,boxplot,npplot,:spreadlevel(*d:n),histogram,all,none;
+cinterval=double;
+statistics[st_]=descriptives,:extreme(*d:n),all,none.
/* Hash table of factor stats indexed by 2 values */
struct hsh_table *fstats;
- /* The hash table after it's been crunched */
+ /* The hash table after it has been crunched */
struct factor_statistics **fs;
struct factor *next;
static struct metrics *totals=0;
-void
-print_factors(void)
-{
- struct factor *f = factors;
-
- while (f)
- {
- struct factor_statistics **fs = f->fs;
-
- printf("Factor: %s BY %s\n",
- var_to_string(f->indep_var[0]),
- var_to_string(f->indep_var[1]) );
-
-
- printf("Contains %d entries\n", hsh_count(f->fstats));
-
-
- while (*fs)
- {
- printf("Factor %g; %g\n", (*fs)->id[0].f, (*fs)->id[1].f);
-
- /*
- printf("Factor %s; %s\n",
- value_to_string(&(*fs)->id[0], f->indep_var[0]),
- value_to_string(&(*fs)->id[1], f->indep_var[1]));
- */
-
-
- printf("Sum is %g; ",(*fs)->m[0].sum);
- printf("N is %g; ",(*fs)->m[0].n);
- printf("Mean is %g\n",(*fs)->m[0].mean);
-
- fs++ ;
- }
-
- f = f->next;
- }
-
-
-}
-
-
/* Parse the clause specifying the factors */
static int examine_parse_independent_vars(struct cmd_examine *cmd);
int n_dep_var,
struct factor *factor);
+static void show_percentiles(struct variable **dependent_var,
+ int n_dep_var,
+ struct factor *factor);
+
+
+
void np_plot(const struct metrics *m, const char *factorname);
+void box_plot_group(const struct factor *fctr,
+ const struct variable **vars, int n_vars,
+ const struct variable *id
+ ) ;
+
+
+void box_plot_variables(const struct factor *fctr,
+ const struct variable **vars, int n_vars,
+ const struct variable *id
+ );
+
/* Per Split function */
double weight, int case_missing);
+/* Represent a factor as a string, so it can be
+ printed in a human readable fashion */
+const char * factor_to_string(const struct factor *fctr,
+ struct factor_statistics *fs,
+ const struct variable *var);
+
+
+/* Represent a factor as a string, so it can be
+ printed in a human readable fashion,
+ but sacrificing some readability for the sake of brevity */
+const char *factor_to_string_concise(const struct factor *fctr,
+ struct factor_statistics *fs);
+
+
+
+
/* Function to use for testing for missing values */
static is_missing_func value_is_missing;
+/* PERCENTILES */
+
+static subc_list_double percentile_list;
+
+static enum pc_alg percentile_algorithm;
+
+static short sbc_percentile;
+
+
int
cmd_examine(void)
{
+ subc_list_double_create(&percentile_list);
+ percentile_algorithm = PC_HAVERAGE;
+
if ( !parse_examine(&cmd) )
return CMD_FAILURE;
if ( ! cmd.sbc_cinterval)
cmd.n_cinterval[0] = 95.0;
+ /* If descriptives have been requested, make sure the
+ quartiles are calculated */
+ if ( cmd.a_statistics[XMN_ST_DESCRIPTIVES] )
+ {
+ subc_list_double_push(&percentile_list, 25);
+ subc_list_double_push(&percentile_list, 50);
+ subc_list_double_push(&percentile_list, 75);
+ }
+
multipass_procedure_with_splits (run_examine, &cmd);
+ if ( totals )
+ {
+ free( totals );
+ }
+
+ if ( dependent_vars )
+ free (dependent_vars);
+
+ {
+ struct factor *f = factors ;
+ while ( f )
+ {
+ struct factor *ff = f;
+
+ f = f->next;
+ free ( ff->fs );
+ hsh_destroy ( ff->fstats ) ;
+ free ( ff ) ;
+ }
+ }
+
+ subc_list_double_destroy(&percentile_list);
+
return CMD_SUCCESS;
};
show_descriptives(dependent_vars, n_dependent_vars, 0);
}
+ if ( sbc_percentile )
+ show_percentiles(dependent_vars, n_dependent_vars, 0);
if ( cmd.sbc_plot)
{
+ int v;
if ( cmd.a_plot[XMN_PLT_NPPLOT] )
{
- int v;
+ for ( v = 0 ; v < n_dependent_vars; ++v )
+ np_plot(&totals[v], var_to_string(dependent_vars[v]));
+ }
+
+ if ( cmd.a_plot[XMN_PLT_BOXPLOT] )
+ {
+ if ( cmd.cmp == XMN_GROUPS )
+ {
+ box_plot_group(0, dependent_vars, n_dependent_vars,
+ cmd.v_id);
+ }
+ else
+ box_plot_variables(0, dependent_vars, n_dependent_vars,
+ cmd.v_id);
+ }
+ if ( cmd.a_plot[XMN_PLT_HISTOGRAM] )
+ {
for ( v = 0 ; v < n_dependent_vars; ++v )
- np_plot(&totals[v], var_to_string(dependent_vars[v]));
+ {
+ struct normal_curve normal;
+
+ normal.N = totals[v].n;
+ normal.mean = totals[v].mean;
+ normal.stddev = totals[v].stddev;
+
+ histogram_plot(totals[v].histogram,
+ var_to_string(dependent_vars[v]),
+ &normal, 0);
+ }
}
- }
+ }
}
show_descriptives(dependent_vars, n_dependent_vars, fctr);
}
+ if ( sbc_percentile )
+ show_percentiles(dependent_vars, n_dependent_vars, fctr);
+
+
if ( cmd.sbc_plot)
{
- if ( cmd.a_plot[XMN_PLT_NPPLOT] )
+ int v;
+
+ struct factor_statistics **fs = fctr->fs ;
+
+ if ( cmd.a_plot[XMN_PLT_BOXPLOT] )
{
- int v;
- for ( v = 0 ; v < n_dependent_vars; ++ v)
+ if ( cmd.cmp == XMN_VARIABLES )
+ box_plot_variables(fctr, dependent_vars, n_dependent_vars,
+ cmd.v_id);
+ else
+ box_plot_group(fctr, dependent_vars, n_dependent_vars,
+ cmd.v_id);
+ }
+
+ for ( v = 0 ; v < n_dependent_vars; ++v )
+ {
+
+ for ( fs = fctr->fs ; *fs ; ++fs )
{
-
- struct factor_statistics **fs = fctr->fs ;
- for ( fs = fctr->fs ; *fs ; ++fs )
- {
- char buf1[100];
- char buf2[100];
- sprintf(buf1, "%s (",
- var_to_string(dependent_vars[v]));
-
- sprintf(buf2, "%s = %s",
- var_to_string(fctr->indep_var[0]),
- value_to_string(&(*fs)->id[0],fctr->indep_var[0]));
-
- strcat(buf1, buf2);
+ const char *s = factor_to_string(fctr, *fs, dependent_vars[v]);
-
- if ( fctr->indep_var[1] )
- {
- sprintf(buf2, "; %s = %s)",
- var_to_string(fctr->indep_var[1]),
- value_to_string(&(*fs)->id[1],
- fctr->indep_var[1]));
- strcat(buf1, buf2);
- }
- else
- {
- strcat(buf1, ")");
- }
-
- np_plot(&(*fs)->m[v],buf1);
+ if ( cmd.a_plot[XMN_PLT_NPPLOT] )
+ np_plot(&(*fs)->m[v], s);
+
+ if ( cmd.a_plot[XMN_PLT_HISTOGRAM] )
+ {
+ struct normal_curve normal;
+ normal.N = (*fs)->m[v].n;
+ normal.mean = (*fs)->m[v].mean;
+ normal.stddev = (*fs)->m[v].stddev;
+
+ histogram_plot((*fs)->m[v].histogram,
+ s, &normal, 0);
}
- }
+ } /* for ( fs .... */
+
+ } /* for ( v = 0 ..... */
- }
}
fctr = fctr->next;
}
+/* Create a hash table of percentiles and their values from the list of
+   percentiles L.
+   One struct percentile is allocated per distinct entry of L; its P member
+   holds the requested percentile and its V member starts out as SYSMIS.
+   The table is created with free as its element destructor.
+   Returns the new table. */
+static struct hsh_table *
+list_to_ptile_hash(const subc_list_double *l)
+{
+  int i;
+  const int n_ptiles = subc_list_double_count(l);
+
+  struct hsh_table *h = hsh_create(n_ptiles,
+				   (hsh_compare_func *) ptile_compare,
+				   (hsh_hash_func *) ptile_hash,
+				   (hsh_free_func *) free,
+				   0);
+
+  for ( i = 0 ; i < n_ptiles ; ++i )
+    {
+      struct percentile *p = xmalloc (sizeof *p);
+
+      p->p = subc_list_double_at(l, i);
+      p->v = SYSMIS;
+
+      /* The list may name the same percentile more than once (the
+	 quartiles pushed for DESCRIPTIVES can repeat values given on
+	 PERCENTILES).  hsh_insert is documented to return the entry
+	 already in the table, without storing the new one, when a match
+	 exists; in that case P is still ours and must be released. */
+      if ( hsh_insert(h, p) != NULL )
+	free(p);
+    }
+
+  return h;
+}
+
+/* Parser for the PERCENTILES subcommand.
+   Marks the subcommand as seen, collects any numbers that follow into
+   percentile_list, then looks for the name of a percentile algorithm.
+   If no numbers end up in the list, a default set of break points is
+   pushed instead.
+   Always returns 1. */
+static int
+xmn_custom_percentiles(struct cmd_examine *p UNUSED)
+{
+  /* Algorithm keywords, tried in this order */
+  static const struct
+    {
+      const char *keyword;
+      enum pc_alg alg;
+    }
+  algorithms[] =
+    {
+      {"HAVERAGE",   PC_HAVERAGE},
+      {"WAVERAGE",   PC_WAVERAGE},
+      {"ROUND",      PC_ROUND},
+      {"EMPIRICAL",  PC_EMPIRICAL},
+      {"AEMPIRICAL", PC_AEMPIRICAL},
+      {"NONE",       PC_NONE}
+    };
+
+  /* Break points used when the user supplies none */
+  static const double fallback[] = {5, 10, 25, 50, 75, 90, 95};
+
+  size_t k;
+
+  sbc_percentile = 1;
+
+  lex_match('=');
+  lex_match('(');
+
+  /* Numbers may optionally be separated by commas */
+  for ( ; lex_is_number() ; lex_match(',') )
+    {
+      subc_list_double_push(&percentile_list, lex_number());
+      lex_get();
+    }
+
+  lex_match(')');
+  lex_match('=');
+
+  /* The first keyword that matches selects the algorithm; if none
+     matches, percentile_algorithm is left as it was */
+  for ( k = 0 ; k < sizeof algorithms / sizeof algorithms[0] ; ++k )
+    {
+      if ( lex_match_id(algorithms[k].keyword) )
+	{
+	  percentile_algorithm = algorithms[k].alg;
+	  break;
+	}
+    }
+
+  if ( subc_list_double_count(&percentile_list) == 0 )
+    {
+      for ( k = 0 ; k < sizeof fallback / sizeof fallback[0] ; ++k )
+	subc_list_double_push(&percentile_list, fallback[k]);
+    }
+
+  return 1;
+}
/* TOTAL and NOTOTAL are simple, mutually exclusive flags */
static int
-/* Parser for the variables sub command */
+/* Parser for the variables sub command
+ Returns 1 on success */
static int
xmn_custom_variables(struct cmd_examine *cmd )
{
-
lex_match('=');
if ((token != T_ID || dict_lookup_var (default_dict, tokid) == NULL)
&& token != T_ALL)
- return 2;
+ {
+ return 2;
+ }
if (!parse_variables (default_dict, &dependent_vars, &n_dependent_vars,
PV_NO_DUPLICATE | PV_NUMERIC | PV_NO_SCRATCH) )
if ( lex_match(T_BY))
{
- return examine_parse_independent_vars(cmd);
+ int success ;
+ success = examine_parse_independent_vars(cmd);
+ if ( success != 1 ) {
+ free (dependent_vars);
+ free (totals) ;
+ }
+ return success;
}
return 1;
static int
examine_parse_independent_vars(struct cmd_examine *cmd)
{
-
+ int success;
struct factor *sf = xmalloc(sizeof(struct factor));
if ((token != T_ID || dict_lookup_var (default_dict, tokid) == NULL)
&& token != T_ALL)
- return 2;
+ {
+ free ( sf ) ;
+ return 2;
+ }
sf->indep_var[0] = parse_variable();
if ((token != T_ID || dict_lookup_var (default_dict, tokid) == NULL)
&& token != T_ALL)
- return 2;
+ {
+ free ( sf ) ;
+ return 2;
+ }
sf->indep_var[1] = parse_variable();
if ( token == '.' || token == '/' )
return 1;
- return examine_parse_independent_vars(cmd);
+ success = examine_parse_independent_vars(cmd);
+
+ if ( success != 1 )
+ free ( sf ) ;
+
+ return success;
}
+void populate_percentiles(struct tab_table *tbl, int col, int row,
+ const struct metrics *m);
+
void populate_descriptives(struct tab_table *t, int col, int row,
const struct metrics *fs);
if ( value_is_missing(val,var) || case_missing )
val = 0;
-
- metrics_calc( &(*foo)->m[v], val, weight, case_no );
+
+ metrics_calc( &(*foo)->m[v], val, weight, case_no);
+
}
fctr = fctr->next;
if ( value_is_missing(val,var) || case_missing )
val = 0;
- metrics_calc(&totals[v], val, weight, case_no );
+ metrics_calc(&totals[v], val, weight, case_no);
}
fs != 0 ;
fs = hsh_next(fctr->fstats, &hi))
{
+
+ fs->m[v].ptile_hash = list_to_ptile_hash(&percentile_list);
+ fs->m[v].ptile_alg = percentile_algorithm;
metrics_postcalc(&fs->m[v]);
}
fctr = fctr->next;
}
+
+ totals[v].ptile_hash = list_to_ptile_hash(&percentile_list);
+ totals[v].ptile_alg = percentile_algorithm;
metrics_postcalc(&totals[v]);
}
fctr = fctr->next;
}
- /*
- print_factors();
- */
-
output_examine();
+
+ if ( totals )
+ {
+ int i;
+ for ( i = 0 ; i < n_dependent_vars ; ++i )
+ {
+ metrics_destroy(&totals[i]);
+ }
+ }
+
}
n_rows = n_dep_var * n_factors ;
if ( fctr->indep_var[1] )
- heading_columns = 3;
+ heading_columns = 3;
}
else
{
if ( 0 != compare_values(&prev, &(*fs)->id[0],
fctr->indep_var[0]->width))
{
- tab_text (tbl,
- 1,
- (i * n_factors ) + count +
- heading_rows,
- TAB_LEFT | TAT_TITLE,
- value_to_string(&(*fs)->id[0], fctr->indep_var[0])
- );
-
- if (fctr->indep_var[1] && count > 0 )
- tab_hline(tbl, TAL_1, 1, n_cols - 1,
- (i * n_factors ) + count + heading_rows);
+ tab_text (tbl,
+ 1,
+ (i * n_factors ) + count +
+ heading_rows,
+ TAB_LEFT | TAT_TITLE,
+ value_to_string(&(*fs)->id[0], fctr->indep_var[0])
+ );
+
+ if (fctr->indep_var[1] && count > 0 )
+ tab_hline(tbl, TAL_1, 1, n_cols - 1,
+ (i * n_factors ) + count + heading_rows);
}
n_rows = n_dep_var * 2 * n_extremities * n_factors;
if ( fctr->indep_var[1] )
- heading_columns = 3;
+ heading_columns = 3;
}
else
{
tab_title (tbl, 0, _("Extreme Values"));
-
tab_vline (tbl, TAL_2, n_cols - 2, 0, n_rows -1);
tab_vline (tbl, TAL_1, n_cols - 1, 0, n_rows -1);
tab_text (tbl, n_cols - 1, 0, TAB_CENTER | TAT_TITLE, _("Value"));
tab_text (tbl, n_cols - 2, 0, TAB_CENTER | TAT_TITLE, _("Case Number"));
-
-
-
for ( i = 0 ; i < n_dep_var ; ++i )
{
int extremity;
int idx=0;
- const int n_data = hsh_count(m->ordered_data);
tab_text(t, col, row,
TAB_RIGHT | TAT_TITLE ,
/* Lowest */
- for (idx = 0, extremity = 0; extremity < n && idx < n_data ; ++idx )
+ for (idx = 0, extremity = 0; extremity < n && idx < m->n_data ; ++idx )
{
int j;
- const struct weighted_value *wv = &m->wv[idx];
+ const struct weighted_value *wv = m->wvp[idx];
struct case_node *cn = wv->case_nos;
cn->num, 8, 0);
if ( cn->next )
- cn = cn->next;
+ cn = cn->next;
}
/* Highest */
- for (idx = n_data - 1, extremity = 0; extremity < n && idx >= 0; --idx )
+ for (idx = m->n_data - 1, extremity = 0; extremity < n && idx >= 0; --idx )
{
int j;
- const struct weighted_value *wv = &m->wv[idx];
+ const struct weighted_value *wv = m->wvp[idx];
struct case_node *cn = wv->case_nos;
for (j = 0 ; j < wv->w ; ++j )
cn->num, 8, 0);
if ( cn->next )
- cn = cn->next;
+ cn = cn->next;
}
n_rows = n_dep_var * n_stat_rows * n_factors;
if ( fctr->indep_var[1] )
- heading_columns = 5;
+ heading_columns = 5;
}
else
{
);
populate_descriptives(tbl, heading_columns - 2,
- row, &(*fs)->m[i]);
+ row, &(*fs)->m[i]);
count++ ;
fs++;
-
-
/* Fill in the descriptives data */
void
populate_descriptives(struct tab_table *tbl, int col, int row,
tab_float (tbl, col + 3,
row,
TAB_CENTER,
- m->stderr,
+ m->se_mean,
8,3);
tab_float (tbl, col + 2,
row + 1,
TAB_CENTER,
- m->mean - t * m->stderr,
+ m->mean - t * m->se_mean,
8,3);
tab_text (tbl, col + 1,
tab_float (tbl, col + 2,
row + 2,
TAB_CENTER,
- m->mean + t * m->stderr,
+ m->mean + t * m->se_mean,
8,3);
tab_text (tbl, col,
TAB_LEFT | TAT_TITLE,
_("Median"));
+ {
+ struct percentile *p;
+ double d = 50;
+
+ p = hsh_find(m->ptile_hash, &d);
+
+ assert(p);
+
+
+ tab_float (tbl, col + 2,
+ row + 4,
+ TAB_CENTER,
+ p->v,
+ 8, 2);
+ }
+
+
tab_text (tbl, col,
row + 5,
TAB_LEFT | TAT_TITLE,
TAB_LEFT | TAT_TITLE,
_("Interquartile Range"));
+ {
+ struct percentile *p1;
+ struct percentile *p2;
+
+ double d = 75;
+ p1 = hsh_find(m->ptile_hash, &d);
+
+ d = 25;
+ p2 = hsh_find(m->ptile_hash, &d);
+
+ assert(p1);
+ assert(p2);
+
+ tab_float (tbl, col + 2,
+ row + 10,
+ TAB_CENTER,
+ p1->v - p2->v,
+ 8, 2);
+ }
+
+
+
tab_text (tbl, col,
row + 11,
TAB_LEFT | TAT_TITLE,
_("Skewness"));
+
+ tab_float (tbl, col + 2,
+ row + 11,
+ TAB_CENTER,
+ m->skewness,
+ 8,3);
+
+ /* stderr of skewness */
+ tab_float (tbl, col + 3,
+ row + 11,
+ TAB_CENTER,
+ calc_seskew(m->n),
+ 8,3);
+
+
tab_text (tbl, col,
row + 12,
TAB_LEFT | TAT_TITLE,
_("Kurtosis"));
+
+
+ tab_float (tbl, col + 2,
+ row + 12,
+ TAB_CENTER,
+ m->kurtosis,
+ 8,3);
+
+ /* stderr of kurtosis */
+ tab_float (tbl, col + 3,
+ row + 12,
+ TAB_CENTER,
+ calc_sekurt(m->n),
+ 8,3);
+
+
}
+/* Draw one box plot chart per entry of FCTR->fs; within each chart every
+   one of the N_VARS dependent variables in VARS gets its own box, and the
+   chart title is the factor level as rendered by factor_to_string.
+   If FCTR is null the work is handed to box_plot_group instead.
+   ID is only passed through to box_plot_group. */
+void
+box_plot_variables(const struct factor *fctr,
+		   const struct variable **vars, int n_vars,
+		   const struct variable *id)
+{
+
+  int i;
+  struct factor_statistics **fs ;
+
+  if ( ! fctr )
+    {
+      box_plot_group(fctr, vars, n_vars, id);
+      return;
+    }
+
+  for ( fs = fctr->fs ; *fs ; ++fs )
+    {
+      double y_min = DBL_MAX;
+      double y_max = -DBL_MAX;
+      const char *s;
+      struct chart *ch;
+
+      ch = chart_create();
+
+      /* chart_create can yield a null chart (np_plot checks for this
+	 too); the members of CH are read below, so skip this plot
+	 rather than dereference a null pointer */
+      if ( ! ch )
+	continue;
+
+      s = factor_to_string(fctr, *fs, 0 );
+
+      /* chart_write_title takes a printf-style format, so the label must
+	 be passed as an argument: it may itself contain '%' */
+      chart_write_title(ch, "%s", s);
+
+      /* The y scale has to span the range of every variable in the chart */
+      for ( i = 0 ; i < n_vars ; ++i )
+	{
+	  y_max = max(y_max, (*fs)->m[i].max);
+	  y_min = min(y_min, (*fs)->m[i].min);
+	}
+
+      boxplot_draw_yscale(ch, y_max, y_min);
+
+      for ( i = 0 ; i < n_vars ; ++i )
+	{
+
+	  /* Each variable gets a slot two box widths wide, with the box
+	     centred in it */
+	  const double box_width = (ch->data_right - ch->data_left)
+	    / (n_vars * 2.0 ) ;
+
+	  const double box_centre = ( i * 2 + 1) * box_width
+	    + ch->data_left;
+
+	  boxplot_draw_boxplot(ch,
+			       box_centre, box_width,
+			       &(*fs)->m[i],
+			       var_to_string(vars[i]));
+
+
+	}
+
+      chart_submit(ch);
+
+    }
+}
+
+
+
+/* Do a box plot, grouping all factors into one plot ;
+   each dependent variable has its own plot.
+   The y scale of every plot is taken from the file-level totals for that
+   variable.  If FCTR is null, a single box is drawn from totals.
+   ID is currently unused.
+*/
+void
+box_plot_group(const struct factor *fctr,
+	       const struct variable **vars,
+	       int n_vars,
+	       const struct variable *id UNUSED)
+{
+
+  int i;
+
+  for ( i = 0 ; i < n_vars ; ++i )
+    {
+      struct factor_statistics **fs ;
+      struct chart *ch;
+
+      ch = chart_create();
+
+      /* Both branches below read members of CH, so a null chart must be
+	 skipped before either of them (previously only the no-factor
+	 branch was guarded) */
+      if ( ! ch )
+	continue;
+
+      boxplot_draw_yscale(ch, totals[i].max, totals[i].min);
+
+      if ( fctr )
+	{
+	  int n_factors = 0;
+	  int f=0;
+
+	  /* FCTR->fs is null terminated; count its entries so the boxes
+	     can be spaced evenly */
+	  for ( fs = fctr->fs ; *fs ; ++fs )
+	    ++n_factors;
+
+	  chart_write_title(ch, _("Boxplot of %s vs. %s"),
+			    var_to_string(vars[i]), var_to_string(fctr->indep_var[0]) );
+
+	  for ( fs = fctr->fs ; *fs ; ++fs )
+	    {
+
+	      const char *s = factor_to_string_concise(fctr, *fs);
+
+	      /* Each factor level gets a slot two box widths wide, with
+		 the box centred in it */
+	      const double box_width = (ch->data_right - ch->data_left)
+		/ (n_factors * 2.0 ) ;
+
+	      const double box_centre = ( f++ * 2 + 1) * box_width
+		+ ch->data_left;
+
+	      boxplot_draw_boxplot(ch,
+				   box_centre, box_width,
+				   &(*fs)->m[i],
+				   s);
+	    }
+	}
+      else
+	{
+	  /* No factor: one box, a third of the data area wide, centred */
+	  const double box_width = (ch->data_right - ch->data_left) / 3.0;
+	  const double box_centre = (ch->data_right + ch->data_left) / 2.0;
+
+	  chart_write_title(ch, _("Boxplot"));
+
+	  boxplot_draw_boxplot(ch,
+			       box_centre, box_width,
+			       &totals[i],
+			       var_to_string(vars[i]) );
+
+	}
+
+      chart_submit(ch);
+    }
+}
/* Plot the normal and detrended normal plots for m
double yfirst=0, ylast=0;
/* Normal Plot */
- struct chart np_chart;
+ struct chart *np_chart;
/* Detrended Normal Plot */
- struct chart dnp_chart;
-
- const struct weighted_value *wv = m->wv;
- const int n_data = hsh_count(m->ordered_data) ;
+ struct chart *dnp_chart;
/* The slope and intercept of the ideal normal probability line */
const double slope = 1.0 / m->stddev;
const double intercept = - m->mean / m->stddev;
/* Cowardly refuse to plot an empty data set */
- if ( n_data == 0 )
+ if ( m->n_data == 0 )
return ;
- chart_initialise(&np_chart);
- chart_write_title(&np_chart, _("Normal Q-Q Plot of %s"), factorname);
- chart_write_xlabel(&np_chart, _("Observed Value"));
- chart_write_ylabel(&np_chart, _("Expected Normal"));
+ np_chart = chart_create();
+ dnp_chart = chart_create();
- chart_initialise(&dnp_chart);
- chart_write_title(&dnp_chart, _("Detrended Normal Q-Q Plot of %s"),
+ if ( !np_chart || ! dnp_chart )
+ return ;
+
+ chart_write_title(np_chart, _("Normal Q-Q Plot of %s"), factorname);
+ chart_write_xlabel(np_chart, _("Observed Value"));
+ chart_write_ylabel(np_chart, _("Expected Normal"));
+
+
+ chart_write_title(dnp_chart, _("Detrended Normal Q-Q Plot of %s"),
factorname);
- chart_write_xlabel(&dnp_chart, _("Observed Value"));
- chart_write_ylabel(&dnp_chart, _("Dev from Normal"));
+ chart_write_xlabel(dnp_chart, _("Observed Value"));
+ chart_write_ylabel(dnp_chart, _("Dev from Normal"));
+
+ yfirst = gsl_cdf_ugaussian_Pinv (m->wvp[0]->rank / ( m->n + 1));
+ ylast = gsl_cdf_ugaussian_Pinv (m->wvp[m->n_data-1]->rank / ( m->n + 1));
- yfirst = gsl_cdf_ugaussian_Pinv (wv[0].rank / ( m->n + 1));
- ylast = gsl_cdf_ugaussian_Pinv (wv[n_data-1].rank / ( m->n + 1));
{
/* Need to make sure that both the scatter plot and the ideal fit into the
double x_upper = max(m->max, (ylast - intercept) / slope) ;
double slack = (x_upper - x_lower) * 0.05 ;
- chart_write_xscale(&np_chart, x_lower - slack, x_upper + slack,
- chart_rounded_tick((m->max - m->min) / 5.0));
+ chart_write_xscale(np_chart, x_lower - slack, x_upper + slack, 5);
-
- chart_write_xscale(&dnp_chart, m->min, m->max,
- chart_rounded_tick((m->max - m->min) / 5.0));
+ chart_write_xscale(dnp_chart, m->min, m->max, 5);
}
- chart_write_yscale(&np_chart, yfirst, ylast,
- chart_rounded_tick((ylast - yfirst)/5.0) );
+ chart_write_yscale(np_chart, yfirst, ylast, 5);
{
- /* We have to cache the detrended data, beacause we need to
- find its limits before we can plot it */
- double *d_data;
- d_data = xmalloc (n_data * sizeof(double));
- double d_max = -DBL_MAX;
- double d_min = DBL_MAX;
- for ( i = 0 ; i < n_data; ++i )
+ /* We have to cache the detrended data, because we need to
+ find its limits before we can plot it */
+ double *d_data;
+ d_data = xmalloc (m->n_data * sizeof(double));
+ double d_max = -DBL_MAX;
+ double d_min = DBL_MAX;
+ for ( i = 0 ; i < m->n_data; ++i )
+ {
+ const double ns = gsl_cdf_ugaussian_Pinv (m->wvp[i]->rank / ( m->n + 1));
+
+ chart_datum(np_chart, 0, m->wvp[i]->v.f, ns);
+
+ d_data[i] = (m->wvp[i]->v.f - m->mean) / m->stddev - ns;
+
+ if ( d_data[i] < d_min ) d_min = d_data[i];
+ if ( d_data[i] > d_max ) d_max = d_data[i];
+ }
+ chart_write_yscale(dnp_chart, d_min, d_max, 5);
+
+ for ( i = 0 ; i < m->n_data; ++i )
+ chart_datum(dnp_chart, 0, m->wvp[i]->v.f, d_data[i]);
+
+ free(d_data);
+ }
+
+ chart_line(np_chart, slope, intercept, yfirst, ylast , CHART_DIM_Y);
+ chart_line(dnp_chart, 0, 0, m->min, m->max , CHART_DIM_X);
+
+ chart_submit(np_chart);
+ chart_submit(dnp_chart);
+}
+
+
+
+
+/* Show the percentiles.
+   Builds and submits a table titled "Percentiles".  The table has
+   n_heading_rows heading rows followed by n_stat_rows rows for each
+   dependent variable and factor level, and n_heading_columns heading
+   columns followed by one column per percentile.
+   DEPENDENT_VAR holds the N_DEP_VAR dependent variables.
+   If FCTR is null, the figures come from the file-level totals;
+   otherwise one group of rows is written per entry of FCTR->fs. */
+void
+show_percentiles(struct variable **dependent_var,
+ int n_dep_var,
+ struct factor *fctr)
+{
+ struct tab_table *tbl;
+ int i;
+
+ int n_cols, n_rows;
+ int n_factors;
+
+ struct hsh_table *ptiles ;
+
+ int n_heading_columns;
+ const int n_heading_rows = 2;
+ const int n_stat_rows = 2;
+
+ int n_ptiles ;
+
+ /* Work out the number of heading columns and which percentile hash
+ supplies the column headings.
+ NOTE(review): the factor branch reads (*fs) unconditionally, so it
+ presumably relies on FCTR->fs having at least one entry -- confirm. */
+ if ( fctr )
{
- const double ns = gsl_cdf_ugaussian_Pinv (wv[i].rank / ( m->n + 1));
+ struct factor_statistics **fs = fctr->fs ;
+ n_heading_columns = 3;
+ n_factors = hsh_count(fctr->fstats);
- chart_datum(&np_chart, 0, wv[i].v.f, ns);
+ ptiles = (*fs)->m[0].ptile_hash;
- d_data[i] = (wv[i].v.f - m->mean) / m->stddev - ns;
-
- if ( d_data[i] < d_min ) d_min = d_data[i];
- if ( d_data[i] > d_max ) d_max = d_data[i];
+ /* A second factor needs one more heading column */
+ if ( fctr->indep_var[1] )
+ n_heading_columns = 4;
}
+ else
+ {
+ n_factors = 1;
+ n_heading_columns = 2;
+
+ ptiles = totals[0].ptile_hash;
+ }
+
+ n_ptiles = hsh_count(ptiles);
+
+ n_rows = n_heading_rows + n_dep_var * n_stat_rows * n_factors;
+
+ n_cols = n_heading_columns + n_ptiles ;
+
+ tbl = tab_create (n_cols, n_rows, 0);
+
+ tab_headers (tbl, n_heading_columns + 1, 0, n_heading_rows, 0);
+
+ tab_dim (tbl, tab_natural_dimensions);
+
+ /* Outline the box and have no internal lines*/
+ tab_box (tbl,
+ TAL_2, TAL_2,
+ -1, -1,
+ 0, 0,
+ n_cols - 1, n_rows - 1);
+
+ tab_hline (tbl, TAL_2, 0, n_cols - 1, n_heading_rows );
+
+ tab_vline (tbl, TAL_2, n_heading_columns, 0, n_rows - 1);
+
+
+ tab_title (tbl, 0, _("Percentiles"));
+
+
+ tab_hline (tbl, TAL_1, n_heading_columns, n_cols - 1, 1 );
+
+
+ tab_box (tbl,
+ -1, -1,
+ -1, TAL_1,
+ 0, n_heading_rows,
+ n_heading_columns - 1, n_rows - 1);
+
+
+ tab_box (tbl,
+ -1, -1,
+ -1, TAL_1,
+ n_heading_columns, n_heading_rows - 1,
+ n_cols - 1, n_rows - 1);
- chart_write_yscale(&dnp_chart, d_min, d_max,
- chart_rounded_tick((d_max - d_min) / 5.0));
+ tab_joint_text(tbl, n_heading_columns + 1, 0,
+ n_cols - 1 , 0,
+ TAB_CENTER | TAT_TITLE ,
+ _("Percentiles"));
- for ( i = 0 ; i < n_data; ++i )
- chart_datum(&dnp_chart, 0, wv[i].v.f, d_data[i]);
- free(d_data);
+ {
+ /* Put in the percentile break points as headings */
+
+ struct percentile **p = (struct percentile **) hsh_sort(ptiles);
+
+ i = 0;
+ while ( (*p) )
+ {
+ tab_float(tbl, n_heading_columns + i++ , 1,
+ TAB_CENTER,
+ (*p)->p, 8, 0);
+
+ p++;
+ }
+
}
- chart_line(&np_chart, slope, intercept, yfirst, ylast , CHART_DIM_Y);
- chart_line(&dnp_chart, 0, 0, m->min, m->max , CHART_DIM_X);
+ /* One group of rows per dependent variable */
+ for ( i = 0 ; i < n_dep_var ; ++i )
+ {
+ /* Shadows the function-level n_stat_rows; both are 2 */
+ const int n_stat_rows = 2;
+ const int row = n_heading_rows + i * n_stat_rows * n_factors ;
+
+ if ( i > 0 )
+ tab_hline(tbl, TAL_1, 0, n_cols - 1, row );
+
+ tab_text (tbl, 0,
+ i * n_stat_rows * n_factors + n_heading_rows,
+ TAB_LEFT | TAT_TITLE,
+ var_to_string(dependent_var[i])
+ );
+
+ if ( fctr )
+ {
+ struct factor_statistics **fs = fctr->fs;
+ int count = 0;
+
+ tab_text (tbl, 1, n_heading_rows - 1,
+ TAB_CENTER | TAT_TITLE,
+ var_to_string(fctr->indep_var[0]));
+
+
+ if ( fctr->indep_var[1])
+ tab_text (tbl, 2, n_heading_rows - 1, TAB_CENTER | TAT_TITLE,
+ var_to_string(fctr->indep_var[1]));
+
+ /* FCTR->fs is null terminated */
+ while( *fs )
+ {
+
+ /* Value of the first factor on the previous iteration, used to
+ print each first-factor value only where it changes.
+ NOTE(review): being static it also persists between calls and
+ between dependent variables -- confirm that is intended. */
+ static union value prev ;
+
+ const int row = n_heading_rows + n_stat_rows *
+ ( ( i * n_factors ) + count );
+
+
+ if ( 0 != compare_values(&prev, &(*fs)->id[0],
+ fctr->indep_var[0]->width))
+ {
+
+ if ( count > 0 )
+ tab_hline (tbl, TAL_1, 1, n_cols - 1, row);
+
+ tab_text (tbl,
+ 1, row,
+ TAB_LEFT | TAT_TITLE,
+ value_to_string(&(*fs)->id[0], fctr->indep_var[0])
+ );
+
+
+ }
+
+ prev = (*fs)->id[0];
+
+ if (fctr->indep_var[1] && count > 0 )
+ tab_hline(tbl, TAL_1, 2, n_cols - 1, row);
+
+ if ( fctr->indep_var[1])
+ tab_text (tbl, 2, row,
+ TAB_LEFT | TAT_TITLE,
+ value_to_string(&(*fs)->id[1], fctr->indep_var[1])
+ );
+
+
+ populate_percentiles(tbl, n_heading_columns - 1,
+ row, &(*fs)->m[i]);
+
+
+ count++ ;
+ fs++;
+ }
+
+
+ }
+ else
+ {
+ /* No factor: a single group of rows taken from the totals */
+ populate_percentiles(tbl, n_heading_columns - 1,
+ i * n_stat_rows * n_factors + n_heading_rows,
+ &totals[i]);
+ }
+
+
+ }
+
+
+ tab_submit(tbl);
+
+
+}
+
+
+
+
+/* Fill in two rows of the percentiles table TBL from the metrics M.
+   Column COL receives the row labels: the description of M's percentile
+   algorithm on row ROW and "Tukey's Hinges" on row ROW + 1.
+   The columns to the right of COL receive, on row ROW, the value of each
+   percentile in M's percentile hash (in hsh_sort order) and, on row
+   ROW + 1, the matching hinge for the 25, 50 and 75 break points. */
+void
+populate_percentiles(struct tab_table *tbl, int col, int row,
+		     const struct metrics *m)
+{
+  struct percentile **pp = (struct percentile **) hsh_sort(m->ptile_hash);
+  int x;
+
+  tab_text (tbl, col, row + 1, TAB_LEFT | TAT_TITLE, _("Tukey\'s Hinges"));
+
+  tab_text (tbl, col, row, TAB_LEFT | TAT_TITLE,
+	    ptile_alg_desc[m->ptile_alg]);
+
+  /* The sorted array is null terminated */
+  for ( x = col + 1 ; *pp ; ++pp, ++x )
+    {
+      const double break_point = (*pp)->p;
+
+      /* Index into m->hinge for the quartile break points, else -1 */
+      const int hinge = break_point == 25 ? 0
+	: break_point == 50 ? 1
+	: break_point == 75 ? 2
+	: -1;
+
+      tab_float(tbl, x, row, TAB_CENTER, (*pp)->v, 8, 2);
+
+      if ( hinge >= 0 )
+	tab_float(tbl, x, row + 1, TAB_CENTER, m->hinge[hinge], 8, 2);
+    }
+}
+
+
+
+/* Represent the factor level FS of FCTR as a string.
+   With VAR the result has the form "VAR (A = a)" or "VAR (A = a; B = b)";
+   with a null VAR it is just "A = a" or "A = a; B = b", where A and B
+   are the independent variables of FCTR and a, b their values in FS.
+   Returns a pointer to a static buffer, which the next call overwrites.
+   Text that does not fit in the buffer is truncated. */
+const char *
+factor_to_string(const struct factor *fctr,
+		 struct factor_statistics *fs,
+		 const struct variable *var)
+{
+
+  static char buf[100];
+  size_t len = 0;
+
+  buf[0] = '\0';
+
+  /* Every piece is appended with snprintf, bounded by the space left.
+     The length is re-read with strlen because snprintf's return value
+     may exceed what was actually stored when the text is truncated. */
+  if ( var )
+    {
+      snprintf(buf, sizeof buf, "%s (", var_to_string(var) );
+      len = strlen(buf);
+    }
+
+  snprintf(buf + len, sizeof buf - len, "%s = %s",
+	   var_to_string(fctr->indep_var[0]),
+	   value_to_string(&fs->id[0],fctr->indep_var[0]));
+  len = strlen(buf);
+
+  if ( fctr->indep_var[1] )
+    {
+      snprintf(buf + len, sizeof buf - len, "; %s = %s",
+	       var_to_string(fctr->indep_var[1]),
+	       value_to_string(&fs->id[1],
+			       fctr->indep_var[1]));
+      len = strlen(buf);
+    }
+
+  /* Only close the parenthesis if one was opened for VAR above */
+  if ( var )
+    snprintf(buf + len, sizeof buf - len, ")");
+
+  return buf;
+}
+
+
+
+/* Represent the factor level FS of FCTR as a short string holding only
+   the factor values: "a", or "a,b)" when FCTR has a second independent
+   variable.
+   Returns a pointer to a static buffer, which the next call overwrites.
+   Text that does not fit in the buffer is truncated. */
+const char *
+factor_to_string_concise(const struct factor *fctr,
+			 struct factor_statistics *fs)
+
+{
+
+  static char buf[100];
+
+  snprintf(buf, sizeof buf, "%s",
+	   value_to_string(&fs->id[0], fctr->indep_var[0]));
+
+  if ( fctr->indep_var[1] )
+    {
+      /* Append in place, bounded by the space left in BUF.
+	 NOTE(review): the ')' has no matching '(' in the label; it is
+	 kept as is because the intended form is not clear from here. */
+      const size_t len = strlen(buf);
+
+      snprintf(buf + len, sizeof buf - len, ",%s)",
+	       value_to_string(&fs->id[1], fctr->indep_var[1]) );
+    }
- chart_finalise(&np_chart);
- chart_finalise(&dnp_chart);
+  return buf;
}