/* PSPP - EXAMINE data for normality . -*-c-*-
-Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
+Copyright (C) 2004 Free Software Foundation, Inc.
Author: John Darrington 2004
This program is free software; you can redistribute it and/or
02111-1307, USA. */
#include <config.h>
+#include <gsl/gsl_cdf.h>
#include "error.h"
#include <stdio.h>
#include <stdlib.h>
#include "alloc.h"
#include "str.h"
#include "case.h"
+#include "dictionary.h"
#include "command.h"
#include "lexer.h"
#include "error.h"
#include "vfm.h"
#include "hash.h"
#include "casefile.h"
+#include "factor_stats.h"
+/* (headers) */
+#include "chart.h"
/* (specification)
- "EXAMINE" (examine_):
+ "EXAMINE" (xmn_):
*variables=custom;
+total=custom;
+nototal=custom;
rep:report/!noreport,
incl:include/!exclude;
+compare=cmp:variables/!groups;
+ +plot[plt_]=stemleaf,boxplot,npplot,:spreadlevel(*d:n),histogram,all,none;
+cinterval=double;
+statistics[st_]=descriptives,:extreme(*d:n),all,none.
*/
/* (functions) */
+
static struct cmd_examine cmd;
static struct variable **dependent_vars;
static int n_dependent_vars;
-static struct hsh_table *hash_table_factors;
+static struct hsh_table *hash_table_factors=0;
+
+
struct factor
{
- struct variable *v1;
- struct hsh_table *hash_table_v1;
+ /* The independent variable for this factor */
+ struct variable *indep_var;
+
+ /* The factor statistics for each value of the independent variable */
+ struct hsh_table *hash_table_val;
+
+ /* The subfactor (if any) */
+ struct factor *subfactor;
- struct variable *v2;
- struct hsh_table *hash_table_v2;
};
+
+
/* Parse the clause specifying the factors */
static int examine_parse_independent_vars(struct cmd_examine *cmd,
struct hsh_table *hash_factors );
int n_extremities);
-/* Calculations */
-static void calculate(const struct casefile *cf, void *cmd_);
+void np_plot(const struct metrics *m, const char *varname);
+
+
+
+/* Per Split function */
+static void run_examine(const struct casefile *cf, void *);
+
+static void output_examine(void);
+
+
+static struct factor_statistics *totals = 0;
+
int
cmd_examine(void)
{
- int i;
- short total=1;
if ( !parse_examine(&cmd) )
return CMD_FAILURE;
+
+ if ( cmd.st_n == SYSMIS )
+ cmd.st_n = 5;
if ( ! cmd.sbc_cinterval)
cmd.n_cinterval[0] = 95.0;
- if ( cmd.sbc_nototal )
- total = 0;
+ totals = xmalloc ( sizeof (struct factor_statistics *) );
+
+ totals->stats = xmalloc(sizeof ( struct metrics ) * n_dependent_vars);
+
+ multipass_procedure_with_splits (run_examine, NULL);
- multipass_procedure_with_splits (calculate, &cmd);
+
+ hsh_destroy(hash_table_factors);
+
+ free(totals->stats);
+ free(totals);
+
+ return CMD_SUCCESS;
+};
+
+
+/* Show all the appropriate tables */
+static void
+output_examine(void)
+{
/* Show totals if appropriate */
- if ( total || !hash_table_factors || 0 == hsh_count (hash_table_factors))
+ if ( ! cmd.sbc_nototal ||
+ ! hash_table_factors || 0 == hsh_count (hash_table_factors))
{
show_summary(dependent_vars, n_dependent_vars,0);
if ( cmd.sbc_statistics )
{
- if ( cmd.a_statistics[EXAMINE_ST_DESCRIPTIVES])
+ if ( cmd.a_statistics[XMN_ST_DESCRIPTIVES])
show_descriptives(dependent_vars, n_dependent_vars, 0);
- if ( cmd.st_n != SYSMIS )
+ if ( cmd.a_statistics[XMN_ST_EXTREME])
show_extremes(dependent_vars, n_dependent_vars, 0, cmd.st_n);
}
+
+ if ( cmd.sbc_plot)
+ {
+ if ( cmd.a_plot[XMN_PLT_NPPLOT] )
+ {
+ int v;
+
+ for ( v = 0 ; v < n_dependent_vars; ++v )
+ {
+ np_plot(&totals->stats[v], var_to_string(dependent_vars[v]));
+ }
+
+ }
+ }
+
}
+
/* Show grouped statistics if appropriate */
if ( hash_table_factors && 0 != hsh_count (hash_table_factors))
{
if ( cmd.sbc_statistics )
{
- if ( cmd.a_statistics[EXAMINE_ST_DESCRIPTIVES])
- show_descriptives(dependent_vars, n_dependent_vars,f);
+ if ( cmd.a_statistics[XMN_ST_DESCRIPTIVES])
+ show_descriptives(dependent_vars, n_dependent_vars, f);
- if ( cmd.st_n != SYSMIS )
- show_extremes(dependent_vars, n_dependent_vars,f,cmd.st_n);
+ if ( cmd.a_statistics[XMN_ST_EXTREME])
+ show_extremes(dependent_vars, n_dependent_vars, f, cmd.st_n);
+ }
+
+
+ if ( cmd.sbc_plot)
+ {
+ if ( cmd.a_plot[XMN_PLT_NPPLOT] )
+ {
+ struct hsh_iterator h2;
+ struct factor_statistics *foo ;
+ for (foo = hsh_first(f->hash_table_val,&h2);
+ foo != 0 ;
+ foo = hsh_next(f->hash_table_val,&h2))
+ {
+ int v;
+ for ( v = 0 ; v < n_dependent_vars; ++ v)
+ {
+ char buf[100];
+ sprintf(buf, "%s (%s = %s)",
+ var_to_string(dependent_vars[v]),
+ var_to_string(f->indep_var),
+ value_to_string(foo->id,f->indep_var));
+ np_plot(&foo->stats[v], buf);
+ }
+ }
+ }
}
}
}
- hsh_destroy(hash_table_factors);
+}
- return CMD_SUCCESS;
-};
/* TOTAL and NOTOTAL are simple, mutually exclusive flags */
static int
-examine_custom_total(struct cmd_examine *p)
+xmn_custom_total(struct cmd_examine *p)
{
if ( p->sbc_nototal )
{
}
static int
-examine_custom_nototal(struct cmd_examine *p)
+xmn_custom_nototal(struct cmd_examine *p)
{
if ( p->sbc_total )
{
int
compare_factors (const struct factor *f1,
const struct factor *f2,
- void *aux UNUSED)
+ void *aux)
{
- int v1_cmp;
+ int indep_var_cmp = strcmp(f1->indep_var->name, f2->indep_var->name);
- v1_cmp = strcmp(f1->v1->name, f2->v1->name);
+ if ( 0 != indep_var_cmp )
+ return indep_var_cmp;
- if ( 0 != v1_cmp )
- return v1_cmp;
-
- if ( f1->v2 == 0 && f2->v2 == 0 )
+ /* If the names are identical, and there are no subfactors then
+ the factors are identical */
+ if ( ! f1->subfactor && ! f2->subfactor )
return 0;
+
+ /* ... otherwise we must compare the subfactors */
- if ( f1->v2 == 0 && f2->v2 != 0 )
- return -1;
-
- if ( f1->v2 != 0 && f2->v2 == 0 )
- return +1;
-
- return strcmp(f1->v2->name, f2->v2->name);
+ return compare_factors(f1->subfactor, f2->subfactor, aux);
}
/* Create a hash of a factor */
unsigned
-hash_factor( const struct factor *f,
- void *aux UNUSED)
+hash_factor( const struct factor *f, void *aux)
{
unsigned h;
- h = hsh_hash_string(f->v1->name);
+ h = hsh_hash_string(f->indep_var->name);
- if ( f->v2 )
- h += hsh_hash_string(f->v2->name);
+ if ( f->subfactor )
+ h += hash_factor(f->subfactor, aux);
return h;
}
/* Free up a factor */
void
-free_factor(struct factor *f, void *aux UNUSED)
+free_factor(struct factor *f, void *aux)
{
- hsh_destroy(f->hash_table_v1);
- hsh_destroy(f->hash_table_v2);
+ hsh_destroy(f->hash_table_val);
+
+ if ( f->subfactor )
+ free_factor(f->subfactor, aux);
free(f);
}
/* Parser for the variables sub command */
static int
-examine_custom_variables(struct cmd_examine *cmd )
+xmn_custom_variables(struct cmd_examine *cmd )
{
lex_match('=');
if ( !f )
{
f = xmalloc(sizeof(struct factor));
- f->v2 = 0;
- f->v1 = 0;
- f->hash_table_v2 = 0;
- f->hash_table_v1 = 0;
+ f->indep_var = 0;
+ f->hash_table_val = 0;
+ f->subfactor = 0;
}
- f->v1 = parse_variable();
+ f->indep_var = parse_variable();
- if ( ! f->hash_table_v1 )
- f->hash_table_v1 = hsh_create(4,(hsh_compare_func *)compare_values,
- (hsh_hash_func *)hash_value,
- 0,(void *) f->v1->width);
+ if ( ! f->hash_table_val )
+ f->hash_table_val = hsh_create(4,(hsh_compare_func *) compare_indep_values,
+ (hsh_hash_func *) hash_indep_value,
+ (hsh_free_func *) free_factor_stats,
+ (void *) f->indep_var->width);
if ( token == T_BY )
{
lex_match(T_BY);
+
if ((token != T_ID || dict_lookup_var (default_dict, tokid) == NULL)
&& token != T_ALL)
return 2;
- f->v2 = parse_variable();
+ f->subfactor = xmalloc(sizeof(struct factor));
+
+ f->subfactor->indep_var = parse_variable();
- if ( !f->hash_table_v2 )
- {
- f->hash_table_v2 = hsh_create(4,
- (hsh_compare_func *) compare_values,
- (hsh_hash_func *) hash_value,
- 0,
- (void *) f->v2->width);
- }
+ f->subfactor->subfactor = 0;
+
+ f->subfactor->hash_table_val =
+ hsh_create(4,
+ (hsh_compare_func *) compare_indep_values,
+ (hsh_hash_func *) hash_indep_value,
+ (hsh_free_func *) free_factor_stats,
+ (void *) f->subfactor->indep_var->width);
}
hsh_insert(hash_table_factors, f);
}
-void populate_descriptives(struct tab_table *t, int col, int row);
+void populate_descriptives(struct tab_table *t, int col, int row,
+ const struct metrics *fs);
void populate_extremities(struct tab_table *t, int col, int row, int n);
}
else
{
- assert(factor->v1);
- if ( factor->v2 == 0 )
+ assert(factor->indep_var);
+ if ( factor->subfactor == 0 )
{
heading_columns = 2;
- n_rows += n_dep_var * hsh_count(factor->hash_table_v1) * n_stat_rows;
+ n_rows += n_dep_var * hsh_count(factor->hash_table_val) * n_stat_rows;
}
else
{
heading_columns = 3;
- n_rows += n_dep_var * hsh_count(factor->hash_table_v1) *
- hsh_count(factor->hash_table_v2) * n_stat_rows ;
+ n_rows += n_dep_var * hsh_count(factor->hash_table_val) *
+ hsh_count(factor->subfactor->hash_table_val) * n_stat_rows ;
}
}
t = tab_create (n_cols, n_rows, 0);
- tab_headers (t, heading_columns, 0, heading_rows, 0);
+ tab_headers (t, heading_columns + 1, 0, heading_rows, 0);
tab_dim (t, tab_natural_dimensions);
tab_hline (t, TAL_2, 0, n_cols - 1, heading_rows );
- tab_vline (t, TAL_2, heading_columns, 0, n_rows - 1);
- tab_vline (t, TAL_1, n_cols - 2, 0, n_rows - 1);
+ tab_vline (t, TAL_1, heading_columns, 0, n_rows - 1);
+ tab_vline (t, TAL_2, n_cols - 2, 0, n_rows - 1);
tab_vline (t, TAL_1, n_cols - 1, 0, n_rows - 1);
tab_text (t, n_cols - 2, 0, TAB_CENTER | TAT_TITLE, _("Statistic"));
if ( factor )
{
- n_factors = hsh_count(factor->hash_table_v1);
- if ( factor->v2 )
- n_subfactors = hsh_count(factor->hash_table_v2);
+ n_factors = hsh_count(factor->hash_table_val);
+ if ( factor->subfactor )
+ n_subfactors = hsh_count(factor->subfactor->hash_table_val);
}
if ( i > 0 )
tab_hline(t, TAL_1, 0, n_cols - 1, row );
-
-
if ( factor )
{
struct hsh_iterator hi;
- union value *v;
+ const struct factor_statistics *fs;
int count = 0;
- tab_text (t, 1, heading_rows - 1, TAB_CENTER | TAT_TITLE,
- var_to_string(factor->v1));
+ tab_text (t, 1, heading_rows - 1, TAB_CENTER | TAT_TITLE,
+ var_to_string(factor->indep_var));
- for ( v = hsh_first(factor->hash_table_v1, &hi);
- v != 0;
- v = hsh_next(factor->hash_table_v1, &hi))
+ for (fs = hsh_first(factor->hash_table_val, &hi);
+ fs != 0;
+ fs = hsh_next(factor->hash_table_val, &hi))
{
- struct hsh_iterator h2;
- union value *vv;
-
tab_text (t, 1,
row + count * n_subfactors * n_stat_rows,
TAB_RIGHT | TAT_TITLE,
- value_to_string(v, factor->v1)
+ value_to_string(fs->id, factor->indep_var)
);
if ( count > 0 )
tab_hline (t, TAL_1, 1, n_cols - 1,
row + count * n_subfactors * n_stat_rows);
- if ( factor->v2 )
+ if ( factor->subfactor )
{
int count2=0;
-
+ struct hsh_iterator h2;
+ const struct factor_statistics *sub_fs;
+
tab_text (t, 2, heading_rows - 1, TAB_CENTER | TAT_TITLE,
- var_to_string(factor->v2));
+ var_to_string(factor->subfactor->indep_var));
- for ( vv = hsh_first(factor->hash_table_v2, &h2);
- vv != 0;
- vv = hsh_next(factor->hash_table_v2, &h2))
+ for ( sub_fs = hsh_first(factor->subfactor->hash_table_val,
+ &h2);
+ sub_fs != 0;
+ sub_fs = hsh_next(factor->subfactor->hash_table_val,
+ &h2))
{
tab_text(t, 2,
+ count * n_subfactors * n_stat_rows
+ count2 * n_stat_rows,
TAB_RIGHT | TAT_TITLE ,
- value_to_string(vv, factor->v2)
+ value_to_string(sub_fs->id, factor->subfactor->indep_var)
);
if ( count2 > 0 )
populate_descriptives(t, heading_columns,
row
+ count * n_subfactors
- * n_stat_rows
- + count2 * n_stat_rows);
+ * n_stat_rows
+ + count2 * n_stat_rows,
+ &sub_fs->stats[i]);
count2++;
}
else
{
+
populate_descriptives(t, heading_columns,
row
- + count * n_subfactors * n_stat_rows);
+ + count * n_subfactors * n_stat_rows,
+ &fs->stats[i]);
}
count ++;
else
{
populate_descriptives(t, heading_columns,
- row);
+ row, &totals->stats[i]);
}
tab_text (t,
tab_title (t, 0, _("Descriptives"));
tab_submit(t);
+
}
/* Fill in the descriptives data */
void
-populate_descriptives(struct tab_table *t, int col, int row)
+populate_descriptives(struct tab_table *tbl, int col, int row,
+ const struct metrics *m)
{
- tab_text (t, col,
+ const double t = gsl_cdf_tdist_Qinv(1 - cmd.n_cinterval[0]/100.0/2.0, \
+ m->n -1);
+
+
+ tab_text (tbl, col,
row,
TAB_LEFT | TAT_TITLE,
_("Mean"));
+ tab_float (tbl, col + 2,
+ row,
+ TAB_CENTER,
+ m->mean,
+ 8,2);
+
+ tab_float (tbl, col + 3,
+ row,
+ TAB_CENTER,
+ m->stderr,
+ 8,3);
+
- tab_text (t, col,
+ tab_text (tbl, col,
row + 1,
TAB_LEFT | TAT_TITLE | TAT_PRINTF,
_("%g%% Confidence Interval for Mean"), cmd.n_cinterval[0]);
- tab_text (t, col + 1,
- row + 1,
- TAB_LEFT | TAT_TITLE,
- _("Upper Bound"));
- tab_text (t, col + 1,
- row + 2,
+ tab_text (tbl, col + 1,
+ row + 1,
TAB_LEFT | TAT_TITLE,
_("Lower Bound"));
+ tab_float (tbl, col + 2,
+ row + 1,
+ TAB_CENTER,
+ m->mean - t * m->stderr,
+ 8,3);
+
+ tab_text (tbl, col + 1,
+ row + 2,
+ TAB_LEFT | TAT_TITLE,
+ _("Upper Bound"));
+
+
+ tab_float (tbl, col + 2,
+ row + 2,
+ TAB_CENTER,
+ m->mean + t * m->stderr,
+ 8,3);
- tab_text (t, col,
+ tab_text (tbl, col,
row + 3,
TAB_LEFT | TAT_TITLE,
_("5% Trimmed Mean"));
- tab_text (t, col,
+ tab_float (tbl, col + 2,
+ row + 3,
+ TAB_CENTER,
+ m->trimmed_mean,
+ 8,2);
+
+ tab_text (tbl, col,
row + 4,
TAB_LEFT | TAT_TITLE,
_("Median"));
- tab_text (t, col,
+ tab_text (tbl, col,
row + 5,
TAB_LEFT | TAT_TITLE,
_("Variance"));
- tab_text (t, col,
+ tab_float (tbl, col + 2,
+ row + 5,
+ TAB_CENTER,
+ m->var,
+ 8,3);
+
+
+ tab_text (tbl, col,
row + 6,
TAB_LEFT | TAT_TITLE,
_("Std. Deviation"));
- tab_text (t, col,
+
+ tab_float (tbl, col + 2,
+ row + 6,
+ TAB_CENTER,
+ m->stddev,
+ 8,3);
+
+
+ tab_text (tbl, col,
row + 7,
TAB_LEFT | TAT_TITLE,
_("Minimum"));
- tab_text (t, col,
+ tab_float (tbl, col + 2,
+ row + 7,
+ TAB_CENTER,
+ m->min,
+ 8,3);
+
+ tab_text (tbl, col,
row + 8,
TAB_LEFT | TAT_TITLE,
_("Maximum"));
- tab_text (t, col,
+ tab_float (tbl, col + 2,
+ row + 8,
+ TAB_CENTER,
+ m->max,
+ 8,3);
+
+
+ tab_text (tbl, col,
row + 9,
TAB_LEFT | TAT_TITLE,
_("Range"));
- tab_text (t, col,
+
+ tab_float (tbl, col + 2,
+ row + 9,
+ TAB_CENTER,
+ m->max - m->min,
+ 8,3);
+
+ tab_text (tbl, col,
row + 10,
TAB_LEFT | TAT_TITLE,
_("Interquartile Range"));
- tab_text (t, col,
+ tab_text (tbl, col,
row + 11,
TAB_LEFT | TAT_TITLE,
_("Skewness"));
- tab_text (t, col,
+ tab_text (tbl, col,
row + 12,
TAB_LEFT | TAT_TITLE,
_("Kurtosis"));
int heading_columns ;
int n_cols;
const int heading_rows = 3;
- struct tab_table *t;
+ struct tab_table *tbl;
int n_rows = heading_rows;
}
else
{
- assert(factor->v1);
- if ( factor->v2 == 0 )
+ assert(factor->indep_var);
+ if ( factor->subfactor == 0 )
{
heading_columns = 2;
- n_rows += n_dep_var * hsh_count(factor->hash_table_v1);
+ n_rows += n_dep_var * hsh_count(factor->hash_table_val);
}
else
{
heading_columns = 3;
- n_rows += n_dep_var * hsh_count(factor->hash_table_v1) *
- hsh_count(factor->hash_table_v2) ;
+ n_rows += n_dep_var * hsh_count(factor->hash_table_val) *
+ hsh_count(factor->subfactor->hash_table_val) ;
}
}
n_cols = heading_columns + 6;
- t = tab_create (n_cols,n_rows,0);
- tab_headers (t, heading_columns, 0, heading_rows, 0);
+ tbl = tab_create (n_cols,n_rows,0);
+ tab_headers (tbl, heading_columns, 0, heading_rows, 0);
- tab_dim (t, tab_natural_dimensions);
+ tab_dim (tbl, tab_natural_dimensions);
/* Outline the box and have vertical internal lines*/
- tab_box (t,
+ tab_box (tbl,
TAL_2, TAL_2,
-1, TAL_1,
0, 0,
n_cols - 1, n_rows - 1);
- tab_hline (t, TAL_2, 0, n_cols - 1, heading_rows );
- tab_hline (t, TAL_1, heading_columns, n_cols - 1, 1 );
- tab_hline (t, TAL_1, 0, n_cols - 1, heading_rows -1 );
+ tab_hline (tbl, TAL_2, 0, n_cols - 1, heading_rows );
+ tab_hline (tbl, TAL_1, heading_columns, n_cols - 1, 1 );
+ tab_hline (tbl, TAL_1, 0, n_cols - 1, heading_rows -1 );
- tab_vline (t, TAL_2, heading_columns, 0, n_rows - 1);
+ tab_vline (tbl, TAL_2, heading_columns, 0, n_rows - 1);
- tab_title (t, 0, _("Case Processing Summary"));
+ tab_title (tbl, 0, _("Case Processing Summary"));
- tab_joint_text(t, heading_columns, 0,
+ tab_joint_text(tbl, heading_columns, 0,
n_cols -1, 0,
TAB_CENTER | TAT_TITLE,
_("Cases"));
/* Remove lines ... */
- tab_box (t,
+ tab_box (tbl,
-1, -1,
TAL_0, TAL_0,
heading_columns, 0,
if ( factor )
{
- tab_text (t, 1, heading_rows - 1, TAB_CENTER | TAT_TITLE,
- var_to_string(factor->v1));
+ tab_text (tbl, 1, heading_rows - 1, TAB_CENTER | TAT_TITLE,
+ var_to_string(factor->indep_var));
- if ( factor->v2 )
- tab_text (t, 2, heading_rows - 1, TAB_CENTER | TAT_TITLE,
- var_to_string(factor->v2));
+ if ( factor->subfactor )
+ tab_text (tbl, 2, heading_rows - 1, TAB_CENTER | TAT_TITLE,
+ var_to_string(factor->subfactor->indep_var));
}
for ( i = 0 ; i < 3 ; ++i )
{
- tab_text (t, heading_columns + i*2 , 2, TAB_CENTER | TAT_TITLE, _("N"));
- tab_text (t, heading_columns + i*2 + 1, 2, TAB_CENTER | TAT_TITLE,
+ tab_text (tbl, heading_columns + i*2 , 2, TAB_CENTER | TAT_TITLE, _("N"));
+ tab_text (tbl, heading_columns + i*2 + 1, 2, TAB_CENTER | TAT_TITLE,
_("Percent"));
- tab_joint_text(t, heading_columns + i*2 , 1,
+ tab_joint_text(tbl, heading_columns + i*2 , 1,
heading_columns + i*2 + 1, 1,
TAB_CENTER | TAT_TITLE,
subtitle[i]);
- tab_box (t, -1, -1,
+ tab_box (tbl, -1, -1,
TAL_0, TAL_0,
heading_columns + i*2, 1,
heading_columns + i*2 + 1, 1);
if ( factor )
{
- n_factors = hsh_count(factor->hash_table_v1);
- if ( factor->v2 )
- n_subfactors = hsh_count(factor->hash_table_v2);
+ n_factors = hsh_count(factor->hash_table_val);
+ if ( factor->subfactor )
+ n_subfactors = hsh_count(factor->subfactor->hash_table_val);
}
- tab_text (t,
+ tab_text (tbl,
0, i * n_factors * n_subfactors + heading_rows,
TAB_LEFT | TAT_TITLE,
var_to_string(dependent_var[i])
if ( factor )
{
struct hsh_iterator hi;
- union value *v;
+ const struct factor_statistics *fs;
int count = 0;
- for ( v = hsh_first(factor->hash_table_v1, &hi);
- v != 0;
- v = hsh_next(factor->hash_table_v1, &hi))
+ for (fs = hsh_first(factor->hash_table_val, &hi);
+ fs != 0;
+ fs = hsh_next(factor->hash_table_val, &hi))
{
- struct hsh_iterator h2;
- union value *vv;
-
- tab_text (t, 1,
+ tab_text (tbl, 1,
i * n_factors * n_subfactors + heading_rows
+ count * n_subfactors,
TAB_RIGHT | TAT_TITLE,
- value_to_string(v, factor->v1)
+ value_to_string(fs->id, factor->indep_var)
);
- if ( factor->v2 )
+ if ( factor->subfactor )
{
int count2=0;
- for ( vv = hsh_first(factor->hash_table_v2, &h2);
- vv != 0;
- vv = hsh_next(factor->hash_table_v2, &h2))
+ struct hsh_iterator h2;
+ const struct factor_statistics *sub_fs;
+
+ for ( sub_fs = hsh_first(factor->subfactor->hash_table_val,
+ &h2);
+ sub_fs != 0;
+ sub_fs = hsh_next(factor->subfactor->hash_table_val,
+ &h2))
{
- tab_text(t, 2,
+ tab_text(tbl, 2,
i * n_factors * n_subfactors + heading_rows
+ count * n_subfactors + count2,
TAB_RIGHT | TAT_TITLE ,
- value_to_string(vv, factor->v2)
+ value_to_string(sub_fs->id, factor->subfactor->indep_var)
);
count2++;
}
- tab_submit (t);
+ tab_submit (tbl);
}
-
-
static int bad_weight_warn = 1;
+
static void
-calculate(const struct casefile *cf, void *cmd_)
+run_examine(const struct casefile *cf, void *aux UNUSED)
{
+ struct hsh_iterator hi;
+ struct factor *fctr;
+
struct casereader *r;
struct ccase c;
+ int v;
+
+ /* Make sure we haven't got rubbish left over from a
+ previous split */
+ if ( hash_table_factors )
+ {
+ for ( fctr = hsh_first(hash_table_factors, &hi);
+ fctr != 0;
+ fctr = hsh_next (hash_table_factors, &hi) )
+ {
+ hsh_clear(fctr->hash_table_val);
+
+ while ( (fctr = fctr->subfactor) )
+ hsh_clear(fctr->hash_table_val);
+ }
+ }
- struct cmd_examine *cmd = (struct cmd_examine *) cmd_;
+ for ( v = 0 ; v < n_dependent_vars ; ++v )
+ metrics_precalc(&totals->stats[v]);
for(r = casefile_get_reader (cf);
casereader_read (r, &c) ;
- case_destroy (&c))
+ case_destroy (&c) )
{
- int i;
- struct hsh_iterator hi;
- struct factor *fctr;
-
const double weight =
- dict_get_case_weight(default_dict,&c,&bad_weight_warn);
+ dict_get_case_weight(default_dict, &c, &bad_weight_warn);
+
+ for ( v = 0 ; v < n_dependent_vars ; ++v )
+ {
+ const struct variable *var = dependent_vars[v];
+ const union value *val = case_data (&c, var->fv);
+
+ metrics_calc(&totals->stats[v], val, weight);
+ }
if ( hash_table_factors )
{
fctr != 0;
fctr = hsh_next (hash_table_factors, &hi) )
{
- union value *val;
+ const union value *indep_val =
+ case_data(&c, fctr->indep_var->fv);
+
+ struct factor_statistics **foo = ( struct factor_statistics ** )
+ hsh_probe(fctr->hash_table_val, (void *) &indep_val);
+
+ if ( !*foo )
+ {
+ *foo = xmalloc ( sizeof ( struct factor_statistics));
+ (*foo)->id = indep_val;
+ (*foo)->stats = xmalloc ( sizeof ( struct metrics )
+ * n_dependent_vars);
+ for ( v = 0 ; v < n_dependent_vars ; ++v )
+ metrics_precalc( &(*foo)->stats[v] );
- val = case_data (&c, fctr->v1->fv);
- hsh_insert(fctr->hash_table_v1,val);
+ hsh_insert(fctr->hash_table_val, (void *) *foo);
+ }
- if ( fctr->hash_table_v2 )
+ for ( v = 0 ; v < n_dependent_vars ; ++v )
{
- val = case_data (&c, fctr->v2->fv);
- hsh_insert(fctr->hash_table_v2,val);
+ const struct variable *var = dependent_vars[v];
+ const union value *val = case_data (&c, var->fv);
+
+ metrics_calc( &(*foo)->stats[v], val, weight );
+ }
+
+ if ( fctr->subfactor )
+ {
+ struct factor *sfctr = fctr->subfactor;
+
+ const union value *ii_val =
+ case_data (&c, sfctr->indep_var->fv);
+
+ struct factor_statistics **bar =
+ (struct factor_statistics **)
+ hsh_probe(sfctr->hash_table_val, (void *) &ii_val);
+
+ if ( !*bar )
+ {
+ *bar = xmalloc ( sizeof ( struct factor_statistics));
+ (*bar)->id = ii_val;
+ (*bar)->stats = xmalloc ( sizeof ( struct metrics )
+ * n_dependent_vars);
+
+ for ( v = 0 ; v < n_dependent_vars ; ++v )
+ metrics_precalc( &(*bar)->stats[v] );
+
+ hsh_insert(sfctr->hash_table_val,
+ (void *) *bar);
+ }
+
+ for ( v = 0 ; v < n_dependent_vars ; ++v )
+ {
+ const struct variable *var = dependent_vars[v];
+ const union value *val = case_data (&c, var->fv);
+
+ metrics_calc( &(*bar)->stats[v], val, weight );
+ }
}
}
}
}
+
+ for ( v = 0 ; v < n_dependent_vars ; ++v)
+ {
+ if ( hash_table_factors )
+ {
+ for ( fctr = hsh_first(hash_table_factors, &hi);
+ fctr != 0;
+ fctr = hsh_next (hash_table_factors, &hi) )
+ {
+ struct hsh_iterator h2;
+ struct factor_statistics *fs;
+
+ for ( fs = hsh_first(fctr->hash_table_val,&h2);
+ fs != 0;
+ fs = hsh_next(fctr->hash_table_val,&h2))
+ {
+ metrics_postcalc( &fs->stats[v] );
+ }
+
+ if ( fctr->subfactor)
+ {
+ struct hsh_iterator hsf;
+ struct factor_statistics *fss;
+
+ for ( fss = hsh_first(fctr->subfactor->hash_table_val,&hsf);
+ fss != 0;
+ fss = hsh_next(fctr->subfactor->hash_table_val,&hsf))
+ {
+ metrics_postcalc( &fss->stats[v] );
+ }
+ }
+ }
+ }
+
+ metrics_postcalc(&totals->stats[v]);
+ }
+
+ output_examine();
+
}
static void
show_extremes(struct variable **dependent_var,
- int n_dep_var,
- struct factor *factor,
- int n_extremities)
+ int n_dep_var,
+ struct factor *factor,
+ int n_extremities)
{
int i;
int heading_columns ;
}
else
{
- assert(factor->v1);
- if ( factor->v2 == 0 )
+ assert(factor->indep_var);
+ if ( factor->subfactor == 0 )
{
heading_columns = 2 + 1;
n_rows += n_dep_var * 2 * n_extremities
- * hsh_count(factor->hash_table_v1);
+ * hsh_count(factor->hash_table_val);
}
else
{
heading_columns = 3 + 1;
n_rows += n_dep_var * 2 * n_extremities
- * hsh_count(factor->hash_table_v1)
- * hsh_count(factor->hash_table_v2) ;
+ * hsh_count(factor->hash_table_val)
+ * hsh_count(factor->subfactor->hash_table_val) ;
}
}
if ( factor )
{
tab_text (t, 1, heading_rows - 1, TAB_CENTER | TAT_TITLE,
- var_to_string(factor->v1));
+ var_to_string(factor->indep_var));
- if ( factor->v2 )
+ if ( factor->subfactor )
tab_text (t, 2, heading_rows - 1, TAB_CENTER | TAT_TITLE,
- var_to_string(factor->v2));
+ var_to_string(factor->subfactor->indep_var));
}
tab_text (t, n_cols - 1, 0, TAB_CENTER | TAT_TITLE, _("Value"));
if ( factor )
{
- n_factors = hsh_count(factor->hash_table_v1);
- if ( factor->v2 )
- n_subfactors = hsh_count(factor->hash_table_v2);
+ n_factors = hsh_count(factor->hash_table_val);
+ if ( factor->subfactor )
+ n_subfactors = hsh_count(factor->subfactor->hash_table_val);
}
tab_text (t,
TAL_1, 0, n_cols - 1,
heading_rows + 2 * n_extremities *
(i * n_factors * n_subfactors )
- );
+ );
if ( factor )
{
struct hsh_iterator hi;
- union value *v;
+ const struct factor_statistics *fs;
int count = 0;
- for ( v = hsh_first(factor->hash_table_v1, &hi);
- v != 0;
- v = hsh_next(factor->hash_table_v1, &hi))
+ for ( fs = hsh_first(factor->hash_table_val, &hi);
+ fs != 0;
+ fs = hsh_next(factor->hash_table_val, &hi))
{
- struct hsh_iterator h2;
- union value *vv;
-
tab_text (t, 1, heading_rows + 2 * n_extremities *
(i * n_factors * n_subfactors
+ count * n_subfactors),
TAB_RIGHT | TAT_TITLE,
- value_to_string(v, factor->v1)
+ value_to_string(fs->id, factor->indep_var)
);
if ( count > 0 )
+ count * n_subfactors));
- if ( factor->v2 )
+ if ( factor->subfactor )
{
+ struct hsh_iterator h2;
+ const struct factor_statistics *sub_fs;
int count2=0;
- for ( vv = hsh_first(factor->hash_table_v2, &h2);
- vv != 0;
- vv = hsh_next(factor->hash_table_v2, &h2))
+
+ for ( sub_fs = hsh_first(factor->subfactor->hash_table_val,
+ &h2);
+ sub_fs != 0;
+ sub_fs = hsh_next(factor->subfactor->hash_table_val,
+ &h2))
{
tab_text(t, 2, heading_rows + 2 * n_extremities *
(i * n_factors * n_subfactors
+ count * n_subfactors + count2 ),
TAB_RIGHT | TAT_TITLE ,
- value_to_string(vv, factor->v2)
+ value_to_string(sub_fs->id,
+ factor->subfactor->indep_var)
);
heading_rows + 2 * n_extremities *
(i * n_factors * n_subfactors
+ count * n_subfactors),
- n_extremities);
+ n_extremities);
}
count ++;
}
}
-
tab_submit (t);
-
}
_("Highest")
);
-
tab_text(t, col, row + n ,
TAB_RIGHT | TAT_TITLE ,
_("Lowest")
);
-
for (i = 0; i < n ; ++i )
{
tab_float(t, col + 1, row + i,
- TAB_RIGHT | TAT_TITLE,
+ TAB_RIGHT,
i + 1, 8, 0);
tab_float(t, col + 1, row + i + n,
- TAB_RIGHT | TAT_TITLE,
+ TAB_RIGHT,
i + 1, 8, 0);
}
}
+
+
+
+/* Plot the normal and detrended normal plots for m
+ Label the plots with factorname */
+void
+np_plot(const struct metrics *m, const char *factorname)
+{
+ int i;
+ double yfirst=0, ylast=0;
+
+ /* Normal Plot */
+ struct chart np_chart;
+
+ /* Detrended Normal Plot */
+ struct chart dnp_chart;
+
+ const struct weighted_value *wv = m->wv;
+ const int n_data = hsh_count(m->ordered_data) ;
+
+ /* The slope and intercept of the ideal normal probability line */
+ const double slope = 1.0 / m->stddev;
+ const double intercept = - m->mean / m->stddev;
+
+ chart_initialise(&np_chart);
+ chart_write_title(&np_chart, _("Normal Q-Q Plot of %s"), factorname);
+ chart_write_xlabel(&np_chart, _("Observed Value"));
+ chart_write_ylabel(&np_chart, _("Expected Normal"));
+
+ chart_initialise(&dnp_chart);
+ chart_write_title(&dnp_chart, _("Detrended Normal Q-Q Plot of %s"),
+ factorname);
+ chart_write_xlabel(&dnp_chart, _("Observed Value"));
+ chart_write_ylabel(&dnp_chart, _("Dev from Normal"));
+
+ yfirst = gsl_cdf_ugaussian_Pinv (wv[0].rank / ( m->n + 1));
+ ylast = gsl_cdf_ugaussian_Pinv (wv[n_data-1].rank / ( m->n + 1));
+
+ {
+ /* Need to make sure that both the scatter plot and the ideal fit into the
+ plot */
+ double x_lower = min(m->min, (yfirst - intercept) / slope) ;
+ double x_upper = max(m->max, (ylast - intercept) / slope) ;
+ double slack = (x_upper - x_lower) * 0.05 ;
+
+ chart_write_xscale(&np_chart, x_lower - slack, x_upper + slack,
+ chart_rounded_tick((m->max - m->min) / 5.0));
+
+
+ chart_write_xscale(&dnp_chart, m->min, m->max,
+ chart_rounded_tick((m->max - m->min) / 5.0));
+
+ }
+
+ chart_write_yscale(&np_chart, yfirst, ylast,
+ chart_rounded_tick((ylast - yfirst)/5.0) );
+
+ {
+ /* We have to cache the detrended data, beacause we need to
+ find its limits before we can plot it */
+ double *d_data;
+ d_data = xmalloc (n_data * sizeof(double));
+ double d_max = -DBL_MAX;
+ double d_min = DBL_MAX;
+ for ( i = 0 ; i < n_data; ++i )
+ {
+ const double ns = gsl_cdf_ugaussian_Pinv (wv[i].rank / ( m->n + 1));
+
+ chart_datum(&np_chart, 0, wv[i].v.f, ns);
+
+ d_data[i] = (wv[i].v.f - m->mean) / m->stddev - ns;
+
+ if ( d_data[i] < d_min ) d_min = d_data[i];
+ if ( d_data[i] > d_max ) d_max = d_data[i];
+ }
+
+ chart_write_yscale(&dnp_chart, d_min, d_max,
+ chart_rounded_tick((d_max - d_min) / 5.0));
+
+ for ( i = 0 ; i < n_data; ++i )
+ chart_datum(&dnp_chart, 0, wv[i].v.f, d_data[i]);
+
+ free(d_data);
+ }
+
+ chart_line(&np_chart, slope, intercept, yfirst, ylast , CHART_DIM_Y);
+ chart_line(&dnp_chart, 0, 0, m->min, m->max , CHART_DIM_X);
+
+ chart_finalise(&np_chart);
+ chart_finalise(&dnp_chart);
+
+}