+
+
+/* Show the descriptives table */
+void
+show_descriptives(struct variable **dependent_var,
+ int n_dep_var,
+ struct factor *fctr)
+{
+ int i;
+ int heading_columns ;
+ int n_cols;
+ const int n_stat_rows = 13;
+
+ const int heading_rows = 1;
+
+ struct tab_table *tbl;
+
+ int n_factors = 1;
+ int n_rows ;
+
+ if ( fctr )
+ {
+ heading_columns = 4;
+ n_factors = hsh_count(fctr->fstats);
+
+ n_rows = n_dep_var * n_stat_rows * n_factors;
+
+ if ( fctr->indep_var[1] )
+ heading_columns = 5;
+ }
+ else
+ {
+ heading_columns = 3;
+ n_rows = n_dep_var * n_stat_rows;
+ }
+
+ n_rows += heading_rows;
+
+ n_cols = heading_columns + 2;
+
+
+ tbl = tab_create (n_cols, n_rows, 0);
+
+ tab_headers (tbl, heading_columns + 1, 0, heading_rows, 0);
+
+ tab_dim (tbl, tab_natural_dimensions);
+
+ /* Outline the box and have no internal lines*/
+ tab_box (tbl,
+ TAL_2, TAL_2,
+ -1, -1,
+ 0, 0,
+ n_cols - 1, n_rows - 1);
+
+ tab_hline (tbl, TAL_2, 0, n_cols - 1, heading_rows );
+
+ tab_vline (tbl, TAL_1, heading_columns, 0, n_rows - 1);
+ tab_vline (tbl, TAL_2, n_cols - 2, 0, n_rows - 1);
+ tab_vline (tbl, TAL_1, n_cols - 1, 0, n_rows - 1);
+
+ tab_text (tbl, n_cols - 2, 0, TAB_CENTER | TAT_TITLE, _("Statistic"));
+ tab_text (tbl, n_cols - 1, 0, TAB_CENTER | TAT_TITLE, _("Std. Error"));
+
+ tab_title (tbl, 0, _("Descriptives"));
+
+
+ for ( i = 0 ; i < n_dep_var ; ++i )
+ {
+ const int row = heading_rows + i * n_stat_rows * n_factors ;
+
+ if ( i > 0 )
+ tab_hline(tbl, TAL_1, 0, n_cols - 1, row );
+
+ tab_text (tbl, 0,
+ i * n_stat_rows * n_factors + heading_rows,
+ TAB_LEFT | TAT_TITLE,
+ var_to_string(dependent_var[i])
+ );
+
+
+ if ( fctr )
+ {
+ struct factor_statistics **fs = fctr->fs;
+ int count = 0;
+
+ tab_text (tbl, 1, heading_rows - 1, TAB_CENTER | TAT_TITLE,
+ var_to_string(fctr->indep_var[0]));
+
+
+ if ( fctr->indep_var[1])
+ tab_text (tbl, 2, heading_rows - 1, TAB_CENTER | TAT_TITLE,
+ var_to_string(fctr->indep_var[1]));
+
+ while( *fs )
+ {
+
+ static union value prev ;
+
+ const int row = heading_rows + n_stat_rows *
+ ( ( i * n_factors ) + count );
+
+
+ if ( 0 != compare_values(&prev, &(*fs)->id[0],
+ fctr->indep_var[0]->width))
+ {
+
+ if ( count > 0 )
+ tab_hline (tbl, TAL_1, 1, n_cols - 1, row);
+
+ tab_text (tbl,
+ 1, row,
+ TAB_LEFT | TAT_TITLE,
+ value_to_string(&(*fs)->id[0], fctr->indep_var[0])
+ );
+ }
+
+ prev = (*fs)->id[0];
+
+ if (fctr->indep_var[1] && count > 0 )
+ tab_hline(tbl, TAL_1, 2, n_cols - 1, row);
+
+ if ( fctr->indep_var[1])
+ tab_text (tbl, 2, row,
+ TAB_LEFT | TAT_TITLE,
+ value_to_string(&(*fs)->id[1], fctr->indep_var[1])
+ );
+
+ populate_descriptives(tbl, heading_columns - 2,
+ row, &(*fs)->m[i]);
+
+ count++ ;
+ fs++;
+ }
+
+ }
+
+ else
+ {
+
+ populate_descriptives(tbl, heading_columns - 2,
+ i * n_stat_rows * n_factors + heading_rows,
+ &totals[i]);
+ }
+ }
+
+ tab_submit(tbl);
+
+}
+
+
+
+
+/* Fill in the descriptives data */
+void
+populate_descriptives(struct tab_table *tbl, int col, int row,
+ const struct metrics *m)
+{
+
+ const double t = gsl_cdf_tdist_Qinv(1 - cmd.n_cinterval[0]/100.0/2.0, \
+ m->n -1);
+
+
+ tab_text (tbl, col,
+ row,
+ TAB_LEFT | TAT_TITLE,
+ _("Mean"));
+
+ tab_float (tbl, col + 2,
+ row,
+ TAB_CENTER,
+ m->mean,
+ 8,2);
+
+ tab_float (tbl, col + 3,
+ row,
+ TAB_CENTER,
+ m->se_mean,
+ 8,3);
+
+
+ tab_text (tbl, col,
+ row + 1,
+ TAB_LEFT | TAT_TITLE | TAT_PRINTF,
+ _("%g%% Confidence Interval for Mean"), cmd.n_cinterval[0]);
+
+
+ tab_text (tbl, col + 1,
+ row + 1,
+ TAB_LEFT | TAT_TITLE,
+ _("Lower Bound"));
+
+ tab_float (tbl, col + 2,
+ row + 1,
+ TAB_CENTER,
+ m->mean - t * m->se_mean,
+ 8,3);
+
+ tab_text (tbl, col + 1,
+ row + 2,
+ TAB_LEFT | TAT_TITLE,
+ _("Upper Bound"));
+
+
+ tab_float (tbl, col + 2,
+ row + 2,
+ TAB_CENTER,
+ m->mean + t * m->se_mean,
+ 8,3);
+
+ tab_text (tbl, col,
+ row + 3,
+ TAB_LEFT | TAT_TITLE,
+ _("5% Trimmed Mean"));
+
+ tab_float (tbl, col + 2,
+ row + 3,
+ TAB_CENTER,
+ m->trimmed_mean,
+ 8,2);
+
+ tab_text (tbl, col,
+ row + 4,
+ TAB_LEFT | TAT_TITLE,
+ _("Median"));
+
+ {
+ struct percentile *p;
+ double d = 50;
+
+ p = hsh_find(m->ptile_hash, &d);
+
+ assert(p);
+
+
+ tab_float (tbl, col + 2,
+ row + 4,
+ TAB_CENTER,
+ p->v,
+ 8, 2);
+ }
+
+
+ tab_text (tbl, col,
+ row + 5,
+ TAB_LEFT | TAT_TITLE,
+ _("Variance"));
+
+ tab_float (tbl, col + 2,
+ row + 5,
+ TAB_CENTER,
+ m->var,
+ 8,3);
+
+
+ tab_text (tbl, col,
+ row + 6,
+ TAB_LEFT | TAT_TITLE,
+ _("Std. Deviation"));
+
+
+ tab_float (tbl, col + 2,
+ row + 6,
+ TAB_CENTER,
+ m->stddev,
+ 8,3);
+
+
+ tab_text (tbl, col,
+ row + 7,
+ TAB_LEFT | TAT_TITLE,
+ _("Minimum"));
+
+ tab_float (tbl, col + 2,
+ row + 7,
+ TAB_CENTER,
+ m->min,
+ 8,3);
+
+ tab_text (tbl, col,
+ row + 8,
+ TAB_LEFT | TAT_TITLE,
+ _("Maximum"));
+
+ tab_float (tbl, col + 2,
+ row + 8,
+ TAB_CENTER,
+ m->max,
+ 8,3);
+
+
+ tab_text (tbl, col,
+ row + 9,
+ TAB_LEFT | TAT_TITLE,
+ _("Range"));
+
+
+ tab_float (tbl, col + 2,
+ row + 9,
+ TAB_CENTER,
+ m->max - m->min,
+ 8,3);
+
+ tab_text (tbl, col,
+ row + 10,
+ TAB_LEFT | TAT_TITLE,
+ _("Interquartile Range"));
+
+ {
+ struct percentile *p1;
+ struct percentile *p2;
+
+ double d = 75;
+ p1 = hsh_find(m->ptile_hash, &d);
+
+ d = 25;
+ p2 = hsh_find(m->ptile_hash, &d);
+
+ assert(p1);
+ assert(p2);
+
+ tab_float (tbl, col + 2,
+ row + 10,
+ TAB_CENTER,
+ p1->v - p2->v,
+ 8, 2);
+ }
+
+
+
+ tab_text (tbl, col,
+ row + 11,
+ TAB_LEFT | TAT_TITLE,
+ _("Skewness"));
+
+
+ tab_float (tbl, col + 2,
+ row + 11,
+ TAB_CENTER,
+ m->skewness,
+ 8,3);
+
+ /* stderr of skewness */
+ tab_float (tbl, col + 3,
+ row + 11,
+ TAB_CENTER,
+ calc_seskew(m->n),
+ 8,3);
+
+
+ tab_text (tbl, col,
+ row + 12,
+ TAB_LEFT | TAT_TITLE,
+ _("Kurtosis"));
+
+
+ tab_float (tbl, col + 2,
+ row + 12,
+ TAB_CENTER,
+ m->kurtosis,
+ 8,3);
+
+ /* stderr of kurtosis */
+ tab_float (tbl, col + 3,
+ row + 12,
+ TAB_CENTER,
+ calc_sekurt(m->n),
+ 8,3);
+
+
+}
+
+
+
+void
+box_plot_variables(const struct factor *fctr,
+ const struct variable **vars, int n_vars,
+ const struct variable *id)
+{
+
+ int i;
+ struct factor_statistics **fs ;
+
+ if ( ! fctr )
+ {
+ box_plot_group(fctr, vars, n_vars, id);
+ return;
+ }
+
+ for ( fs = fctr->fs ; *fs ; ++fs )
+ {
+ double y_min = DBL_MAX;
+ double y_max = -DBL_MAX;
+ struct chart *ch;
+
+ ch = chart_create();
+
+ const char *s = factor_to_string(fctr, *fs, 0 );
+
+ chart_write_title(ch, s);
+
+ for ( i = 0 ; i < n_vars ; ++i )
+ {
+ y_max = max(y_max, (*fs)->m[i].max);
+ y_min = min(y_min, (*fs)->m[i].min);
+ }
+
+ boxplot_draw_yscale(ch, y_max, y_min);
+
+ for ( i = 0 ; i < n_vars ; ++i )
+ {
+
+ const double box_width = (ch->data_right - ch->data_left)
+ / (n_vars * 2.0 ) ;
+
+ const double box_centre = ( i * 2 + 1) * box_width
+ + ch->data_left;
+
+ boxplot_draw_boxplot(ch,
+ box_centre, box_width,
+ &(*fs)->m[i],
+ var_to_string(vars[i]));
+
+
+ }
+
+ chart_submit(ch);
+
+ }
+}
+
+
+
+/* Do a box plot, grouping all factors into one plot ;
+ each dependent variable has its own plot.
+*/
+void
+box_plot_group(const struct factor *fctr,
+ const struct variable **vars,
+ int n_vars,
+ const struct variable *id UNUSED)
+{
+
+ int i;
+
+ for ( i = 0 ; i < n_vars ; ++i )
+ {
+ struct factor_statistics **fs ;
+ struct chart *ch;
+
+ ch = chart_create();
+
+ boxplot_draw_yscale(ch, totals[i].max, totals[i].min);
+
+ if ( fctr )
+ {
+ int n_factors = 0;
+ int f=0;
+ for ( fs = fctr->fs ; *fs ; ++fs )
+ ++n_factors;
+
+ chart_write_title(ch, _("Boxplot of %s vs. %s"),
+ var_to_string(vars[i]), var_to_string(fctr->indep_var[0]) );
+
+ for ( fs = fctr->fs ; *fs ; ++fs )
+ {
+
+ const char *s = factor_to_string_concise(fctr, *fs);
+
+ const double box_width = (ch->data_right - ch->data_left)
+ / (n_factors * 2.0 ) ;
+
+ const double box_centre = ( f++ * 2 + 1) * box_width
+ + ch->data_left;
+
+ boxplot_draw_boxplot(ch,
+ box_centre, box_width,
+ &(*fs)->m[i],
+ s);
+ }
+ }
+ else if ( ch )
+ {
+ const double box_width = (ch->data_right - ch->data_left) / 3.0;
+ const double box_centre = (ch->data_right + ch->data_left) / 2.0;
+
+ chart_write_title(ch, _("Boxplot"));
+
+ boxplot_draw_boxplot(ch,
+ box_centre, box_width,
+ &totals[i],
+ var_to_string(vars[i]) );
+
+ }
+
+ chart_submit(ch);
+ }
+}
+
+
+/* Plot the normal and detrended normal plots for m
+ Label the plots with factorname */
+void
+np_plot(const struct metrics *m, const char *factorname)
+{
+ int i;
+ double yfirst=0, ylast=0;
+
+ /* Normal Plot */
+ struct chart *np_chart;
+
+ /* Detrended Normal Plot */
+ struct chart *dnp_chart;
+
+ /* The slope and intercept of the ideal normal probability line */
+ const double slope = 1.0 / m->stddev;
+ const double intercept = - m->mean / m->stddev;
+
+ /* Cowardly refuse to plot an empty data set */
+ if ( m->n_data == 0 )
+ return ;
+
+ np_chart = chart_create();
+ dnp_chart = chart_create();
+
+ if ( !np_chart || ! dnp_chart )
+ return ;
+
+ chart_write_title(np_chart, _("Normal Q-Q Plot of %s"), factorname);
+ chart_write_xlabel(np_chart, _("Observed Value"));
+ chart_write_ylabel(np_chart, _("Expected Normal"));
+
+
+ chart_write_title(dnp_chart, _("Detrended Normal Q-Q Plot of %s"),
+ factorname);
+ chart_write_xlabel(dnp_chart, _("Observed Value"));
+ chart_write_ylabel(dnp_chart, _("Dev from Normal"));
+
+ yfirst = gsl_cdf_ugaussian_Pinv (m->wvp[0]->rank / ( m->n + 1));
+ ylast = gsl_cdf_ugaussian_Pinv (m->wvp[m->n_data-1]->rank / ( m->n + 1));
+
+
+ {
+ /* Need to make sure that both the scatter plot and the ideal fit into the
+ plot */
+ double x_lower = min(m->min, (yfirst - intercept) / slope) ;
+ double x_upper = max(m->max, (ylast - intercept) / slope) ;
+ double slack = (x_upper - x_lower) * 0.05 ;
+
+ chart_write_xscale(np_chart, x_lower - slack, x_upper + slack, 5);
+
+ chart_write_xscale(dnp_chart, m->min, m->max, 5);
+
+ }
+
+ chart_write_yscale(np_chart, yfirst, ylast, 5);
+
+ {
+ /* We have to cache the detrended data, beacause we need to
+ find its limits before we can plot it */
+ double *d_data;
+ d_data = xmalloc (m->n_data * sizeof(double));
+ double d_max = -DBL_MAX;
+ double d_min = DBL_MAX;
+ for ( i = 0 ; i < m->n_data; ++i )
+ {
+ const double ns = gsl_cdf_ugaussian_Pinv (m->wvp[i]->rank / ( m->n + 1));
+
+ chart_datum(np_chart, 0, m->wvp[i]->v.f, ns);
+
+ d_data[i] = (m->wvp[i]->v.f - m->mean) / m->stddev - ns;
+
+ if ( d_data[i] < d_min ) d_min = d_data[i];
+ if ( d_data[i] > d_max ) d_max = d_data[i];
+ }
+ chart_write_yscale(dnp_chart, d_min, d_max, 5);
+
+ for ( i = 0 ; i < m->n_data; ++i )
+ chart_datum(dnp_chart, 0, m->wvp[i]->v.f, d_data[i]);
+
+ free(d_data);
+ }
+
+ chart_line(np_chart, slope, intercept, yfirst, ylast , CHART_DIM_Y);
+ chart_line(dnp_chart, 0, 0, m->min, m->max , CHART_DIM_X);
+
+ chart_submit(np_chart);
+ chart_submit(dnp_chart);
+}
+
+
+
+
+/* Show the percentiles */
+void
+show_percentiles(struct variable **dependent_var,
+ int n_dep_var,
+ struct factor *fctr)
+{
+ struct tab_table *tbl;
+ int i;
+
+ int n_cols, n_rows;
+ int n_factors;
+
+ struct hsh_table *ptiles ;
+
+ int n_heading_columns;
+ const int n_heading_rows = 2;
+ const int n_stat_rows = 2;
+
+ int n_ptiles ;
+
+ if ( fctr )
+ {
+ struct factor_statistics **fs = fctr->fs ;
+ n_heading_columns = 3;
+ n_factors = hsh_count(fctr->fstats);
+
+ ptiles = (*fs)->m[0].ptile_hash;
+
+ if ( fctr->indep_var[1] )
+ n_heading_columns = 4;
+ }
+ else
+ {
+ n_factors = 1;
+ n_heading_columns = 2;
+
+ ptiles = totals[0].ptile_hash;
+ }
+
+ n_ptiles = hsh_count(ptiles);
+
+ n_rows = n_heading_rows + n_dep_var * n_stat_rows * n_factors;
+
+ n_cols = n_heading_columns + n_ptiles ;
+
+ tbl = tab_create (n_cols, n_rows, 0);
+
+ tab_headers (tbl, n_heading_columns + 1, 0, n_heading_rows, 0);
+
+ tab_dim (tbl, tab_natural_dimensions);
+
+ /* Outline the box and have no internal lines*/
+ tab_box (tbl,
+ TAL_2, TAL_2,
+ -1, -1,
+ 0, 0,
+ n_cols - 1, n_rows - 1);
+
+ tab_hline (tbl, TAL_2, 0, n_cols - 1, n_heading_rows );
+
+ tab_vline (tbl, TAL_2, n_heading_columns, 0, n_rows - 1);
+
+
+ tab_title (tbl, 0, _("Percentiles"));
+
+
+ tab_hline (tbl, TAL_1, n_heading_columns, n_cols - 1, 1 );
+
+
+ tab_box (tbl,
+ -1, -1,
+ -1, TAL_1,
+ 0, n_heading_rows,
+ n_heading_columns - 1, n_rows - 1);
+
+
+ tab_box (tbl,
+ -1, -1,
+ -1, TAL_1,
+ n_heading_columns, n_heading_rows - 1,
+ n_cols - 1, n_rows - 1);
+
+ tab_joint_text(tbl, n_heading_columns + 1, 0,
+ n_cols - 1 , 0,
+ TAB_CENTER | TAT_TITLE ,
+ _("Percentiles"));
+
+
+ {
+ /* Put in the percentile break points as headings */
+
+ struct percentile **p = (struct percentile **) hsh_sort(ptiles);
+
+ i = 0;
+ while ( (*p) )
+ {
+ tab_float(tbl, n_heading_columns + i++ , 1,
+ TAB_CENTER,
+ (*p)->p, 8, 0);
+
+ p++;
+ }
+
+ }
+
+ for ( i = 0 ; i < n_dep_var ; ++i )
+ {
+ const int n_stat_rows = 2;
+ const int row = n_heading_rows + i * n_stat_rows * n_factors ;
+
+ if ( i > 0 )
+ tab_hline(tbl, TAL_1, 0, n_cols - 1, row );
+
+ tab_text (tbl, 0,
+ i * n_stat_rows * n_factors + n_heading_rows,
+ TAB_LEFT | TAT_TITLE,
+ var_to_string(dependent_var[i])
+ );
+
+ if ( fctr )
+ {
+ struct factor_statistics **fs = fctr->fs;
+ int count = 0;
+
+ tab_text (tbl, 1, n_heading_rows - 1,
+ TAB_CENTER | TAT_TITLE,
+ var_to_string(fctr->indep_var[0]));
+
+
+ if ( fctr->indep_var[1])
+ tab_text (tbl, 2, n_heading_rows - 1, TAB_CENTER | TAT_TITLE,
+ var_to_string(fctr->indep_var[1]));
+
+ while( *fs )
+ {
+
+ static union value prev ;
+
+ const int row = n_heading_rows + n_stat_rows *
+ ( ( i * n_factors ) + count );
+
+
+ if ( 0 != compare_values(&prev, &(*fs)->id[0],
+ fctr->indep_var[0]->width))
+ {
+
+ if ( count > 0 )
+ tab_hline (tbl, TAL_1, 1, n_cols - 1, row);
+
+ tab_text (tbl,
+ 1, row,
+ TAB_LEFT | TAT_TITLE,
+ value_to_string(&(*fs)->id[0], fctr->indep_var[0])
+ );
+
+
+ }
+
+ prev = (*fs)->id[0];
+
+ if (fctr->indep_var[1] && count > 0 )
+ tab_hline(tbl, TAL_1, 2, n_cols - 1, row);
+
+ if ( fctr->indep_var[1])
+ tab_text (tbl, 2, row,
+ TAB_LEFT | TAT_TITLE,
+ value_to_string(&(*fs)->id[1], fctr->indep_var[1])
+ );
+
+
+ populate_percentiles(tbl, n_heading_columns - 1,
+ row, &(*fs)->m[i]);
+
+
+ count++ ;
+ fs++;
+ }
+
+
+ }
+ else
+ {
+ populate_percentiles(tbl, n_heading_columns - 1,
+ i * n_stat_rows * n_factors + n_heading_rows,
+ &totals[i]);
+ }
+
+
+ }
+
+
+ tab_submit(tbl);
+
+
+}
+
+
+
+
+void
+populate_percentiles(struct tab_table *tbl, int col, int row,
+ const struct metrics *m)
+{
+ int i;
+
+ struct percentile **p = (struct percentile **) hsh_sort(m->ptile_hash);
+
+ tab_text (tbl,
+ col, row + 1,
+ TAB_LEFT | TAT_TITLE,
+ _("Tukey\'s Hinges")
+ );
+
+ tab_text (tbl,
+ col, row,
+ TAB_LEFT | TAT_TITLE,
+ ptile_alg_desc[m->ptile_alg]
+ );
+
+
+ i = 0;
+ while ( (*p) )
+ {
+ tab_float(tbl, col + i + 1 , row,
+ TAB_CENTER,
+ (*p)->v, 8, 2);
+ if ( (*p)->p == 25 )
+ tab_float(tbl, col + i + 1 , row + 1,
+ TAB_CENTER,
+ m->hinge[0], 8, 2);
+
+ if ( (*p)->p == 50 )
+ tab_float(tbl, col + i + 1 , row + 1,
+ TAB_CENTER,
+ m->hinge[1], 8, 2);
+
+ if ( (*p)->p == 75 )
+ tab_float(tbl, col + i + 1 , row + 1,
+ TAB_CENTER,
+ m->hinge[2], 8, 2);
+
+
+ i++;
+
+ p++;
+ }
+
+}
+
+
+
+const char *
+factor_to_string(const struct factor *fctr,
+ struct factor_statistics *fs,
+ const struct variable *var)
+{
+
+ static char buf1[100];
+ char buf2[100];
+
+ strcpy(buf1,"");
+
+ if (var)
+ sprintf(buf1, "%s (",var_to_string(var) );
+
+
+ snprintf(buf2, 100, "%s = %s",
+ var_to_string(fctr->indep_var[0]),
+ value_to_string(&fs->id[0],fctr->indep_var[0]));
+
+ strcat(buf1, buf2);
+
+ if ( fctr->indep_var[1] )
+ {
+ sprintf(buf2, "; %s = %s)",
+ var_to_string(fctr->indep_var[1]),
+ value_to_string(&fs->id[1],
+ fctr->indep_var[1]));
+ strcat(buf1, buf2);
+ }
+ else
+ {
+ if ( var )
+ strcat(buf1, ")");
+ }
+
+ return buf1;
+}
+
+
+
+const char *
+factor_to_string_concise(const struct factor *fctr,
+ struct factor_statistics *fs)
+
+{
+
+ static char buf[100];
+
+ char buf2[100];
+
+ snprintf(buf, 100, "%s",
+ value_to_string(&fs->id[0], fctr->indep_var[0]));
+
+ if ( fctr->indep_var[1] )
+ {
+ sprintf(buf2, ",%s)", value_to_string(&fs->id[1], fctr->indep_var[1]) );
+ strcat(buf, buf2);
+ }
+
+
+ return buf;
+}