#define _(msgid) gettext (msgid)
struct glm_spec
-{
- size_t n_dep_vars;
- const struct variable **dep_vars;
+ {
+ const struct variable **dep_vars;
+ size_t n_dep_vars;
- size_t n_factor_vars;
- const struct variable **factor_vars;
+ const struct variable **factor_vars;
+ size_t n_factor_vars;
- size_t n_interactions;
- struct interaction **interactions;
+ struct interaction **interactions;
+ size_t n_interactions;
- enum mv_class exclude;
+ enum mv_class exclude;
- /* The weight variable */
- const struct variable *wv;
+ const struct variable *wv; /* The weight variable */
- const struct dictionary *dict;
+ const struct dictionary *dict;
- int ss_type;
- bool intercept;
+ int ss_type;
+ bool intercept;
- double alpha;
+ double alpha;
- bool dump_coding;
-};
+ bool dump_coding;
+ };
struct glm_workspace
-{
- double total_ssq;
- struct moments *totals;
-
- struct categoricals *cats;
+ {
+ double total_ssq;
+ struct moments *totals;
- /*
- Sums of squares due to different variables. Element 0 is the SSE
- for the entire model. For i > 0, element i is the SS due to
- variable i.
- */
- gsl_vector *ssq;
-};
+ struct categoricals *cats;
+ /*
+ Sums of squares due to different variables. Element 0 is the SSE
+ for the entire model. For i > 0, element i is the SS due to
+ variable i.
+ */
+ gsl_vector *ssq;
+ };
/* Default design: all possible interactions */
static void
design_full (struct glm_spec *glm)
{
- int sz;
- int i = 0;
- glm->n_interactions = (1 << glm->n_factor_vars) - 1;
-
- glm->interactions = xcalloc (glm->n_interactions, sizeof *glm->interactions);
+ size_t n = (1 << glm->n_factor_vars) - 1;
+ glm->interactions = xnmalloc (n, sizeof *glm->interactions);
/* All subsets, with exception of the empty set, of [0, glm->n_factor_vars) */
- for (sz = 1; sz <= glm->n_factor_vars; ++sz)
+ for (size_t sz = 1; sz <= glm->n_factor_vars; ++sz)
{
gsl_combination *c = gsl_combination_calloc (glm->n_factor_vars, sz);
do
{
struct interaction *iact = interaction_create (NULL);
- int e;
- for (e = 0 ; e < gsl_combination_k (c); ++e)
- interaction_add_variable (iact, glm->factor_vars [gsl_combination_get (c, e)]);
+ for (int e = 0; e < gsl_combination_k (c); ++e)
+ interaction_add_variable (
+ iact, glm->factor_vars [gsl_combination_get (c, e)]);
- glm->interactions[i++] = iact;
+ glm->interactions[glm->n_interactions++] = iact;
}
while (gsl_combination_next (c) == GSL_SUCCESS);
gsl_combination_free (c);
}
+ assert (glm->n_interactions == n);
}
static void output_glm (const struct glm_spec *,
static void run_glm (struct glm_spec *cmd, struct casereader *input,
const struct dataset *ds);
-
-static bool parse_design_spec (struct lexer *lexer, struct glm_spec *glm);
-
+static struct interaction *parse_design_term (struct lexer *,
+ const struct dictionary *);
int
cmd_glm (struct lexer *lexer, struct dataset *ds)
{
- int i;
struct const_var_set *factors = NULL;
- struct glm_spec glm;
bool design = false;
- glm.dict = dataset_dict (ds);
- glm.n_dep_vars = 0;
- glm.n_factor_vars = 0;
- glm.n_interactions = 0;
- glm.interactions = NULL;
- glm.dep_vars = NULL;
- glm.factor_vars = NULL;
- glm.exclude = MV_ANY;
- glm.intercept = true;
- glm.wv = dict_get_weight (glm.dict);
- glm.alpha = 0.05;
- glm.dump_coding = false;
- glm.ss_type = 3;
-
+ struct dictionary *dict = dataset_dict (ds);
+ struct glm_spec glm = {
+ .dict = dict,
+ .exclude = MV_ANY,
+ .intercept = true,
+ .wv = dict_get_weight (dict),
+ .alpha = 0.05,
+ .ss_type = 3,
+ };
+
+ int dep_vars_start = lex_ofs (lexer);
if (!parse_variables_const (lexer, glm.dict,
&glm.dep_vars, &glm.n_dep_vars,
PV_NO_DUPLICATE | PV_NUMERIC))
goto error;
+ int dep_vars_end = lex_ofs (lexer) - 1;
- if (! lex_force_match (lexer, T_BY))
+ if (!lex_force_match (lexer, T_BY))
goto error;
if (!parse_variables_const (lexer, glm.dict,
if (glm.n_dep_vars > 1)
{
- msg (ME, _("Multivariate analysis is not yet implemented"));
- return CMD_FAILURE;
+ lex_ofs_error (lexer, dep_vars_start, dep_vars_end,
+ _("Multivariate analysis is not yet implemented."));
+ goto error;
}
- factors =
- const_var_set_create_from_array (glm.factor_vars, glm.n_factor_vars);
+ factors = const_var_set_create_from_array (glm.factor_vars, glm.n_factor_vars);
+ size_t allocated_interactions = 0;
while (lex_token (lexer) != T_ENDCMD)
{
lex_match (lexer, T_SLASH);
&& lex_token (lexer) != T_SLASH)
{
if (lex_match_id (lexer, "INCLUDE"))
- {
- glm.exclude = MV_SYSTEM;
- }
+ glm.exclude = MV_SYSTEM;
else if (lex_match_id (lexer, "EXCLUDE"))
- {
- glm.exclude = MV_ANY;
- }
+ glm.exclude = MV_ANY;
else
{
- lex_error (lexer, NULL);
+ lex_error_expecting (lexer, "INCLUDE", "EXCLUDE");
goto error;
}
}
&& lex_token (lexer) != T_SLASH)
{
if (lex_match_id (lexer, "INCLUDE"))
- {
- glm.intercept = true;
- }
+ glm.intercept = true;
else if (lex_match_id (lexer, "EXCLUDE"))
- {
- glm.intercept = false;
- }
+ glm.intercept = false;
else
{
- lex_error (lexer, NULL);
+ lex_error_expecting (lexer, "INCLUDE", "EXCLUDE");
goto error;
}
}
else if (lex_match_id (lexer, "CRITERIA"))
{
lex_match (lexer, T_EQUALS);
- if (lex_match_id (lexer, "ALPHA"))
- {
- if (lex_force_match (lexer, T_LPAREN))
- {
- if (! lex_force_num (lexer))
- {
- lex_error (lexer, NULL);
- goto error;
- }
-
- glm.alpha = lex_number (lexer);
- lex_get (lexer);
- if (! lex_force_match (lexer, T_RPAREN))
- {
- lex_error (lexer, NULL);
- goto error;
- }
- }
- }
- else
- {
- lex_error (lexer, NULL);
- goto error;
- }
+ if (!lex_force_match_phrase (lexer, "ALPHA(")
+ || !lex_force_num (lexer))
+ goto error;
+ glm.alpha = lex_number (lexer);
+ lex_get (lexer);
+ if (!lex_force_match (lexer, T_RPAREN))
+ goto error;
}
else if (lex_match_id (lexer, "METHOD"))
{
lex_match (lexer, T_EQUALS);
- if (!lex_force_match_id (lexer, "SSTYPE"))
- {
- lex_error (lexer, NULL);
- goto error;
- }
-
- if (! lex_force_match (lexer, T_LPAREN))
- {
- lex_error (lexer, NULL);
- goto error;
- }
-
- if (!lex_force_int_range (lexer, "SSTYPE", 1, 3))
- {
- lex_error (lexer, NULL);
- goto error;
- }
+ if (!lex_force_match_phrase (lexer, "SSTYPE(")
+ || !lex_force_int_range (lexer, "SSTYPE", 1, 3))
+ goto error;
glm.ss_type = lex_integer (lexer);
lex_get (lexer);
- if (! lex_force_match (lexer, T_RPAREN))
- {
- lex_error (lexer, NULL);
- goto error;
- }
+ if (!lex_force_match (lexer, T_RPAREN))
+ goto error;
}
else if (lex_match_id (lexer, "DESIGN"))
{
lex_match (lexer, T_EQUALS);
- if (! parse_design_spec (lexer, &glm))
- goto error;
+ do
+ {
+ struct interaction *iact = parse_design_term (lexer, glm.dict);
+ if (!iact)
+ goto error;
+
+ if (glm.n_interactions >= allocated_interactions)
+ glm.interactions = x2nrealloc (glm.interactions,
+ &allocated_interactions,
+ sizeof *glm.interactions);
+ glm.interactions[glm.n_interactions++] = iact;
+
+ lex_match (lexer, T_COMMA);
+ }
+ while (lex_token (lexer) != T_ENDCMD && lex_token (lexer) != T_SLASH);
if (glm.n_interactions > 0)
design = true;
}
else if (lex_match_id (lexer, "SHOWCODES"))
- /* Undocumented debug option */
{
- lex_match (lexer, T_EQUALS);
-
+ /* Undocumented debug option */
glm.dump_coding = true;
}
else
{
- lex_error (lexer, NULL);
+ lex_error_expecting (lexer, "MISSING", "INTERCEPT", "CRITERIA",
+ "METHOD", "DESIGN");
goto error;
}
}
- if (! design)
- {
- design_full (&glm);
- }
+ if (!design)
+ design_full (&glm);
- {
- struct casegrouper *grouper;
- struct casereader *group;
- bool ok;
-
- grouper = casegrouper_create_splits (proc_open (ds), glm.dict);
- while (casegrouper_get_next_group (grouper, &group))
- run_glm (&glm, group, ds);
- ok = casegrouper_destroy (grouper);
- ok = proc_commit (ds) && ok;
- }
+ struct casegrouper *grouper = casegrouper_create_splits (proc_open (ds), glm.dict);
+ struct casereader *group;
+ while (casegrouper_get_next_group (grouper, &group))
+ run_glm (&glm, group, ds);
+ bool ok = casegrouper_destroy (grouper);
+ ok = proc_commit (ds) && ok;
const_var_set_destroy (factors);
free (glm.factor_vars);
- for (i = 0 ; i < glm.n_interactions; ++i)
+ for (size_t i = 0; i < glm.n_interactions; ++i)
interaction_destroy (glm.interactions[i]);
free (glm.interactions);
free (glm.dep_vars);
-
return CMD_SUCCESS;
error:
-
const_var_set_destroy (factors);
free (glm.factor_vars);
- for (i = 0 ; i < glm.n_interactions; ++i)
+ for (size_t i = 0; i < glm.n_interactions; ++i)
interaction_destroy (glm.interactions[i]);
free (glm.interactions);
static inline bool
not_dropped (size_t j, const bool *ff)
{
- return ! ff[j];
+ return !ff[j];
}
static void
n_dropped_submodel = n_dropped_model;
for (i = cmd->n_dep_vars; i < covariance_dim (cov); i++)
- {
- submodel_dropped[i] = model_dropped[i];
- }
+ submodel_dropped[i] = model_dropped[i];
for (i = cmd->n_dep_vars; i < covariance_dim (cov); i++)
{
ssq_type2 (struct covariance *cov, gsl_vector *ssq, const struct glm_spec *cmd)
{
const gsl_matrix *cm = covariance_calculate_unnormalized (cov);
- size_t i;
- size_t k;
bool *model_dropped = XCALLOC (covariance_dim (cov), bool);
bool *submodel_dropped = XCALLOC (covariance_dim (cov), bool);
const struct categoricals *cats = covariance_get_categoricals (cov);
- for (k = 0; k < cmd->n_interactions; k++)
+ for (size_t k = 0; k < cmd->n_interactions; k++)
{
gsl_matrix *model_cov = NULL;
gsl_matrix *submodel_cov = NULL;
size_t n_dropped_model = 0;
size_t n_dropped_submodel = 0;
- for (i = cmd->n_dep_vars; i < covariance_dim (cov); i++)
+ for (size_t i = cmd->n_dep_vars; i < covariance_dim (cov); i++)
{
const struct interaction * x =
categoricals_get_interaction_by_subscript (cats, i - cmd->n_dep_vars);
ssq_type3 (struct covariance *cov, gsl_vector *ssq, const struct glm_spec *cmd)
{
const gsl_matrix *cm = covariance_calculate_unnormalized (cov);
- size_t i;
- size_t k;
bool *model_dropped = XCALLOC (covariance_dim (cov), bool);
bool *submodel_dropped = XCALLOC (covariance_dim (cov), bool);
const struct categoricals *cats = covariance_get_categoricals (cov);
- double ss0;
gsl_matrix *submodel_cov = gsl_matrix_alloc (cm->size1, cm->size2);
fill_submatrix (cm, submodel_cov, submodel_dropped);
reg_sweep (submodel_cov, 0);
- ss0 = gsl_matrix_get (submodel_cov, 0, 0);
+ double ss0 = gsl_matrix_get (submodel_cov, 0, 0);
gsl_matrix_free (submodel_cov);
free (submodel_dropped);
- for (k = 0; k < cmd->n_interactions; k++)
+ for (size_t k = 0; k < cmd->n_interactions; k++)
{
- gsl_matrix *model_cov = NULL;
size_t n_dropped_model = 0;
-
- for (i = cmd->n_dep_vars; i < covariance_dim (cov); i++)
+ for (size_t i = cmd->n_dep_vars; i < covariance_dim (cov); i++)
{
const struct interaction * x =
categoricals_get_interaction_by_subscript (cats, i - cmd->n_dep_vars);
}
}
- model_cov = gsl_matrix_alloc (cm->size1 - n_dropped_model, cm->size2 - n_dropped_model);
+ gsl_matrix *model_cov = gsl_matrix_alloc (cm->size1 - n_dropped_model,
+ cm->size2 - n_dropped_model);
- fill_submatrix (cm, model_cov, model_dropped);
+ fill_submatrix (cm, model_cov, model_dropped);
reg_sweep (model_cov, 0);
- gsl_vector_set (ssq, k + 1,
- gsl_matrix_get (model_cov, 0, 0) - ss0);
+ gsl_vector_set (ssq, k + 1, gsl_matrix_get (model_cov, 0, 0) - ss0);
gsl_matrix_free (model_cov);
}
free (model_dropped);
}
-
-
-//static void dump_matrix (const gsl_matrix *m);
-
static void
run_glm (struct glm_spec *cmd, struct casereader *input,
const struct dataset *ds)
{
bool warn_bad_weight = true;
- int v;
- struct taint *taint;
struct dictionary *dict = dataset_dict (ds);
- struct casereader *reader;
- struct ccase *c;
-
- struct glm_workspace ws;
- struct covariance *cov;
- input = casereader_create_filter_missing (input,
- cmd->dep_vars, cmd->n_dep_vars,
- cmd->exclude,
- NULL, NULL);
- input = casereader_create_filter_missing (input,
- cmd->factor_vars, cmd->n_factor_vars,
- cmd->exclude,
- NULL, NULL);
+ input = casereader_create_filter_missing (input,
+ cmd->dep_vars, cmd->n_dep_vars,
+ cmd->exclude,
+ NULL, NULL);
- ws.cats = categoricals_create (cmd->interactions, cmd->n_interactions,
- cmd->wv, MV_ANY);
+ input = casereader_create_filter_missing (input,
+ cmd->factor_vars, cmd->n_factor_vars,
+ cmd->exclude,
+ NULL, NULL);
- cov = covariance_2pass_create (cmd->n_dep_vars, cmd->dep_vars,
- ws.cats, cmd->wv, cmd->exclude, true);
+ struct glm_workspace ws = {
+ .cats = categoricals_create (cmd->interactions, cmd->n_interactions,
+ cmd->wv, MV_ANY)
+ };
+ struct covariance *cov = covariance_2pass_create (
+ cmd->n_dep_vars, cmd->dep_vars, ws.cats, cmd->wv, cmd->exclude, true);
- c = casereader_peek (input, 0);
- if (c == NULL)
- {
- casereader_destroy (input);
- return;
- }
- output_split_file_values (ds, c);
- case_unref (c);
+ output_split_file_values_peek (ds, input);
- taint = taint_clone (casereader_get_taint (input));
+ struct taint *taint = taint_clone (casereader_get_taint (input));
ws.totals = moments_create (MOMENT_VARIANCE);
- for (reader = casereader_clone (input);
- (c = casereader_read (reader)) != NULL; case_unref (c))
+ struct casereader *reader = casereader_clone (input);
+ struct ccase *c;
+ for (; (c = casereader_read (reader)) != NULL; case_unref (c))
{
double weight = dict_get_case_weight (dict, c, &warn_bad_weight);
- for (v = 0; v < cmd->n_dep_vars; ++v)
- moments_pass_one (ws.totals, case_data (c, cmd->dep_vars[v])->f,
- weight);
+ for (int v = 0; v < cmd->n_dep_vars; ++v)
+ moments_pass_one (ws.totals, case_num (c, cmd->dep_vars[v]), weight);
covariance_accumulate_pass1 (cov, c);
}
else
reader = input;
- for (;
- (c = casereader_read (reader)) != NULL; case_unref (c))
+ for (; (c = casereader_read (reader)) != NULL; case_unref (c))
{
double weight = dict_get_case_weight (dict, c, &warn_bad_weight);
- for (v = 0; v < cmd->n_dep_vars; ++v)
- moments_pass_two (ws.totals, case_data (c, cmd->dep_vars[v])->f,
- weight);
+ for (size_t v = 0; v < cmd->n_dep_vars; ++v)
+ moments_pass_two (ws.totals, case_num (c, cmd->dep_vars[v]), weight);
covariance_accumulate_pass2 (cov, c);
}
\f
-static bool
-parse_nested_variable (struct lexer *lexer, struct glm_spec *glm)
+static struct interaction *
+parse_design_term (struct lexer *lexer, const struct dictionary *dict)
{
- const struct variable *v = NULL;
- if (! lex_match_variable (lexer, glm->dict, &v))
- return false;
-
- if (lex_match (lexer, T_LPAREN))
+ struct interaction *iact = interaction_create (NULL);
+ do
{
- if (! parse_nested_variable (lexer, glm))
- return false;
-
- if (! lex_force_match (lexer, T_RPAREN))
- return false;
- }
+ struct variable *var = parse_variable (lexer, dict);
+ if (!var)
+ goto error;
+ interaction_add_variable (iact, var);
- lex_error (lexer, "Nested variables are not yet implemented"); return false;
- return true;
-}
-
-/* A design term is an interaction OR a nested variable */
-static bool
-parse_design_term (struct lexer *lexer, struct glm_spec *glm)
-{
- struct interaction *iact = NULL;
- if (parse_design_interaction (lexer, glm->dict, &iact))
- {
- /* Interaction parsing successful. Add to list of interactions */
- glm->interactions = xrealloc (glm->interactions, sizeof *glm->interactions * ++glm->n_interactions);
- glm->interactions[glm->n_interactions - 1] = iact;
- return true;
+ if (lex_match (lexer, T_LPAREN) || lex_match_id (lexer, "WITHIN"))
+ {
+ lex_next_error (lexer, -1, -1,
+ "Nested variables are not yet implemented.");
+ goto error;
+ }
}
+ while (lex_match (lexer, T_ASTERISK));
- if (parse_nested_variable (lexer, glm))
- return true;
-
- return false;
-}
-
-
+ return iact;
-/* Parse a complete DESIGN specification.
- A design spec is a design term, optionally followed by a comma,
- and another design spec.
-*/
-static bool
-parse_design_spec (struct lexer *lexer, struct glm_spec *glm)
-{
- if (lex_token (lexer) == T_ENDCMD || lex_token (lexer) == T_SLASH)
- return true;
-
- if (! parse_design_term (lexer, glm))
- return false;
-
- lex_match (lexer, T_COMMA);
-
- return parse_design_spec (lexer, glm);
+error:
+ interaction_destroy (iact);
+ return NULL;
}
-