X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Flanguage%2Fstats%2Fmatrix.c;h=18fcfdd8197de1a373d263fe2aa86a35c3285867;hb=8d33786b32ea7f1d709c33d5a7c2c442fbeaafff;hp=bcae19971750704affa257ef127ca17693ed9b53;hpb=f4c07cacab73e6f4b72e6393f394e990501b5916;p=pspp diff --git a/src/language/stats/matrix.c b/src/language/stats/matrix.c index bcae199717..18fcfdd819 100644 --- a/src/language/stats/matrix.c +++ b/src/language/stats/matrix.c @@ -17,6 +17,7 @@ #include #include +#include #include #include #include @@ -53,12 +54,14 @@ #include "libpspp/string-array.h" #include "libpspp/stringi-set.h" #include "libpspp/u8-line.h" +#include "math/distributions.h" #include "math/random.h" #include "output/driver.h" #include "output/output-item.h" #include "output/pivot-table.h" #include "gl/c-ctype.h" +#include "gl/c-strcase.h" #include "gl/ftoastr.h" #include "gl/minmax.h" #include "gl/xsize.h" @@ -81,10 +84,15 @@ struct msave_common struct string_array variables; struct string_array fnames; struct string_array snames; - bool has_factors; - bool has_splits; size_t n_varnames; + /* Collects and owns factors and splits. The individual msave_command + structs point to these but do not own them. */ + struct matrix_expr **factors; + size_t n_factors, allocated_factors; + struct matrix_expr **splits; + size_t n_splits, allocated_splits; + /* Execution state. */ struct dictionary *dict; struct casewriter *writer; @@ -105,6 +113,22 @@ struct write_file struct u8_line *held; }; +struct save_file + { + struct file_handle *file; + struct dataset *dataset; + + /* Parameters from parsing the first SAVE command for 'file'. */ + struct string_array variables; + struct matrix_expr *names; + struct stringi_set strings; + + /* Results from the first (only) attempt to open this save_file. */ + bool error; + struct casewriter *writer; + struct dictionary *dict; + }; + struct matrix_state { struct dataset *dataset; @@ -112,7 +136,6 @@ struct matrix_state struct lexer *lexer; struct hmap vars; bool in_loop; - struct file_handle *prev_save_outfile; struct msave_common *common; struct file_handle *prev_read_file; @@ -122,6 +145,10 @@ struct matrix_state struct file_handle *prev_write_file; struct write_file **write_files; size_t n_write_files; + + struct file_handle *prev_save_file; + struct save_file **save_files; + size_t n_save_files; }; static struct matrix_var * @@ -153,87 +180,187 @@ matrix_var_set (struct matrix_var *var, gsl_matrix *value) var->value = value; } -#define MATRIX_FUNCTIONS \ - F(ABS, m_m) \ - F(ALL, d_m) \ - F(ANY, d_m) \ - F(ARSIN, m_m) \ - F(ARTAN, m_m) \ - F(BLOCK, m_any) \ - F(CHOL, m_m) \ - F(CMIN, m_m) \ - F(CMAX, m_m) \ - F(COS, m_m) \ - F(CSSQ, m_m) \ - F(CSUM, m_m) \ - F(DESIGN, m_m) \ - F(DET, d_m) \ - F(DIAG, m_m) \ - F(EVAL, m_m) \ - F(EXP, m_m) \ - F(GINV, m_m) \ - F(GRADE, m_m) \ - F(GSCH, m_m) \ - F(IDENT, IDENT) \ - F(INV, m_m) \ - F(KRONEKER, m_mm) \ - F(LG10, m_m) \ - F(LN, m_m) \ - F(MAGIC, m_d) \ - F(MAKE, m_ddd) \ - F(MDIAG, m_v) \ - F(MMAX, d_m) \ - F(MMIN, d_m) \ - F(MOD, m_md) \ - F(MSSQ, d_m) \ - F(MSUM, d_m) \ - F(NCOL, d_m) \ - F(NROW, d_m) \ - F(RANK, d_m) \ - F(RESHAPE, m_mdd) \ - F(RMAX, m_m) \ - F(RMIN, m_m) \ - F(RND, m_m) \ - F(RNKORDER, m_m) \ - F(RSSQ, m_m) \ - F(RSUM, m_m) \ - F(SIN, m_m) \ - F(SOLVE, m_mm) \ - F(SQRT, m_m) \ - F(SSCP, m_m) \ - F(SVAL, m_m) \ - F(SWEEP, m_md) \ - F(T, m_m) \ - F(TRACE, d_m) \ - F(TRANSPOS, m_m) \ - F(TRUNC, m_m) \ - F(UNIFORM, m_dd) +#define MATRIX_FUNCTIONS \ + F(ABS, "ABS", m_m_e, NULL) \ + F(ALL, "ALL", d_m, NULL) \ + F(ANY, "ANY", d_m, NULL) \ + F(ARSIN, "ARSIN", m_m_e, "a[-1,1]") \ + F(ARTAN, "ARTAN", m_m_e, NULL) \ + F(BLOCK, "BLOCK", m_any, NULL) \ + F(CHOL, "CHOL", m_m, NULL) \ + F(CMIN, "CMIN", m_m, NULL) \ + F(CMAX, "CMAX", m_m, NULL) \ + F(COS, "COS", m_m_e, NULL) \ + F(CSSQ, "CSSQ", m_m, NULL) \ + F(CSUM, "CSUM", m_m, NULL) \ + F(DESIGN, "DESIGN", m_m, NULL) \ + F(DET, "DET", d_m, NULL) \ + F(DIAG, "DIAG", m_m, NULL) \ + F(EVAL, "EVAL", m_m, NULL) \ + F(EXP, "EXP", m_m_e, NULL) \ + F(GINV, "GINV", m_m, NULL) \ + F(GRADE, "GRADE", m_m, NULL) \ + F(GSCH, "GSCH", m_m, NULL) \ + F(IDENT, "IDENT", IDENT, NULL) \ + F(INV, "INV", m_m, NULL) \ + F(KRONEKER, "KRONEKER", m_mm, NULL) \ + F(LG10, "LG10", m_m_e, "a>0") \ + F(LN, "LN", m_m_e, "a>0") \ + F(MAGIC, "MAGIC", m_d, NULL) \ + F(MAKE, "MAKE", m_ddd, NULL) \ + F(MDIAG, "MDIAG", m_v, NULL) \ + F(MMAX, "MMAX", d_m, NULL) \ + F(MMIN, "MMIN", d_m, NULL) \ + F(MOD, "MOD", m_md, NULL) \ + F(MSSQ, "MSSQ", d_m, NULL) \ + F(MSUM, "MSUM", d_m, NULL) \ + F(NCOL, "NCOL", d_m, NULL) \ + F(NROW, "NROW", d_m, NULL) \ + F(RANK, "RANK", d_m, NULL) \ + F(RESHAPE, "RESHAPE", m_mdd, NULL) \ + F(RMAX, "RMAX", m_m, NULL) \ + F(RMIN, "RMIN", m_m, NULL) \ + F(RND, "RND", m_m_e, NULL) \ + F(RNKORDER, "RNKORDER", m_m, NULL) \ + F(RSSQ, "RSSQ", m_m, NULL) \ + F(RSUM, "RSUM", m_m, NULL) \ + F(SIN, "SIN", m_m_e, NULL) \ + F(SOLVE, "SOLVE", m_mm, NULL) \ + F(SQRT, "SQRT", m_m, NULL) \ + F(SSCP, "SSCP", m_m, NULL) \ + F(SVAL, "SVAL", m_m, NULL) \ + F(SWEEP, "SWEEP", m_md, NULL) \ + F(T, "T", m_m, NULL) \ + F(TRACE, "TRACE", d_m, NULL) \ + F(TRANSPOS, "TRANSPOS", m_m, NULL) \ + F(TRUNC, "TRUNC", m_m_e, NULL) \ + F(UNIFORM, "UNIFORM", m_dd, NULL) \ + F(PDF_BETA, "PDF.BETA", m_mdd_e, "a[0,1] b>0 c>0") \ + F(CDF_BETA, "CDF.BETA", m_mdd_e, "a[0,1] b>0 c>0") \ + F(IDF_BETA, "IDF.BETA", m_mdd_e, "a[0,1] b>0 c>0") \ + F(RV_BETA, "RV.BETA", d_dd, "a>0 b>0") \ + F(NCDF_BETA, "NCDF.BETA", m_mddd_e, "a>=0 b>0 c>0 d>0") \ + F(NPDF_BETA, "NCDF.BETA", m_mddd_e, "a>=0 b>0 c>0 d>0") \ + /* XXX CDF.BVNOR */ \ + F(PDF_BVNOR, "PDF.BVNOR", m_mdd_e, "c[-1,1]") \ + F(CDF_CAUCHY, "CDF.CAUCHY", m_mdd_e, "c>0") \ + F(IDF_CAUCHY, "IDF.CAUCHY", m_mdd_e, "a(0,1) c>0") \ + F(PDF_CAUCHY, "PDF.CAUCHY", m_mdd_e, "c>0") \ + F(RV_CAUCHY, "RV.CAUCHY", d_dd, "b>0") \ + F(CDF_CHISQ, "CDF.CHISQ", m_md_e, "a>=0 b>0") \ + F(IDF_CHISQ, "IDF.CHISQ", m_md_e, "a[0,1) b>0") \ + F(PDF_CHISQ, "PDF.CHISQ", m_md_e, "a>=0 b>0") \ + F(RV_CHISQ, "RV.CHISQ", d_d, "a>0") \ + F(SIG_CHISQ, "SIG.CHISQ", m_md_e, "a>=0 b>0") \ + F(CDF_EXP, "CDF.EXP", m_md_e, "a>=0 b>=0") \ + F(IDF_EXP, "IDF.EXP", m_md_e, "a[0,1) b>0") \ + F(PDF_EXP, "PDF.EXP", m_md_e, "a>=0 b>0") \ + F(RV_EXP, "RV.EXP", d_d, "a>0") \ + F(PDF_XPOWER, "PDF.XPOWER", m_mdd_e, "b>0 c>=0") \ + F(RV_XPOWER, "RV.XPOWER", d_dd, "a>0 c>=0") + +struct matrix_function_properties + { + const char *name; + const char *constraints; + }; +/* +() +(a) +(a > 0) +(a >= 0) +(a > 0 && a < 1) +(a > 0 && a <= 1) +(a >= 0 && a <= 1) +(a > 0 && a < 1, b > 0) +(a >= 0 && a < 1, b > 0) +(a >= 0 && a <= 1, b > 0) +(a == 0 || a == 1, b >= 0 && b <= 1) +(a >= 0 && a < 1, b > 0, c > 0) +(a >= 0 && a <= 1, b > 0, c > 0) +(a >= 0 && a < 1, b >= 1, c >= 1) +(a >= 0 && a <= 1, b, c) +(a > 0 && a < 1, b, c > 0) +(a >= 0 && a <= 1, b <= c, c) +(a > 0 && a == floor (a), +(a >= 0 && a == floor (a) && a <= b, +(a >= 0 && a == floor (a) && a <= d, +(a >= 0 && a == floor (a), b > 0) +(a > 0 && a == floor (a), b >= 0 && b <= 1) +(a > 0, b > 0) +(a > 0, b >= 0) +(a >= 0, b > 0) +(a >= 0, b > 0, c) +(a > 0, b > 0, c > 0) +(a >= 0, b > 0, c > 0) +(a >= 0, b > 0, c > 0, d > 0) +(a >= 0, b > 0, c > 0, d >= 0) +(a > 0, b >= 1, c >= 1) +(a >= 1 && a == floor (a), b >= 0 && b <= 1) +(a >= 1, b > 0 && b <= 1) +(a >= 1, b == floor (b), c > 0 && c <= 1) +(a, b) +(a, b > 0) +(a, b > 0 && b <= 2) +(a, b > 0 && b <= 2, c >= -1 && c <= 1) +(a, b > 0 && b == floor (c), c >= 0 && c <= 1) +(a, b > 0, c) +(a, b > 0, c > 0) +(a, b > 0, c >= 0) +(a <= b, b) +(a >= b, b > 0, c > 0) +(a, b, c) +(a, b, c > 0) +(a, b, c >= -1 && c <= 1) +(a <= c, b <= a, c) +(a == floor (a), b > 0 && b <= 1) +b > 0 && b == floor (b), +b > 0 && b == floor (b) && b <= a, +c >= 0 && c <= 1) +c > 0 && c == floor (c) && c <= a) +c > 0 && c == floor (c) && c <= b, +d > 0 && d == floor (d) && d <= b) +*/ + +enum { d_d_MIN_ARGS = 1, d_d_MAX_ARGS = 1 }; +enum { d_dd_MIN_ARGS = 2, d_dd_MAX_ARGS = 2 }; enum { m_d_MIN_ARGS = 1, m_d_MAX_ARGS = 1 }; enum { m_dd_MIN_ARGS = 2, m_dd_MAX_ARGS = 2 }; enum { m_ddd_MIN_ARGS = 3, m_ddd_MAX_ARGS = 3 }; enum { m_m_MIN_ARGS = 1, m_m_MAX_ARGS = 1 }; +enum { m_m_e_MIN_ARGS = 1, m_m_e_MAX_ARGS = 1 }; enum { m_md_MIN_ARGS = 2, m_md_MAX_ARGS = 2 }; +enum { m_md_e_MIN_ARGS = 2, m_md_e_MAX_ARGS = 2 }; enum { m_mdd_MIN_ARGS = 3, m_mdd_MAX_ARGS = 3 }; +enum { m_mdd_e_MIN_ARGS = 3, m_mdd_e_MAX_ARGS = 3 }; +enum { m_mddd_e_MIN_ARGS = 4, m_mddd_e_MAX_ARGS = 4 }; enum { m_mm_MIN_ARGS = 2, m_mm_MAX_ARGS = 2 }; enum { m_v_MIN_ARGS = 1, m_v_MAX_ARGS = 1 }; enum { d_m_MIN_ARGS = 1, d_m_MAX_ARGS = 1 }; enum { m_any_MIN_ARGS = 1, m_any_MAX_ARGS = INT_MAX }; enum { IDENT_MIN_ARGS = 1, IDENT_MAX_ARGS = 2 }; +typedef double matrix_proto_d_d (double); +typedef double matrix_proto_d_dd (double, double); typedef gsl_matrix *matrix_proto_m_d (double); typedef gsl_matrix *matrix_proto_m_dd (double, double); typedef gsl_matrix *matrix_proto_m_ddd (double, double, double); typedef gsl_matrix *matrix_proto_m_m (gsl_matrix *); +typedef double matrix_proto_m_m_e (double); typedef gsl_matrix *matrix_proto_m_md (gsl_matrix *, double); +typedef double matrix_proto_m_md_e (double, double); typedef gsl_matrix *matrix_proto_m_mdd (gsl_matrix *, double, double); +typedef double matrix_proto_m_mdd_e (double, double, double); +typedef double matrix_proto_m_mddd_e (double, double, double, double); typedef gsl_matrix *matrix_proto_m_mm (gsl_matrix *, gsl_matrix *); typedef gsl_matrix *matrix_proto_m_v (gsl_vector *); typedef double matrix_proto_d_m (gsl_matrix *); typedef gsl_matrix *matrix_proto_m_any (gsl_matrix *[], size_t n); typedef gsl_matrix *matrix_proto_IDENT (double, double); -#define F(NAME, PROTOTYPE) static matrix_proto_##PROTOTYPE matrix_eval_##NAME; +#define F(ENUM, STRING, PROTO, CONSTRAINTS) \ + static matrix_proto_##PROTO matrix_eval_##ENUM; MATRIX_FUNCTIONS #undef F @@ -244,7 +371,7 @@ struct matrix_expr enum matrix_op { /* Functions. */ -#define F(NAME, PROTOTYPE) MOP_F_##NAME, +#define F(ENUM, STRING, PROTO, CONSTRAINTS) MOP_F_##ENUM, MATRIX_FUNCTIONS #undef F @@ -319,7 +446,7 @@ matrix_expr_destroy (struct matrix_expr *e) switch (e->op) { -#define F(NAME, PROTOTYPE) case MOP_F_##NAME: +#define F(ENUM, STRING, PROTO, CONSTRAINTS) case MOP_F_##ENUM: MATRIX_FUNCTIONS #undef F case MOP_NEGATE: @@ -478,12 +605,10 @@ to_vector (gsl_matrix *m) } -static gsl_matrix * -matrix_eval_ABS (gsl_matrix *m) +static double +matrix_eval_ABS (double d) { - MATRIX_FOR_ALL_ELEMENTS (d, y, x, m) - *d = fabs (*d); - return m; + return fabs (d); } static double @@ -504,20 +629,16 @@ matrix_eval_ANY (gsl_matrix *m) return 0.0; } -static gsl_matrix * -matrix_eval_ARSIN (gsl_matrix *m) +static double +matrix_eval_ARSIN (double d) { - MATRIX_FOR_ALL_ELEMENTS (d, y, x, m) - *d = asin (*d); - return m; + return asin (d); } -static gsl_matrix * -matrix_eval_ARTAN (gsl_matrix *m) +static double +matrix_eval_ARTAN (double d) { - MATRIX_FOR_ALL_ELEMENTS (d, y, x, m) - *d = atan (*d); - return m; + return atan (d); } static gsl_matrix * @@ -599,12 +720,10 @@ matrix_eval_CMIN (gsl_matrix *m) return matrix_eval_col_extremum (m, true); } -static gsl_matrix * -matrix_eval_COS (gsl_matrix *m) +static double +matrix_eval_COS (double d) { - MATRIX_FOR_ALL_ELEMENTS (d, y, x, m) - *d = cos (*d); - return m; + return cos (d); } static gsl_matrix * @@ -764,12 +883,10 @@ matrix_eval_EVAL (gsl_matrix *m) return eval; } -static gsl_matrix * -matrix_eval_EXP (gsl_matrix *m) +static double +matrix_eval_EXP (double d) { - MATRIX_FOR_ALL_ELEMENTS (d, y, x, m) - *d = exp (*d); - return m; + return exp (d); } /* From https://gist.github.com/turingbirds/5e99656e08dbe1324c99, where it was @@ -1015,20 +1132,16 @@ matrix_eval_KRONEKER (gsl_matrix *a, gsl_matrix *b) return k; } -static gsl_matrix * -matrix_eval_LG10 (gsl_matrix *m) +static double +matrix_eval_LG10 (double d) { - MATRIX_FOR_ALL_ELEMENTS (d, y, x, m) - *d = log10 (*d); - return m; + return log10 (d); } -static gsl_matrix * -matrix_eval_LN (gsl_matrix *m) +static double +matrix_eval_LN (double d) { - MATRIX_FOR_ALL_ELEMENTS (d, y, x, m) - *d = log (*d); - return m; + return log (d); } static void @@ -1354,12 +1467,10 @@ matrix_eval_RMIN (gsl_matrix *m) return matrix_eval_row_extremum (m, true); } -static gsl_matrix * -matrix_eval_RND (gsl_matrix *m) +static double +matrix_eval_RND (double d) { - MATRIX_FOR_ALL_ELEMENTS (d, y, x, m) - *d = rint (*d); - return m; + return rint (d); } struct rank @@ -1440,12 +1551,10 @@ matrix_eval_RSUM (gsl_matrix *m) return matrix_eval_row_sum (m, false); } -static gsl_matrix * -matrix_eval_SIN (gsl_matrix *m) +static double +matrix_eval_SIN (double d) { - MATRIX_FOR_ALL_ELEMENTS (d, y, x, m) - *d = sin (*d); - return m; + return sin (d); } static gsl_matrix * @@ -1603,12 +1712,10 @@ matrix_eval_TRANSPOS (gsl_matrix *m) } } -static gsl_matrix * -matrix_eval_TRUNC (gsl_matrix *m) +static double +matrix_eval_TRUNC (double d) { - MATRIX_FOR_ALL_ELEMENTS (d, y, x, m) - *d = trunc (*d); - return m; + return trunc (d); } static gsl_matrix * @@ -1633,6 +1740,138 @@ matrix_eval_UNIFORM (double r_, double c_) return m; } +static double +matrix_eval_PDF_BETA (double x, double a, double b) +{ + return gsl_ran_beta_pdf (x, a, b); +} + +static double +matrix_eval_CDF_BETA (double x, double a, double b) +{ + return gsl_cdf_beta_P (x, a, b); +} + +static double +matrix_eval_IDF_BETA (double P, double a, double b) +{ + return gsl_cdf_beta_Pinv (P, a, b); +} + +static double +matrix_eval_RV_BETA (double a, double b) +{ + return gsl_ran_beta (get_rng (), a, b); +} + +static double +matrix_eval_NCDF_BETA (double x, double a, double b, double lambda) +{ + return ncdf_beta (x, a, b, lambda); +} + +static double +matrix_eval_NPDF_BETA (double x, double a, double b, double lambda) +{ + return npdf_beta (x, a, b, lambda); +} + +static double +matrix_eval_PDF_BVNOR (double x0, double x1, double r) +{ + return gsl_ran_bivariate_gaussian_pdf (x0, x1, 1, 1, r); +} + +static double +matrix_eval_CDF_CAUCHY (double x, double a, double b) +{ + return gsl_cdf_cauchy_P ((x - a) / b, 1); +} + +static double +matrix_eval_IDF_CAUCHY (double P, double a, double b) +{ + return a + b * gsl_cdf_cauchy_Pinv (P, 1); +} + +static double +matrix_eval_PDF_CAUCHY (double x, double a, double b) +{ + return gsl_ran_cauchy_pdf ((x - a) / b, 1) / b; +} + +static double +matrix_eval_RV_CAUCHY (double a, double b) +{ + return a + b * gsl_ran_cauchy (get_rng (), 1); +} + +static double +matrix_eval_CDF_CHISQ (double x, double df) +{ + return gsl_cdf_chisq_P (x, df); +} + +static double +matrix_eval_IDF_CHISQ (double P, double df) +{ + return gsl_cdf_chisq_Pinv (P, df); +} + +static double +matrix_eval_PDF_CHISQ (double x, double df) +{ + return gsl_ran_chisq_pdf (x, df); +} + +static double +matrix_eval_RV_CHISQ (double df) +{ + return gsl_ran_chisq (get_rng (), df); +} + +static double +matrix_eval_SIG_CHISQ (double x, double df) +{ + return gsl_cdf_chisq_Q (x, df); +} + +static double +matrix_eval_CDF_EXP (double x, double a) +{ + return gsl_cdf_exponential_P (x, 1. / a); +} + +static double +matrix_eval_IDF_EXP (double P, double a) +{ + return gsl_cdf_exponential_Pinv (P, 1. / a); +} + +static double +matrix_eval_PDF_EXP (double x, double a) +{ + return gsl_ran_exponential_pdf (x, 1. / a); +} + +static double +matrix_eval_RV_EXP (double a) +{ + return gsl_ran_exponential (get_rng (), 1. / a); +} + +static double +matrix_eval_PDF_XPOWER (double x, double a, double b) +{ + return gsl_ran_exppow_pdf (x, a, b); +} + +static double +matrix_eval_RV_XPOWER (double a, double b) +{ + return gsl_ran_exppow (get_rng (), a, b); +} + struct matrix_function { const char *name; @@ -1642,12 +1881,58 @@ struct matrix_function static struct matrix_expr *matrix_parse_expr (struct matrix_state *); +static bool +word_matches (const char **test, const char **name) +{ + size_t test_len = strcspn (*test, "."); + size_t name_len = strcspn (*name, "."); + if (test_len == name_len) + { + if (buf_compare_case (*test, *name, test_len)) + return false; + } + else if (test_len < 3 || test_len > name_len) + return false; + else + { + if (buf_compare_case (*test, *name, test_len)) + return false; + } + + *test += test_len; + *name += name_len; + if (**test != **name) + return false; + + if (**test == '.') + { + (*test)++; + (*name)++; + } + return true; +} + +/* Returns 0 if TOKEN and FUNC do not match, + 1 if TOKEN is an acceptable abbreviation for FUNC, + 2 if TOKEN equals FUNC. */ +static int +compare_function_names (const char *token_, const char *func_) +{ + const char *token = token_; + const char *func = func_; + while (*token || *func) + if (!word_matches (&token, &func)) + return 0; + return !c_strcasecmp (token_, func_) ? 2 : 1; +} + static const struct matrix_function * matrix_parse_function_name (const char *token) { static const struct matrix_function functions[] = { -#define F(NAME, PROTO) { #NAME, MOP_F_##NAME, PROTO##_MIN_ARGS, PROTO##_MAX_ARGS }, +#define F(ENUM, STRING, PROTO, CONSTRAINTS) \ + { STRING, MOP_F_##ENUM, PROTO##_MIN_ARGS, PROTO##_MAX_ARGS }, MATRIX_FUNCTIONS #undef F }; @@ -1655,7 +1940,7 @@ matrix_parse_function_name (const char *token) for (size_t i = 0; i < N_FUNCTIONS; i++) { - if (lex_id_match_n (ss_cstr (functions[i].name), ss_cstr (token), 3)) + if (compare_function_names (token, functions[i].name) > 0) return &functions[i]; } return NULL; @@ -2192,7 +2477,7 @@ matrix_op_eval (enum matrix_op op, double a, double b) case MOP_OR: return (a > 0) || (b > 0); case MOP_XOR: return (a > 0) != (b > 0); -#define F(NAME, PROTOTYPE) case MOP_F_##NAME: +#define F(ENUM, STRING, PROTO, CONSTRAINTS) case MOP_F_##ENUM: MATRIX_FUNCTIONS #undef F case MOP_NEGATE: @@ -2237,7 +2522,7 @@ matrix_op_name (enum matrix_op op) case MOP_OR: return "OR"; case MOP_XOR: return "XOR"; -#define F(NAME, PROTOTYPE) case MOP_F_##NAME: +#define F(ENUM, STRING, PROTO, CONSTRAINTS) case MOP_F_##ENUM: MATRIX_FUNCTIONS #undef F case MOP_NEGATE: @@ -2527,6 +2812,7 @@ matrix_to_vector (gsl_matrix *m) assert (!v.owner); v.owner = 1; m->owner = 0; + gsl_matrix_free (m); return xmemdup (&v, sizeof v); } @@ -2694,10 +2980,10 @@ matrix_expr_evaluate_mat_index (gsl_matrix *sm, gsl_matrix *im0, return dm; } -#define F(NAME, PROTOTYPE) \ - static gsl_matrix *matrix_expr_evaluate_##PROTOTYPE ( \ - const char *name, gsl_matrix *[], size_t, \ - matrix_proto_##PROTOTYPE *); +#define F(ENUM, STRING, PROTO, CONSTRAINTS) \ + static gsl_matrix *matrix_expr_evaluate_##PROTO ( \ + const struct matrix_function_properties *, gsl_matrix *[], size_t, \ + matrix_proto_##PROTO *); MATRIX_FUNCTIONS #undef F @@ -2739,41 +3025,194 @@ to_scalar_args (const char *name, gsl_matrix *subs[], size_t n_subs, double d[]) return true; } +static int +parse_constraint_value (const char **constraintsp) +{ + char *tail; + long retval = strtol (*constraintsp, &tail, 10); + *constraintsp = tail; + return retval; +} + +static bool +check_constraints (const struct matrix_function_properties *props, + gsl_matrix *args[], size_t n_args) +{ + const char *constraints = props->constraints; + if (!constraints) + return true; + + size_t arg_index = SIZE_MAX; + while (*constraints) + { + if (*constraints >= 'a' && *constraints <= 'd') + { + arg_index = *constraints++ - 'a'; + assert (arg_index < n_args); + } + else if (*constraints == '[' || *constraints == '(') + { + assert (arg_index < n_args); + bool open_lower = *constraints++ == '('; + int minimum = parse_constraint_value (&constraints); + assert (*constraints == ','); + constraints++; + int maximum = parse_constraint_value (&constraints); + assert (*constraints == ']' || *constraints == ')'); + bool open_upper = *constraints++ == ')'; + + MATRIX_FOR_ALL_ELEMENTS (d, y, x, args[arg_index]) + if ((open_lower ? *d <= minimum : *d < minimum) + || (open_upper ? *d >= maximum : *d > maximum)) + { + if (!is_scalar (args[arg_index])) + msg (ME, _("Row %zu, column %zu of argument %zu to matrix " + "function %s has value %g, which is outside " + "the valid range %c%d,%d%c."), + y + 1, x + 1, arg_index + 1, props->name, *d, + open_lower ? '(' : '[', + minimum, maximum, + open_upper ? ')' : ']'); + else + msg (ME, _("Argument %zu to matrix function %s has value %g, " + "which is outside the valid range %c%d,%d%c."), + arg_index + 1, props->name, *d, + open_lower ? '(' : '[', + minimum, maximum, + open_upper ? ')' : ']'); + return false; + } + } + else if (*constraints == '>' || *constraints == '<') + { + bool greater = *constraints++ == '>'; + bool equal; + if (*constraints == '=') + { + equal = true; + constraints++; + } + else + equal = false; + int comparand = parse_constraint_value (&constraints); + + assert (arg_index < n_args); + MATRIX_FOR_ALL_ELEMENTS (d, y, x, args[arg_index]) + if (greater + ? (equal ? !(*d >= comparand) : !(*d > comparand)) + : (equal ? !(*d <= comparand) : !(*d < comparand))) + { + struct string s = DS_EMPTY_INITIALIZER; + if (!is_scalar (args[arg_index])) + ds_put_format (&s, _("Row %zu, column %zu of argument %zu " + "to matrix function %s has " + "invalid value %g."), + y + 1, x + 1, arg_index + 1, props->name, *d); + else + ds_put_format (&s, _("Argument %zu to matrix function %s " + "has invalid value %g."), + arg_index + 1, props->name, *d); + + ds_put_cstr (&s, " "); + if (greater && equal) + ds_put_format (&s, _("This argument must be greater than or " + "equal to %d."), comparand); + else if (greater && !equal) + ds_put_format (&s, _("This argument must be greater than %d."), + comparand); + else if (equal) + ds_put_format (&s, _("This argument must be less than or " + "equal to %d."), comparand); + else + ds_put_format (&s, _("This argument must be less than %d."), + comparand); + msg (ME, ds_cstr (&s)); + ds_destroy (&s); + return false; + } + } + else + { + assert (*constraints == ' '); + constraints++; + } + } + return true; +} + +static gsl_matrix * +matrix_expr_evaluate_d_d (const struct matrix_function_properties *props, + gsl_matrix *subs[], size_t n_subs, + matrix_proto_d_d *f) +{ + assert (n_subs == 1); + + double d; + if (!to_scalar_args (props->name, subs, n_subs, &d)) + return NULL; + + if (!check_constraints (props, subs, n_subs)) + return NULL; + + gsl_matrix *m = gsl_matrix_alloc (1, 1); + gsl_matrix_set (m, 0, 0, f (d)); + return m; +} + +static gsl_matrix * +matrix_expr_evaluate_d_dd (const struct matrix_function_properties *props, + gsl_matrix *subs[], size_t n_subs, + matrix_proto_d_dd *f) +{ + assert (n_subs == 2); + + double d[2]; + if (!to_scalar_args (props->name, subs, n_subs, d)) + return NULL; + + if (!check_constraints (props, subs, n_subs)) + return NULL; + + gsl_matrix *m = gsl_matrix_alloc (1, 1); + gsl_matrix_set (m, 0, 0, f (d[0], d[1])); + return m; +} + static gsl_matrix * -matrix_expr_evaluate_m_d (const char *name, +matrix_expr_evaluate_m_d (const struct matrix_function_properties *props, gsl_matrix *subs[], size_t n_subs, matrix_proto_m_d *f) { assert (n_subs == 1); double d; - return to_scalar_args (name, subs, n_subs, &d) ? f(d) : NULL; + return to_scalar_args (props->name, subs, n_subs, &d) ? f (d) : NULL; } static gsl_matrix * -matrix_expr_evaluate_m_dd (const char *name, +matrix_expr_evaluate_m_dd (const struct matrix_function_properties *props, gsl_matrix *subs[], size_t n_subs, matrix_proto_m_dd *f) { assert (n_subs == 2); double d[2]; - return to_scalar_args (name, subs, n_subs, d) ? f(d[0], d[1]) : NULL; + return to_scalar_args (props->name, subs, n_subs, d) ? f(d[0], d[1]) : NULL; } static gsl_matrix * -matrix_expr_evaluate_m_ddd (const char *name, +matrix_expr_evaluate_m_ddd (const struct matrix_function_properties *props, gsl_matrix *subs[], size_t n_subs, matrix_proto_m_ddd *f) { assert (n_subs == 3); double d[3]; - return to_scalar_args (name, subs, n_subs, d) ? f(d[0], d[1], d[2]) : NULL; + return to_scalar_args (props->name, subs, n_subs, d) ? f(d[0], d[1], d[2]) : NULL; } static gsl_matrix * -matrix_expr_evaluate_m_m (const char *name UNUSED, +matrix_expr_evaluate_m_m (const struct matrix_function_properties *props UNUSED, gsl_matrix *subs[], size_t n_subs, matrix_proto_m_m *f) { @@ -2782,50 +3221,123 @@ matrix_expr_evaluate_m_m (const char *name UNUSED, } static gsl_matrix * -matrix_expr_evaluate_m_md (const char *name UNUSED, +matrix_expr_evaluate_m_m_e (const struct matrix_function_properties *props, + gsl_matrix *subs[], size_t n_subs, + matrix_proto_m_m_e *f) +{ + assert (n_subs == 1); + + if (!check_constraints (props, subs, n_subs)) + return NULL; + + MATRIX_FOR_ALL_ELEMENTS (a, y, x, subs[0]) + *a = f (*a); + return subs[0]; +} + +static gsl_matrix * +matrix_expr_evaluate_m_md (const struct matrix_function_properties *props UNUSED, gsl_matrix *subs[], size_t n_subs, matrix_proto_m_md *f) { assert (n_subs == 2); - if (!check_scalar_arg (name, subs, 1)) + if (!check_scalar_arg (props->name, subs, 1)) return NULL; return f (subs[0], to_scalar (subs[1])); } static gsl_matrix * -matrix_expr_evaluate_m_mdd (const char *name UNUSED, +matrix_expr_evaluate_m_md_e (const struct matrix_function_properties *props, + gsl_matrix *subs[], size_t n_subs, + matrix_proto_m_md_e *f) +{ + assert (n_subs == 2); + if (!check_scalar_arg (props->name, subs, 1)) + return NULL; + + if (!check_constraints (props, subs, n_subs)) + return NULL; + + double b = to_scalar (subs[1]); + MATRIX_FOR_ALL_ELEMENTS (a, y, x, subs[0]) + *a = f (*a, b); + return subs[0]; +} + +static gsl_matrix * +matrix_expr_evaluate_m_mdd (const struct matrix_function_properties *props UNUSED, gsl_matrix *subs[], size_t n_subs, matrix_proto_m_mdd *f) { assert (n_subs == 3); - if (!check_scalar_arg (name, subs, 1) || !check_scalar_arg (name, subs, 2)) + if (!check_scalar_arg (props->name, subs, 1) || !check_scalar_arg (props->name, subs, 2)) return NULL; return f (subs[0], to_scalar (subs[1]), to_scalar (subs[2])); } static gsl_matrix * -matrix_expr_evaluate_m_mm (const char *name UNUSED, - gsl_matrix *subs[], size_t n_subs, - matrix_proto_m_mm *f) +matrix_expr_evaluate_m_mdd_e (const struct matrix_function_properties *props, + gsl_matrix *subs[], size_t n_subs, + matrix_proto_m_mdd_e *f) { - assert (n_subs == 2); - return f (subs[0], subs[1]); + assert (n_subs == 3); + if (!check_scalar_arg (props->name, subs, 1) || !check_scalar_arg (props->name, subs, 2)) + return NULL; + + if (!check_constraints (props, subs, n_subs)) + return NULL; + + double b = to_scalar (subs[1]); + double c = to_scalar (subs[2]); + MATRIX_FOR_ALL_ELEMENTS (a, y, x, subs[0]) + *a = f (*a, b, c); + return subs[0]; } static gsl_matrix * -matrix_expr_evaluate_m_v (const char *name, - gsl_matrix *subs[], size_t n_subs, - matrix_proto_m_v *f) +matrix_expr_evaluate_m_mddd_e (const struct matrix_function_properties *props, + gsl_matrix *subs[], size_t n_subs, + matrix_proto_m_mddd_e *f) { - assert (n_subs == 1); - if (!check_vector_arg (name, subs, 0)) + assert (n_subs == 4); + for (size_t i = 1; i < 4; i++) + if (!check_scalar_arg (props->name, subs, i)) return NULL; - gsl_vector v = to_vector (subs[0]); - return f (&v); + + if (!check_constraints (props, subs, n_subs)) + return NULL; + + double b = to_scalar (subs[1]); + double c = to_scalar (subs[2]); + double d = to_scalar (subs[3]); + MATRIX_FOR_ALL_ELEMENTS (a, y, x, subs[0]) + *a = f (*a, b, c, d); + return subs[0]; +} + +static gsl_matrix * +matrix_expr_evaluate_m_mm (const struct matrix_function_properties *props UNUSED, + gsl_matrix *subs[], size_t n_subs, + matrix_proto_m_mm *f) +{ + assert (n_subs == 2); + return f (subs[0], subs[1]); +} + +static gsl_matrix * +matrix_expr_evaluate_m_v (const struct matrix_function_properties *props, + gsl_matrix *subs[], size_t n_subs, + matrix_proto_m_v *f) +{ + assert (n_subs == 1); + if (!check_vector_arg (props->name, subs, 0)) + return NULL; + gsl_vector v = to_vector (subs[0]); + return f (&v); } static gsl_matrix * -matrix_expr_evaluate_d_m (const char *name UNUSED, +matrix_expr_evaluate_d_m (const struct matrix_function_properties *props UNUSED, gsl_matrix *subs[], size_t n_subs, matrix_proto_d_m *f) { @@ -2837,7 +3349,7 @@ matrix_expr_evaluate_d_m (const char *name UNUSED, } static gsl_matrix * -matrix_expr_evaluate_m_any (const char *name UNUSED, +matrix_expr_evaluate_m_any (const struct matrix_function_properties *props UNUSED, gsl_matrix *subs[], size_t n_subs, matrix_proto_m_any *f) { @@ -2845,14 +3357,14 @@ matrix_expr_evaluate_m_any (const char *name UNUSED, } static gsl_matrix * -matrix_expr_evaluate_IDENT (const char *name, +matrix_expr_evaluate_IDENT (const struct matrix_function_properties *props, gsl_matrix *subs[], size_t n_subs, matrix_proto_IDENT *f) { assert (n_subs <= 2); double d[2]; - if (!to_scalar_args (name, subs, n_subs, d)) + if (!to_scalar_args (props->name, subs, n_subs, d)) return NULL; return f (d[0], d[n_subs - 1]); @@ -2911,11 +3423,16 @@ matrix_expr_evaluate (const struct matrix_expr *e) gsl_matrix *result = NULL; switch (e->op) { -#define F(NAME, PROTOTYPE) \ - case MOP_F_##NAME: \ - result = matrix_expr_evaluate_##PROTOTYPE (#NAME, \ - subs, e->n_subs, \ - matrix_eval_##NAME); \ +#define F(ENUM, STRING, PROTO, CONSTRAINTS) \ + case MOP_F_##ENUM: \ + { \ + static const struct matrix_function_properties props = { \ + .name = STRING, \ + .constraints = CONSTRAINTS, \ + }; \ + result = matrix_expr_evaluate_##PROTO (&props, subs, e->n_subs, \ + matrix_eval_##ENUM); \ + } \ break; MATRIX_FUNCTIONS #undef F @@ -3376,8 +3893,6 @@ struct matrix_cmd struct display_command { struct matrix_state *state; - bool dictionary; - bool status; } display; @@ -3391,10 +3906,7 @@ struct matrix_cmd struct save_command { struct matrix_expr *expression; - struct file_handle *outfile; - struct string_array *variables; - struct matrix_expr *names; - struct stringi_set strings; + struct save_file *sf; } save; @@ -3406,7 +3918,6 @@ struct matrix_cmd int c1, c2; enum fmt_type format; int w; - //int d; bool symmetric; bool reread; } @@ -3432,9 +3943,11 @@ struct matrix_cmd struct get_command { struct matrix_lvalue *dst; + struct dataset *dataset; struct file_handle *file; char *encoding; - struct string_array variables; + struct var_syntax *vars; + size_t n_vars; struct matrix_var *names; /* Treatment of missing values. */ @@ -3457,11 +3970,10 @@ struct matrix_cmd struct msave_command { struct msave_common *common; - char *varname_; struct matrix_expr *expr; const char *rowtype; - struct matrix_expr *factors; - struct matrix_expr *splits; + const struct matrix_expr *factors; + const struct matrix_expr *splits; } msave; @@ -3469,6 +3981,7 @@ struct matrix_cmd { struct matrix_state *state; struct file_handle *file; + char *encoding; struct stringi_set rowtypes; } mget; @@ -4230,32 +4743,18 @@ matrix_parse_break (struct matrix_state *s) static struct matrix_cmd * matrix_parse_display (struct matrix_state *s) { - bool dictionary = false; - bool status = false; - while (lex_token (s->lexer) == T_ID) + while (lex_token (s->lexer) != T_ENDCMD) { - if (lex_match_id (s->lexer, "DICTIONARY")) - dictionary = true; - else if (lex_match_id (s->lexer, "STATUS")) - status = true; - else + if (!lex_match_id (s->lexer, "DICTIONARY") + && !lex_match_id (s->lexer, "STATUS")) { lex_error_expecting (s->lexer, "DICTIONARY", "STATUS"); return NULL; } } - if (!dictionary && !status) - dictionary = status = true; struct matrix_cmd *cmd = xmalloc (sizeof *cmd); - *cmd = (struct matrix_cmd) { - .type = MCMD_DISPLAY, - .display = { - .state = s, - .dictionary = dictionary, - .status = status, - } - }; + *cmd = (struct matrix_cmd) { .type = MCMD_DISPLAY, .display = { s } }; return cmd; } @@ -4275,9 +4774,11 @@ matrix_cmd_execute_display (struct display_command *cmd) const struct matrix_state *s = cmd->state; struct pivot_table *table = pivot_table_create (N_("Matrix Variables")); - pivot_dimension_create ( - table, PIVOT_AXIS_COLUMN, N_("Property"), - N_("Rows"), N_("Columns"), N_("Size (kB)")); + struct pivot_dimension *attr_dimension + = pivot_dimension_create (table, PIVOT_AXIS_COLUMN, N_("Attribute")); + pivot_category_create_group (attr_dimension->root, N_("Dimension"), + N_("Rows"), N_("Columns")); + pivot_category_create_leaves (attr_dimension->root, N_("Size (kB)")); const struct matrix_var **vars = xmalloc (hmap_count (&s->vars) * sizeof *vars); size_t n_vars = 0; @@ -4302,6 +4803,7 @@ matrix_cmd_execute_display (struct display_command *cmd) for (size_t j = 0; j < sizeof values / sizeof *values; j++) pivot_table_put2 (table, j, i, pivot_value_new_integer (values[j])); } + free (vars); pivot_table_submit (table); } @@ -4344,18 +4846,199 @@ matrix_cmd_execute_release (struct release_command *cmd) } } +static struct save_file * +save_file_create (struct matrix_state *s, struct file_handle *fh, + struct string_array *variables, + struct matrix_expr *names, + struct stringi_set *strings) +{ + for (size_t i = 0; i < s->n_save_files; i++) + { + struct save_file *sf = s->save_files[i]; + if (fh_equal (sf->file, fh)) + { + fh_unref (fh); + + string_array_destroy (variables); + matrix_expr_destroy (names); + stringi_set_destroy (strings); + + return sf; + } + } + + struct save_file *sf = xmalloc (sizeof *sf); + *sf = (struct save_file) { + .file = fh, + .dataset = s->dataset, + .variables = *variables, + .names = names, + .strings = STRINGI_SET_INITIALIZER (sf->strings), + }; + + stringi_set_swap (&sf->strings, strings); + stringi_set_destroy (strings); + + s->save_files = xrealloc (s->save_files, + (s->n_save_files + 1) * sizeof *s->save_files); + s->save_files[s->n_save_files++] = sf; + return sf; +} + +static struct casewriter * +save_file_open (struct save_file *sf, gsl_matrix *m) +{ + if (sf->writer || sf->error) + { + if (sf->writer) + { + size_t n_variables = caseproto_get_n_widths ( + casewriter_get_proto (sf->writer)); + if (m->size2 != n_variables) + { + msg (ME, _("The first SAVE to %s within this matrix program " + "had %zu columns, so a %zu×%zu matrix cannot be " + "saved to it."), + sf->file == fh_inline_file () ? "*" : fh_get_name (sf->file), + n_variables, m->size1, m->size2); + return NULL; + } + } + return sf->writer; + } + + bool ok = true; + struct dictionary *dict = dict_create (get_default_encoding ()); + + /* Fill 'names' with user-specified names if there were any, either from + sf->variables or sf->names. */ + struct string_array names = { .n = 0 }; + if (sf->variables.n) + string_array_clone (&names, &sf->variables); + else if (sf->names) + { + gsl_matrix *nm = matrix_expr_evaluate (sf->names); + if (nm && is_vector (nm)) + { + gsl_vector nv = to_vector (nm); + for (size_t i = 0; i < nv.size; i++) + { + char *name = trimmed_string (gsl_vector_get (&nv, i)); + if (dict_id_is_valid (dict, name, true)) + string_array_append_nocopy (&names, name); + else + ok = false; + } + } + gsl_matrix_free (nm); + } + + struct stringi_set strings; + stringi_set_clone (&strings, &sf->strings); + + for (size_t i = 0; dict_get_var_cnt (dict) < m->size2; i++) + { + char tmp_name[64]; + const char *name; + if (i < names.n) + name = names.strings[i]; + else + { + snprintf (tmp_name, sizeof tmp_name, "COL%zu", i + 1); + name = tmp_name; + } + + int width = stringi_set_delete (&strings, name) ? 8 : 0; + struct variable *var = dict_create_var (dict, name, width); + if (!var) + { + msg (ME, _("Duplicate variable name %s in SAVE statement."), + name); + ok = false; + } + } + + if (!stringi_set_is_empty (&strings)) + { + size_t count = stringi_set_count (&strings); + const char *example = stringi_set_node_get_string ( + stringi_set_first (&strings)); + + if (count == 1) + msg (ME, _("The SAVE command STRINGS subcommand specifies an unknown " + "variable %s."), example); + else + msg (ME, ngettext ("The SAVE command STRINGS subcommand specifies %zu " + "unknown variable: %s.", + "The SAVE command STRINGS subcommand specifies %zu " + "unknown variables, including %s.", + count), + count, example); + ok = false; + } + stringi_set_destroy (&strings); + string_array_destroy (&names); + + if (!ok) + { + dict_unref (dict); + sf->error = true; + return NULL; + } + + if (sf->file == fh_inline_file ()) + sf->writer = autopaging_writer_create (dict_get_proto (dict)); + else + sf->writer = any_writer_open (sf->file, dict); + if (sf->writer) + sf->dict = dict; + else + { + dict_unref (dict); + sf->error = true; + } + return sf->writer; +} + +static void +save_file_destroy (struct save_file *sf) +{ + if (sf) + { + if (sf->file == fh_inline_file () && sf->writer && sf->dict) + { + dataset_set_dict (sf->dataset, sf->dict); + dataset_set_source (sf->dataset, casewriter_make_reader (sf->writer)); + } + else + { + casewriter_destroy (sf->writer); + dict_unref (sf->dict); + } + fh_unref (sf->file); + string_array_destroy (&sf->variables); + matrix_expr_destroy (sf->names); + stringi_set_destroy (&sf->strings); + free (sf); + } +} + static struct matrix_cmd * matrix_parse_save (struct matrix_state *s) { struct matrix_cmd *cmd = xmalloc (sizeof *cmd); *cmd = (struct matrix_cmd) { .type = MCMD_SAVE, - .save = { - .strings = STRINGI_SET_INITIALIZER (cmd->save.strings) - } + .save = { .expression = NULL }, }; + struct file_handle *fh = NULL; struct save_command *save = &cmd->save; + + struct string_array variables = STRING_ARRAY_INITIALIZER; + struct matrix_expr *names = NULL; + struct stringi_set strings = STRINGI_SET_INITIALIZER (strings); + save->expression = matrix_parse_exp (s); if (!save->expression) goto error; @@ -4366,11 +5049,11 @@ matrix_parse_save (struct matrix_state *s) { lex_match (s->lexer, T_EQUALS); - fh_unref (save->outfile); - save->outfile = (lex_match (s->lexer, T_ASTERISK) - ? fh_inline_file () - : fh_parse (s->lexer, FH_REF_FILE, s->session)); - if (!save->outfile) + fh_unref (fh); + fh = (lex_match (s->lexer, T_ASTERISK) + ? fh_inline_file () + : fh_parse (s->lexer, FH_REF_FILE, s->session)); + if (!fh) goto error; } else if (lex_match_id (s->lexer, "VARIABLES")) @@ -4386,10 +5069,8 @@ matrix_parse_save (struct matrix_state *s) if (!ok) goto error; - string_array_destroy (save->variables); - if (!save->variables) - save->variables = xmalloc (sizeof *save->variables); - *save->variables = (struct string_array) { + string_array_clear (&variables); + variables = (struct string_array) { .strings = names, .n = n, .allocated = n, @@ -4398,9 +5079,9 @@ matrix_parse_save (struct matrix_state *s) else if (lex_match_id (s->lexer, "NAMES")) { lex_match (s->lexer, T_EQUALS); - matrix_expr_destroy (save->names); - save->names = matrix_parse_exp (s); - if (!save->names) + matrix_expr_destroy (names); + names = matrix_parse_exp (s); + if (!names) goto error; } else if (lex_match_id (s->lexer, "STRINGS")) @@ -4408,7 +5089,7 @@ matrix_parse_save (struct matrix_state *s) lex_match (s->lexer, T_EQUALS); while (lex_token (s->lexer) == T_ID) { - stringi_set_insert (&save->strings, lex_tokcstr (s->lexer)); + stringi_set_insert (&strings, lex_tokcstr (s->lexer)); lex_get (s->lexer); if (!lex_match (s->lexer, T_COMMA)) break; @@ -4422,29 +5103,34 @@ matrix_parse_save (struct matrix_state *s) } } - if (!save->outfile) + if (!fh) { - if (s->prev_save_outfile) - save->outfile = fh_ref (s->prev_save_outfile); + if (s->prev_save_file) + fh = fh_ref (s->prev_save_file); else { lex_sbc_missing ("OUTFILE"); goto error; } } - fh_unref (s->prev_save_outfile); - s->prev_save_outfile = fh_ref (save->outfile); + fh_unref (s->prev_save_file); + s->prev_save_file = fh_ref (fh); - if (save->variables && save->names) + if (variables.n && names) { msg (SW, _("VARIABLES and NAMES both specified; ignoring NAMES.")); - matrix_expr_destroy (save->names); - save->names = NULL; + matrix_expr_destroy (names); + names = NULL; } + save->sf = save_file_create (s, fh, &variables, names, &strings); return cmd; error: + string_array_destroy (&variables); + matrix_expr_destroy (names); + stringi_set_destroy (&strings); + fh_unref (fh); matrix_cmd_destroy (cmd); return NULL; } @@ -4452,90 +5138,18 @@ error: static void matrix_cmd_execute_save (const struct save_command *save) { - assert (save->outfile != fh_inline_file ()); /* XXX not yet implemented */ - gsl_matrix *m = matrix_expr_evaluate (save->expression); if (!m) return; - bool ok = true; - struct dictionary *dict = dict_create (get_default_encoding ()); - struct string_array names = { .n = 0 }; - if (save->variables) - string_array_clone (&names, save->variables); - else if (save->names) - { - gsl_matrix *nm = matrix_expr_evaluate (save->names); - if (nm && is_vector (nm)) - { - gsl_vector nv = to_vector (nm); - for (size_t i = 0; i < nv.size; i++) - { - char *name = trimmed_string (gsl_vector_get (&nv, i)); - if (dict_id_is_valid (dict, name, true)) - string_array_append_nocopy (&names, name); - else - ok = false; - } - } - } - - struct stringi_set strings; - stringi_set_clone (&strings, &save->strings); - - for (size_t i = 0; dict_get_var_cnt (dict) < m->size2; i++) - { - char tmp_name[64]; - const char *name; - if (i < names.n) - name = names.strings[i]; - else - { - snprintf (tmp_name, sizeof tmp_name, "COL%zu", i + 1); - name = tmp_name; - } - - int width = stringi_set_delete (&strings, name) ? 8 : 0; - struct variable *var = dict_create_var (dict, name, width); - if (!var) - { - msg (SE, _("Duplicate variable name %s in SAVE statement."), - name); - ok = false; - } - } - - if (!stringi_set_is_empty (&strings)) - { - const char *example = stringi_set_node_get_string ( - stringi_set_first (&strings)); - msg (SE, ngettext ("STRINGS specified a variable %s, but no variable " - "with that name was found on SAVE.", - "STRINGS specified %2$zu variables, including %1$s, " - "whose names were not found on SAVE.", - stringi_set_count (&strings)), - example, stringi_set_count (&strings)); - ok = false; - } - stringi_set_destroy (&strings); - string_array_destroy (&names); - - if (!ok) - { - gsl_matrix_free (m); - dict_unref (dict); - return; - } - - struct casewriter *writer = any_writer_open (save->outfile, dict); + struct casewriter *writer = save_file_open (save->sf, m); if (!writer) { gsl_matrix_free (m); - dict_unref (dict); return; } - const struct caseproto *proto = dict_get_proto (dict); + const struct caseproto *proto = casewriter_get_proto (writer); for (size_t y = 0; y < m->size1; y++) { struct ccase *c = case_create (proto); @@ -4551,10 +5165,7 @@ matrix_cmd_execute_save (const struct save_command *save) } casewriter_write (writer, c); } - casewriter_destroy (writer); - gsl_matrix_free (m); - dict_unref (dict); } static struct matrix_cmd * @@ -5354,6 +5965,7 @@ matrix_parse_get (struct matrix_state *s) *cmd = (struct matrix_cmd) { .type = MCMD_GET, .get = { + .dataset = s->dataset, .user = { .treatment = MGET_ERROR }, .system = { .treatment = MGET_ERROR }, } @@ -5368,25 +5980,20 @@ matrix_parse_get (struct matrix_state *s) { if (lex_match_id (s->lexer, "FILE")) { - if (get->variables.n) - { - lex_error (s->lexer, _("FILE must precede VARIABLES")); - goto error; - } lex_match (s->lexer, T_EQUALS); fh_unref (get->file); - get->file = fh_parse (s->lexer, FH_REF_FILE, s->session); - if (!get->file) - goto error; + if (lex_match (s->lexer, T_ASTERISK)) + get->file = NULL; + else + { + get->file = fh_parse (s->lexer, FH_REF_FILE, s->session); + if (!get->file) + goto error; + } } else if (lex_match_id (s->lexer, "ENCODING")) { - if (get->variables.n) - { - lex_error (s->lexer, _("ENCODING must precede VARIABLES")); - goto error; - } lex_match (s->lexer, T_EQUALS); if (!lex_force_string (s->lexer)) goto error; @@ -5400,40 +6007,14 @@ matrix_parse_get (struct matrix_state *s) { lex_match (s->lexer, T_EQUALS); - struct dictionary *dict = NULL; - if (!get->file) - { - dict = dataset_dict (s->dataset); - if (dict_get_var_cnt (dict) == 0) - { - lex_error (s->lexer, _("GET cannot read empty active file.")); - goto error; - } - } - else - { - struct casereader *reader = any_reader_open_and_decode ( - get->file, get->encoding, &dict, NULL); - if (!reader) - goto error; - casereader_destroy (reader); - } - - struct variable **vars; - size_t n_vars; - bool ok = parse_variables (s->lexer, dict, &vars, &n_vars, - PV_DUPLICATE | PV_NUMERIC | PV_NO_SCRATCH); - if (!ok) + if (get->n_vars) { - dict_unref (dict); + lex_sbc_only_once ("VARIABLES"); goto error; } - string_array_clear (&get->variables); - for (size_t i = 0; i < n_vars; i++) - string_array_append (&get->variables, var_get_name (vars[i])); - free (vars); - dict_unref (dict); + if (!var_syntax_parse (s->lexer, &get->vars, &get->n_vars)) + goto error; } else if (lex_match_id (s->lexer, "NAMES")) { @@ -5502,59 +6083,33 @@ error: } static void -matrix_cmd_execute_get (struct get_command *get) +matrix_cmd_execute_get__ (struct get_command *get, struct casereader *reader, + const struct dictionary *dict) { - assert (get->file); /* XXX */ - - struct dictionary *dict; - struct casereader *reader = any_reader_open_and_decode ( - get->file, get->encoding, &dict, NULL); - if (!reader) - return; - - const struct variable **vars = xnmalloc ( - get->variables.n ? get->variables.n : dict_get_var_cnt (dict), - sizeof *vars); + struct variable **vars; size_t n_vars = 0; - if (get->variables.n) + if (get->n_vars) { - for (size_t i = 0; i < get->variables.n; i++) - { - const char *name = get->variables.strings[i]; - const struct variable *var = dict_lookup_var (dict, name); - if (!var) - { - msg (SE, _("GET: Data file does not contain variable %s."), - name); - dict_unref (dict); - free (vars); - return; - } - if (!var_is_numeric (var)) - { - msg (SE, _("GET: Variable %s is not numeric."), name); - dict_unref (dict); - free (vars); - return; - } - vars[n_vars++] = var; - } + if (!var_syntax_evaluate (get->vars, get->n_vars, dict, + &vars, &n_vars, PV_NUMERIC)) + return; } else { - for (size_t i = 0; i < dict_get_var_cnt (dict); i++) + n_vars = dict_get_var_cnt (dict); + vars = xnmalloc (n_vars, sizeof *vars); + for (size_t i = 0; i < n_vars; i++) { - const struct variable *var = dict_get_var (dict, i); + struct variable *var = dict_get_var (dict, i); if (!var_is_numeric (var)) { msg (SE, _("GET: Variable %s is not numeric."), var_get_name (var)); - dict_unref (dict); free (vars); return; } - vars[n_vars++] = var; + vars[i] = var; } } @@ -5632,7 +6187,6 @@ matrix_cmd_execute_get (struct get_command *get) if (keep) n_rows++; } - casereader_destroy (reader); if (!error) { m->size1 = n_rows; @@ -5640,9 +6194,56 @@ matrix_cmd_execute_get (struct get_command *get) } else gsl_matrix_free (m); - dict_unref (dict); free (vars); } + +static bool +matrix_open_casereader (const char *command_name, + struct file_handle *file, const char *encoding, + struct dataset *dataset, + struct casereader **readerp, struct dictionary **dictp) +{ + if (file) + { + *readerp = any_reader_open_and_decode (file, encoding, dictp, NULL); + return *readerp != NULL; + } + else + { + if (dict_get_var_cnt (dataset_dict (dataset)) == 0) + { + msg (ME, _("The %s command cannot read an empty active file."), + command_name); + return false; + } + *readerp = proc_open (dataset); + *dictp = dict_ref (dataset_dict (dataset)); + return true; + } +} + +static void +matrix_close_casereader (struct file_handle *file, struct dataset *dataset, + struct casereader *reader, struct dictionary *dict) +{ + dict_unref (dict); + casereader_destroy (reader); + if (!file) + proc_commit (dataset); +} + +static void +matrix_cmd_execute_get (struct get_command *get) +{ + struct casereader *r; + struct dictionary *d; + if (matrix_open_casereader ("GET", get->file, get->encoding, get->dataset, + &r, &d)) + { + matrix_cmd_execute_get__ (get, r, d); + matrix_close_casereader (get->file, get->dataset, r, d); + } +} static const char * match_rowtype (struct lexer *lexer) @@ -5668,8 +6269,6 @@ parse_var_names (struct lexer *lexer, struct string_array *sa) string_array_clear (sa); struct dictionary *dict = dict_create (get_default_encoding ()); - dict_create_var_assert (dict, "ROWTYPE_", 8); - dict_create_var_assert (dict, "VARNAME_", 8); char **names; size_t n_names; bool ok = parse_DATA_LIST_vars (lexer, dict, &names, &n_names, @@ -5678,6 +6277,17 @@ parse_var_names (struct lexer *lexer, struct string_array *sa) if (ok) { + for (size_t i = 0; i < n_names; i++) + if (ss_equals_case (ss_cstr (names[i]), ss_cstr ("ROWTYPE_")) + || ss_equals_case (ss_cstr (names[i]), ss_cstr ("VARNAME_"))) + { + msg (SE, _("Variable name %s is reserved."), names[i]); + for (size_t j = 0; j < n_names; j++) + free (names[i]); + free (names); + return false; + } + string_array_clear (sa); sa->strings = names; sa->n = sa->allocated = n_names; @@ -5686,7 +6296,7 @@ parse_var_names (struct lexer *lexer, struct string_array *sa) } static void -msave_common_uninit (struct msave_common *common) +msave_common_destroy (struct msave_common *common) { if (common) { @@ -5694,6 +6304,19 @@ msave_common_uninit (struct msave_common *common) string_array_destroy (&common->variables); string_array_destroy (&common->fnames); string_array_destroy (&common->snames); + + for (size_t i = 0; i < common->n_factors; i++) + matrix_expr_destroy (common->factors[i]); + free (common->factors); + + for (size_t i = 0; i < common->n_splits; i++) + matrix_expr_destroy (common->splits[i]); + free (common->splits); + + dict_unref (common->dict); + casewriter_destroy (common->writer); + + free (common); } } @@ -5723,16 +6346,19 @@ compare_variables (const char *keyword, static struct matrix_cmd * matrix_parse_msave (struct matrix_state *s) { - struct msave_common common = { .outfile = NULL }; + struct msave_common *common = xmalloc (sizeof *common); + *common = (struct msave_common) { .outfile = NULL }; + struct matrix_cmd *cmd = xmalloc (sizeof *cmd); *cmd = (struct matrix_cmd) { .type = MCMD_MSAVE, .msave = { .expr = NULL } }; + struct matrix_expr *splits = NULL; + struct matrix_expr *factors = NULL; + struct msave_command *msave = &cmd->msave; - if (lex_token (s->lexer) == T_ID) - msave->varname_ = ss_xstrdup (lex_tokss (s->lexer)); msave->expr = matrix_parse_exp (s); if (!msave->expr) - return NULL; + goto error; while (lex_match (s->lexer, T_SLASH)) { @@ -5748,42 +6374,42 @@ matrix_parse_msave (struct matrix_state *s) { lex_match (s->lexer, T_EQUALS); - fh_unref (common.outfile); - common.outfile = fh_parse (s->lexer, FH_REF_FILE, NULL); - if (!common.outfile) + fh_unref (common->outfile); + common->outfile = fh_parse (s->lexer, FH_REF_FILE, NULL); + if (!common->outfile) goto error; } else if (lex_match_id (s->lexer, "VARIABLES")) { - if (!parse_var_names (s->lexer, &common.variables)) + if (!parse_var_names (s->lexer, &common->variables)) goto error; } else if (lex_match_id (s->lexer, "FNAMES")) { - if (!parse_var_names (s->lexer, &common.fnames)) + if (!parse_var_names (s->lexer, &common->fnames)) goto error; } else if (lex_match_id (s->lexer, "SNAMES")) { - if (!parse_var_names (s->lexer, &common.snames)) + if (!parse_var_names (s->lexer, &common->snames)) goto error; } else if (lex_match_id (s->lexer, "SPLIT")) { lex_match (s->lexer, T_EQUALS); - matrix_expr_destroy (msave->splits); - msave->splits = matrix_parse_exp (s); - if (!msave->splits) + matrix_expr_destroy (splits); + splits = matrix_parse_exp (s); + if (!splits) goto error; } else if (lex_match_id (s->lexer, "FACTOR")) { lex_match (s->lexer, T_EQUALS); - matrix_expr_destroy (msave->factors); - msave->factors = matrix_parse_exp (s); - if (!msave->factors) + matrix_expr_destroy (factors); + factors = matrix_parse_exp (s); + if (!factors) goto error; } else @@ -5798,49 +6424,31 @@ matrix_parse_msave (struct matrix_state *s) lex_sbc_missing ("TYPE"); goto error; } - common.has_splits = msave->splits || common.snames.n; - common.has_factors = msave->factors || common.fnames.n; - - struct msave_common *c = s->common ? s->common : &common; - if (c->fnames.n && !msave->factors) - { - msg (SE, _("FNAMES requires FACTOR.")); - goto error; - } - if (c->snames.n && !msave->splits) - { - msg (SE, _("SNAMES requires SPLIT.")); - goto error; - } - if (c->has_factors && !common.has_factors) - { - msg (SE, _("%s is required because it was present on the first " - "MSAVE in this MATRIX command."), "FACTOR"); - goto error; - } - if (c->has_splits && !common.has_splits) - { - msg (SE, _("%s is required because it was present on the first " - "MSAVE in this MATRIX command."), "SPLIT"); - goto error; - } if (!s->common) { - if (!common.outfile) + if (common->fnames.n && !factors) + { + msg (SE, _("FNAMES requires FACTOR.")); + goto error; + } + if (common->snames.n && !splits) + { + msg (SE, _("SNAMES requires SPLIT.")); + goto error; + } + if (!common->outfile) { lex_sbc_missing ("OUTFILE"); goto error; } - s->common = xmemdup (&common, sizeof common); + s->common = common; } else { - if (common.outfile) + if (common->outfile) { - bool same = common.outfile == s->common->outfile; - fh_unref (common.outfile); - if (!same) + if (!fh_equal (common->outfile, s->common->outfile)) { msg (SE, _("OUTFILE must name the same file on each MSAVE " "within a single MATRIX command.")); @@ -5848,25 +6456,47 @@ matrix_parse_msave (struct matrix_state *s) } } if (!compare_variables ("VARIABLES", - &common.variables, &s->common->variables) - || !compare_variables ("FNAMES", &common.fnames, &s->common->fnames) - || !compare_variables ("SNAMES", &common.snames, &s->common->snames)) + &common->variables, &s->common->variables) + || !compare_variables ("FNAMES", &common->fnames, &s->common->fnames) + || !compare_variables ("SNAMES", &common->snames, &s->common->snames)) goto error; - msave_common_uninit (&common); + msave_common_destroy (common); } msave->common = s->common; - if (!msave->varname_) - msave->varname_ = xasprintf ("MAT%zu", ++s->common->n_varnames); + + struct msave_common *c = s->common; + if (factors) + { + if (c->n_factors >= c->allocated_factors) + c->factors = x2nrealloc (c->factors, &c->allocated_factors, + sizeof *c->factors); + c->factors[c->n_factors++] = factors; + } + if (c->n_factors > 0) + msave->factors = c->factors[c->n_factors - 1]; + + if (splits) + { + if (c->n_splits >= c->allocated_splits) + c->splits = x2nrealloc (c->splits, &c->allocated_splits, + sizeof *c->splits); + c->splits[c->n_splits++] = splits; + } + if (c->n_splits > 0) + msave->splits = c->splits[c->n_splits - 1]; + return cmd; error: - msave_common_uninit (&common); + matrix_expr_destroy (splits); + matrix_expr_destroy (factors); + msave_common_destroy (common); matrix_cmd_destroy (cmd); return NULL; } static gsl_vector * -matrix_expr_evaluate_vector (struct matrix_expr *e, const char *name) +matrix_expr_evaluate_vector (const struct matrix_expr *e, const char *name) { gsl_matrix *m = matrix_expr_evaluate (e); if (!m) @@ -5901,8 +6531,9 @@ msave_create_dict (const struct msave_common *common) const char *dup_split = msave_add_vars (dict, &common->snames); if (dup_split) { - msg (SE, _("Duplicate SPLIT variable name %s."), dup_split); - goto error; + /* Should not be possible because the parser ensures that the names are + unique. */ + NOT_REACHED (); } dict_create_var_assert (dict, "ROWTYPE_", 8); @@ -5946,10 +6577,10 @@ matrix_cmd_execute_msave (struct msave_command *msave) for (size_t i = 0; i < m->size2; i++) string_array_append_nocopy (&common->variables, xasprintf ("COL%zu", i + 1)); - - if (m->size2 != common->variables.n) + else if (m->size2 != common->variables.n) { - msg (SE, _("Matrix on MSAVE has %zu columns instead of required %zu."), + msg (SE, + _("Matrix on MSAVE has %zu columns but there are %zu variables."), m->size2, common->variables.n); goto error; } @@ -5964,11 +6595,10 @@ matrix_cmd_execute_msave (struct msave_command *msave) for (size_t i = 0; i < factors->size; i++) string_array_append_nocopy (&common->fnames, xasprintf ("FAC%zu", i + 1)); - - if (factors->size != common->fnames.n) + else if (factors->size != common->fnames.n) { msg (SE, _("There are %zu factor variables, " - "but %zu split values were supplied."), + "but %zu factor values were supplied."), common->fnames.n, factors->size); goto error; } @@ -5980,16 +6610,15 @@ matrix_cmd_execute_msave (struct msave_command *msave) if (!splits) goto error; - if (!common->fnames.n) + if (!common->snames.n) for (size_t i = 0; i < splits->size; i++) - string_array_append_nocopy (&common->fnames, + string_array_append_nocopy (&common->snames, xasprintf ("SPL%zu", i + 1)); - - if (splits->size != common->fnames.n) + else if (splits->size != common->snames.n) { msg (SE, _("There are %zu split variables, " "but %zu split values were supplied."), - common->fnames.n, splits->size); + common->snames.n, splits->size); goto error; } } @@ -6010,6 +6639,8 @@ matrix_cmd_execute_msave (struct msave_command *msave) common->dict = dict; } + bool matrix = (!strcmp (msave->rowtype, "COV") + || !strcmp (msave->rowtype, "CORR")); for (size_t y = 0; y < m->size1; y++) { struct ccase *c = case_create (dict_get_proto (common->dict)); @@ -6030,8 +6661,11 @@ matrix_cmd_execute_msave (struct msave_command *msave) *case_num_rw_idx (c, idx++) = gsl_vector_get (factors, i); /* VARNAME_. */ + const char *varname_ = (matrix && y < common->variables.n + ? common->variables.strings[y] + : ""); buf_copy_str_rpad (CHAR_CAST (char *, case_data_rw_idx (c, idx++)->s), 8, - msave->varname_, ' '); + varname_, ' '); /* Continuous variables. */ for (size_t x = 0; x < m->size2; x++) @@ -6049,11 +6683,18 @@ static struct matrix_cmd * matrix_parse_mget (struct matrix_state *s) { struct matrix_cmd *cmd = xmalloc (sizeof *cmd); - *cmd = (struct matrix_cmd) { .type = MCMD_MGET, .mget = { .state = s } }; + *cmd = (struct matrix_cmd) { + .type = MCMD_MGET, + .mget = { + .state = s, + .rowtypes = STRINGI_SET_INITIALIZER (cmd->mget.rowtypes), + }, + }; struct mget_command *mget = &cmd->mget; - for (;;) + lex_match (s->lexer, T_SLASH); + while (lex_token (s->lexer) != T_ENDCMD) { if (lex_match_id (s->lexer, "FILE")) { @@ -6064,6 +6705,17 @@ matrix_parse_mget (struct matrix_state *s) if (!mget->file) goto error; } + else if (lex_match_id (s->lexer, "ENCODING")) + { + lex_match (s->lexer, T_EQUALS); + if (!lex_force_string (s->lexer)) + goto error; + + free (mget->encoding); + mget->encoding = ss_xstrdup (lex_tokss (s->lexer)); + + lex_get (s->lexer); + } else if (lex_match_id (s->lexer, "TYPE")) { lex_match (s->lexer, T_EQUALS); @@ -6082,11 +6734,7 @@ matrix_parse_mget (struct matrix_state *s) lex_error_expecting (s->lexer, "FILE", "TYPE"); goto error; } - if (lex_token (s->lexer) == T_ENDCMD) - break; - - if (!lex_force_match (s->lexer, T_SLASH)) - goto error; + lex_match (s->lexer, T_SLASH); } return cmd; @@ -6115,34 +6763,82 @@ get_a8_var (const struct dictionary *d, const char *name) } static bool -is_rowtype (const union value *v, const char *rowtype) +var_changed (const struct ccase *ca, const struct ccase *cb, + const struct variable *var) +{ + return (ca && cb + ? !value_equal (case_data (ca, var), case_data (cb, var), + var_get_width (var)) + : ca || cb); +} + +static bool +vars_changed (const struct ccase *ca, const struct ccase *cb, + const struct dictionary *d, + size_t first_var, size_t n_vars) +{ + for (size_t i = 0; i < n_vars; i++) + { + const struct variable *v = dict_get_var (d, first_var + i); + if (var_changed (ca, cb, v)) + return true; + } + return false; +} + +static bool +vars_all_missing (const struct ccase *c, const struct dictionary *d, + size_t first_var, size_t n_vars) { - struct substring vs = ss_buffer (CHAR_CAST (char *, v->s), 8); - ss_rtrim (&vs, ss_cstr (" ")); - return ss_equals_case (vs, ss_cstr (rowtype)); + for (size_t i = 0; i < n_vars; i++) + if (case_num (c, dict_get_var (d, first_var + i)) != SYSMIS) + return false; + return true; } static void matrix_mget_commit_var (struct ccase **rows, size_t n_rows, const struct dictionary *d, const struct variable *rowtype_var, - struct matrix_state *s, size_t si, size_t fi, - size_t cs, size_t cn) + const struct stringi_set *accepted_rowtypes, + struct matrix_state *s, + size_t ss, size_t sn, size_t si, + size_t fs, size_t fn, size_t fi, + size_t cs, size_t cn, + struct pivot_table *pt, + struct pivot_dimension *var_dimension) { if (!n_rows) - return; + goto exit; - const union value *rowtype_ = case_data (rows[0], rowtype_var); - const char *name_prefix = (is_rowtype (rowtype_, "COV") ? "CV" - : is_rowtype (rowtype_, "CORR") ? "CR" - : is_rowtype (rowtype_, "MEAN") ? "MN" - : is_rowtype (rowtype_, "STDDEV") ? "SD" - : is_rowtype (rowtype_, "N") ? "NC" - : "CN"); + /* Is this a matrix for pooled data, either where there are no factor + variables or the factor variables are missing? */ + bool pooled = !fn || vars_all_missing (rows[0], d, fs, fn); + + struct substring rowtype = case_ss (rows[0], rowtype_var); + ss_rtrim (&rowtype, ss_cstr (" ")); + if (!stringi_set_is_empty (accepted_rowtypes) + && !stringi_set_contains_len (accepted_rowtypes, + rowtype.string, rowtype.length)) + goto exit; + + const char *prefix = (ss_equals_case (rowtype, ss_cstr ("COV")) ? "CV" + : ss_equals_case (rowtype, ss_cstr ("CORR")) ? "CR" + : ss_equals_case (rowtype, ss_cstr ("MEAN")) ? "MN" + : ss_equals_case (rowtype, ss_cstr ("STDDEV")) ? "SD" + : ss_equals_case (rowtype, ss_cstr ("N")) ? "NC" + : ss_equals_case (rowtype, ss_cstr ("COUNT")) ? "CN" + : NULL); + if (!prefix) + { + msg (SE, _("Matrix data file contains unknown ROWTYPE_ \"%.*s\"."), + (int) rowtype.length, rowtype.string); + goto exit; + } struct string name = DS_EMPTY_INITIALIZER; - ds_put_cstr (&name, name_prefix); - if (fi > 0) + ds_put_cstr (&name, prefix); + if (!pooled) ds_put_format (&name, "F%zu", fi); if (si > 0) ds_put_format (&name, "S%zu", si); @@ -6154,7 +6850,7 @@ matrix_mget_commit_var (struct ccase **rows, size_t n_rows, { msg (SW, _("Matrix data file contains variable with existing name %s."), ds_cstr (&name)); - goto exit; + goto exit_free_name; } gsl_matrix *m = gsl_matrix_alloc (n_rows, cn); @@ -6174,6 +6870,28 @@ matrix_mget_commit_var (struct ccase **rows, size_t n_rows, } } + int var_index = pivot_category_create_leaf ( + var_dimension->root, pivot_value_new_user_text (ds_cstr (&name), SIZE_MAX)); + double values[] = { n_rows, cn }; + for (size_t j = 0; j < sn; j++) + { + struct variable *var = dict_get_var (d, ss + j); + const union value *value = case_data (rows[0], var); + pivot_table_put2 (pt, j, var_index, + pivot_value_new_var_value (var, value)); + } + for (size_t j = 0; j < fn; j++) + { + struct variable *var = dict_get_var (d, fs + j); + const union value sysmis = { .f = SYSMIS }; + const union value *value = pooled ? &sysmis : case_data (rows[0], var); + pivot_table_put2 (pt, j + sn, var_index, + pivot_value_new_var_value (var, value)); + } + for (size_t j = 0; j < sizeof values / sizeof *values; j++) + pivot_table_put2 (pt, j + sn + fn, var_index, + pivot_value_new_integer (values[j])); + if (n_missing) msg (SE, ngettext ("Matrix data file variable %s contains a missing " "value, which was treated as zero.", @@ -6182,57 +6900,32 @@ matrix_mget_commit_var (struct ccase **rows, size_t n_rows, ds_cstr (&name), n_missing); mv->value = m; -exit: +exit_free_name: ds_destroy (&name); + +exit: for (size_t y = 0; y < n_rows; y++) case_unref (rows[y]); } -static bool -var_changed (const struct ccase *ca, const struct ccase *cb, - const struct variable *var) -{ - return !value_equal (case_data (ca, var), case_data (cb, var), - var_get_width (var)); -} - -static bool -vars_changed (const struct ccase *ca, const struct ccase *cb, - const struct dictionary *d, - size_t first_var, size_t n_vars) -{ - for (size_t i = 0; i < n_vars; i++) - { - const struct variable *v = dict_get_var (d, first_var + i); - if (var_changed (ca, cb, v)) - return true; - } - return false; -} - static void -matrix_cmd_execute_mget (struct mget_command *mget) +matrix_cmd_execute_mget__ (struct mget_command *mget, + struct casereader *r, const struct dictionary *d) { - struct dictionary *d; - struct casereader *r = any_reader_open_and_decode (mget->file, "UTF-8", - &d, NULL); - if (!r) - return; - const struct variable *rowtype_ = get_a8_var (d, "ROWTYPE_"); const struct variable *varname_ = get_a8_var (d, "VARNAME_"); if (!rowtype_ || !varname_) - goto exit; + return; if (var_get_dict_index (rowtype_) >= var_get_dict_index (varname_)) { msg (SE, _("ROWTYPE_ must precede VARNAME_ in matrix data file.")); - goto exit; + return; } if (var_get_dict_index (varname_) + 1 >= dict_get_var_cnt (d)) { msg (SE, _("Matrix data file contains no continuous variables.")); - goto exit; + return; } for (size_t i = 0; i < dict_get_var_cnt (d); i++) @@ -6243,7 +6936,7 @@ matrix_cmd_execute_mget (struct mget_command *mget) msg (SE, _("Matrix data file contains unexpected string variable %s."), var_get_name (v)); - goto exit; + return; } } @@ -6264,6 +6957,32 @@ matrix_cmd_execute_mget (struct mget_command *mget) size_t cn = dict_get_var_cnt (d) - cs; struct ccase *cc = NULL; + /* Pivot table. */ + struct pivot_table *pt = pivot_table_create ( + N_("Matrix Variables Created by MGET")); + struct pivot_dimension *attr_dimension = pivot_dimension_create ( + pt, PIVOT_AXIS_COLUMN, N_("Attribute")); + struct pivot_dimension *var_dimension = pivot_dimension_create ( + pt, PIVOT_AXIS_ROW, N_("Variable")); + if (sn > 0) + { + struct pivot_category *splits = pivot_category_create_group ( + attr_dimension->root, N_("Split Values")); + for (size_t i = 0; i < sn; i++) + pivot_category_create_leaf (splits, pivot_value_new_variable ( + dict_get_var (d, ss + i))); + } + if (fn > 0) + { + struct pivot_category *factors = pivot_category_create_group ( + attr_dimension->root, N_("Factors")); + for (size_t i = 0; i < fn; i++) + pivot_category_create_leaf (factors, pivot_value_new_variable ( + dict_get_var (d, fs + i))); + } + pivot_category_create_group (attr_dimension->root, N_("Dimensions"), + N_("Rows"), N_("Columns")); + /* Matrix. */ struct ccase **rows = NULL; size_t allocated_rows = 0; @@ -6272,25 +6991,30 @@ matrix_cmd_execute_mget (struct mget_command *mget) struct ccase *c; while ((c = casereader_read (r)) != NULL) { - bool sd = vars_changed (sc, c, d, ss, sn); - bool fd = sd || vars_changed (fc, c, d, fs, fn); - bool md = fd || !cc || var_changed (cc, c, rowtype_) || var_changed (cc, c, varname_); - if (sd) - { - si++; - case_unref (sc); - sc = case_ref (c); - } - if (fd) + bool row_has_factors = fn && !vars_all_missing (c, d, fs, fn); + + enum { - fi++; - case_unref (fc); - fc = case_ref (c); + SPLITS_CHANGED, + FACTORS_CHANGED, + ROWTYPE_CHANGED, + NOTHING_CHANGED } - if (md) + change + = (sn && (!sc || vars_changed (sc, c, d, ss, sn)) ? SPLITS_CHANGED + : fn && (!fc || vars_changed (fc, c, d, fs, fn)) ? FACTORS_CHANGED + : !cc || var_changed (cc, c, rowtype_) ? ROWTYPE_CHANGED + : NOTHING_CHANGED); + + if (change != NOTHING_CHANGED) { - matrix_mget_commit_var (rows, n_rows, d, rowtype_, - mget->state, si, fi, cs, cn); + matrix_mget_commit_var (rows, n_rows, d, + rowtype_, &mget->rowtypes, + mget->state, + ss, sn, si, + fs, fn, fi, + cs, cn, + pt, var_dimension); n_rows = 0; case_unref (cc); cc = case_ref (c); @@ -6299,13 +7023,61 @@ matrix_cmd_execute_mget (struct mget_command *mget) if (n_rows >= allocated_rows) rows = x2nrealloc (rows, &allocated_rows, sizeof *rows); rows[n_rows++] = c; + + if (change == SPLITS_CHANGED) + { + si++; + case_unref (sc); + sc = case_ref (c); + + /* Reset the factor number, if there are factors. */ + if (fn) + { + fi = 0; + if (row_has_factors) + fi++; + case_unref (fc); + fc = case_ref (c); + } + } + else if (change == FACTORS_CHANGED) + { + if (row_has_factors) + fi++; + case_unref (fc); + fc = case_ref (c); + } } - matrix_mget_commit_var (rows, n_rows, d, rowtype_, - mget->state, si, fi, cs, cn); + matrix_mget_commit_var (rows, n_rows, d, + rowtype_, &mget->rowtypes, + mget->state, + ss, sn, si, + fs, fn, fi, + cs, cn, + pt, var_dimension); free (rows); -exit: - casereader_destroy (r); + case_unref (sc); + case_unref (fc); + case_unref (cc); + + if (var_dimension->n_leaves) + pivot_table_submit (pt); + else + pivot_table_unref (pt); +} + +static void +matrix_cmd_execute_mget (struct mget_command *mget) +{ + struct casereader *r; + struct dictionary *d; + if (matrix_open_casereader ("MGET", mget->file, mget->encoding, + mget->state->dataset, &r, &d)) + { + matrix_cmd_execute_mget__ (mget, r, d); + matrix_close_casereader (mget->file, mget->state->dataset, r, d); + } } static bool @@ -6652,10 +7424,6 @@ matrix_cmd_destroy (struct matrix_cmd *cmd) case MCMD_SAVE: matrix_expr_destroy (cmd->save.expression); - fh_unref (cmd->save.outfile); - string_array_destroy (cmd->save.variables); - matrix_expr_destroy (cmd->save.names); - stringi_set_destroy (&cmd->save.strings); break; case MCMD_READ: @@ -6672,14 +7440,11 @@ matrix_cmd_destroy (struct matrix_cmd *cmd) matrix_lvalue_destroy (cmd->get.dst); fh_unref (cmd->get.file); free (cmd->get.encoding); - string_array_destroy (&cmd->get.variables); + var_syntax_destroy (cmd->get.vars, cmd->get.n_vars); break; case MCMD_MSAVE: - free (cmd->msave.varname_); matrix_expr_destroy (cmd->msave.expr); - matrix_expr_destroy (cmd->msave.factors); - matrix_expr_destroy (cmd->msave.splits); break; case MCMD_MGET: @@ -6773,6 +7538,7 @@ cmd_matrix (struct lexer *lexer, struct dataset *ds) return CMD_FAILURE; struct matrix_state state = { + .dataset = ds, .session = dataset_session (ds), .lexer = lexer, .vars = HMAP_INITIALIZER (state.vars), @@ -6808,13 +7574,7 @@ cmd_matrix (struct lexer *lexer, struct dataset *ds) free (var); } hmap_destroy (&state.vars); - fh_unref (state.prev_save_outfile); - if (state.common) - { - dict_unref (state.common->dict); - casewriter_destroy (state.common->writer); - free (state.common); - } + msave_common_destroy (state.common); fh_unref (state.prev_read_file); for (size_t i = 0; i < state.n_read_files; i++) read_file_destroy (state.read_files[i]); @@ -6823,6 +7583,10 @@ cmd_matrix (struct lexer *lexer, struct dataset *ds) for (size_t i = 0; i < state.n_write_files; i++) write_file_destroy (state.write_files[i]); free (state.write_files); + fh_unref (state.prev_save_file); + for (size_t i = 0; i < state.n_save_files; i++) + save_file_destroy (state.save_files[i]); + free (state.save_files); return CMD_SUCCESS; }