From 8e018d25310cb53e5339b46e95f0abe02db83782 Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Tue, 18 Jul 2006 04:57:01 +0000 Subject: [PATCH] Patch #5244. This patch cleans up DATA LIST and PRINT in preparation for re-implementing REPEATING DATA (bug #12859). It reduces the code duplication between DATA LIST and PRINT a great deal, actually reducing the total code in PSPP by a couple hundred lines. The result should be more maintainable as well as easier to read. --- Smake | 1 + src/data/ChangeLog | 20 + src/data/format.c | 52 +- src/data/format.def | 6 - src/data/format.h | 14 +- src/language/data-io/ChangeLog | 17 + src/language/data-io/automake.mk | 5 +- src/language/data-io/data-list.c | 851 +++++------------ src/language/data-io/placement-parser.c | 381 ++++++++ src/language/data-io/placement-parser.h | 37 + src/language/data-io/print-space.c | 145 +++ src/language/data-io/print.c | 1113 +++++------------------ src/language/dictionary/formats.c | 3 +- src/language/dictionary/numeric.c | 6 +- src/language/expressions/parse.c | 9 +- src/language/lexer/ChangeLog | 20 + src/language/lexer/automake.mk | 1 + src/language/lexer/format-parser.c | 192 ++-- src/language/lexer/format-parser.h | 34 + src/language/lexer/lexer.h | 3 - src/language/lexer/variable-parser.c | 78 +- src/language/lexer/variable-parser.h | 4 + src/language/utilities/set.q | 3 +- src/libpspp/ChangeLog | 12 + src/libpspp/message.c | 26 +- src/libpspp/message.h | 4 + src/libpspp/str.c | 37 + src/libpspp/str.h | 6 + tests/ChangeLog | 4 + tests/command/print.sh | 6 +- 30 files changed, 1430 insertions(+), 1660 deletions(-) create mode 100644 src/language/data-io/placement-parser.c create mode 100644 src/language/data-io/placement-parser.h create mode 100644 src/language/data-io/print-space.c create mode 100644 src/language/lexer/format-parser.h diff --git a/Smake b/Smake index 33abdc1e..bc331007 100644 --- a/Smake +++ b/Smake @@ -51,6 +51,7 @@ GNULIB_MODULES = \ xalloc \ xalloc-die \ xreadlink \ + xsize \ xstrndup \ xvasprintf diff --git a/src/data/ChangeLog b/src/data/ChangeLog index 694d3577..c9463256 100644 --- a/src/data/ChangeLog +++ b/src/data/ChangeLog @@ -1,3 +1,23 @@ +Sun Jul 16 19:52:03 2006 Ben Pfaff + + * format.c: (fmt_type_from_string) New function. + (fmt_to_string) Include decimals in output if the format has + decimals, even if the format type does not. This way, we can + accurately reproduce incorrect formats in user output. + (check_common_specifier) Make the check for a bad format type an + assertion, so we get bug reports if they show up. Fix message. + Check for decimal places with a format type that doesn't allow + them. + (check_input_specifier) Remove check for FMT_X, which has been + deleted. + (check_output_specifier) Ditto. + + * format.def: Remove FMT_T, FMT_X, FMT_DESCEND, FMT_NEWREC. + + * format.h: (macro FMT_TYPE_LEN_MAX) New macro. + (struct fmt_desc) Use FMT_TYPE_LEN_MAX in definition. + (enum fmt_parse_flags) Removed. + Mon Jul 17 18:26:21 WST 2006 John Darrington * casefile.c casefile.h: Converted to an abstract base class. diff --git a/src/data/format.c b/src/data/format.c index f4a61d34..01a8d317 100644 --- a/src/data/format.c +++ b/src/data/format.c @@ -44,14 +44,34 @@ struct fmt_desc formats[FMT_NUMBER_OF_FORMATS + 1] = /* Common formats. */ const struct fmt_spec f8_2 = {FMT_F, 8, 2}; +/* Tries to parse NAME as a format type. + If successful, stores the type in *TYPE and returns true. + On failure, returns false. */ +bool +fmt_type_from_string (const char *name, int *type) +{ + int i; + + for (i = 0; i < FMT_NUMBER_OF_FORMATS; i++) + if (!strcasecmp (name, formats[i].name)) + { + *type = i; + return true; + } + return false; +} + /* Converts F to its string representation (for instance, "F8.2") and - returns a pointer to a static buffer containing that string. */ + returns a pointer to a static buffer containing that string. + If F has decimals, then they are included in the output + string, even if F's format type does not, so that we can + accurately present incorrect formats to the user. */ char * fmt_to_string (const struct fmt_spec *f) { static char buf[32]; - if (formats[f->type].n_args >= 2) + if (formats[f->type].n_args >= 2 || f->d > 0) sprintf (buf, "%s%d.%d", formats[f->type].name, f->w, f->d); else sprintf (buf, "%s%d", formats[f->type].name, f->w); @@ -68,14 +88,7 @@ check_common_specifier (const struct fmt_spec *spec, bool emit_error) struct fmt_desc *f ; char *str; - if ( spec->type > FMT_NUMBER_OF_FORMATS ) - { - if (emit_error) - msg (SE, _("Format specifies a bad type (%d)"), spec->type); - - return false; - } - + assert (spec->type < FMT_NUMBER_OF_FORMATS); f = &formats[spec->type]; str = fmt_to_string (spec); @@ -91,10 +104,18 @@ check_common_specifier (const struct fmt_spec *spec, bool emit_error) { if (emit_error) msg (SE, _("Format %s specifies a bad number of " - "implied decimal places %d. Input format %s allows " + "implied decimal places %d. Format type %s allows " "up to 16 implied decimal places."), str, spec->d, f->name); return false; } + if (f->n_args <= 1 && spec->d) + { + if (emit_error) + msg (SE, _("Format %s specifies %d decimal places, but " + "format type %s does not allow for decimal places."), + str, spec->d, f->name); + return false; + } return true; } @@ -104,7 +125,7 @@ check_common_specifier (const struct fmt_spec *spec, bool emit_error) int check_input_specifier (const struct fmt_spec *spec, int emit_error) { - struct fmt_desc *f ; + struct fmt_desc *f; char *str ; if (!check_common_specifier (spec, emit_error)) @@ -112,10 +133,6 @@ check_input_specifier (const struct fmt_spec *spec, int emit_error) f = &formats[spec->type]; str = fmt_to_string (spec); - - - if (spec->type == FMT_X) - return 1; if (f->cat & FCAT_OUTPUT_ONLY) { if (emit_error) @@ -156,9 +173,6 @@ check_output_specifier (const struct fmt_spec *spec, int emit_error) f = &formats[spec->type]; str = fmt_to_string (spec); - - if (spec->type == FMT_X) - return 1; if (spec->w < f->Omin_w || spec->w > f->Omax_w) { if (emit_error) diff --git a/src/data/format.def b/src/data/format.def index 6ca17211..034b81d4 100644 --- a/src/data/format.def +++ b/src/data/format.def @@ -57,9 +57,3 @@ DEFFMT (FMT_TIME, "TIME", 2, 5, 40, 5, 40, 0001, FMT_TIME, 21) DEFFMT (FMT_DTIME, "DTIME", 2, 11, 40, 8, 40, 0001, FMT_DTIME, 25) DEFFMT (FMT_WKDAY, "WKDAY", 1, 2, 40, 2, 40, 0001, FMT_WKDAY, 26) DEFFMT (FMT_MONTH, "MONTH", 1, 3, 40, 3, 40, 0001, FMT_MONTH, 27) - -/* These aren't real formats. They're used by DATA LIST. */ -DEFFMT (FMT_T, "T", 1, 1,99999, 1,99999, 0000, FMT_T, -1) -DEFFMT (FMT_X, "X", 1, 1,99999, 1,99999, 0000, FMT_X, -1) -DEFFMT (FMT_DESCEND, "***", 1, 1,99999, 1,99999, 0000, -1, -1) -DEFFMT (FMT_NEWREC, "***", 1, 1,99999, 1,99999, 0000, -1, -1) diff --git a/src/data/format.h b/src/data/format.h index 10ae543e..e9d5e5e3 100644 --- a/src/data/format.h +++ b/src/data/format.h @@ -39,10 +39,14 @@ enum }; #undef DEFFMT +/* Length of longest format specifier name, + not including terminating null. */ +#define FMT_TYPE_LEN_MAX 8 + /* Describes one of the display formats above. */ struct fmt_desc { - char name[9]; /* `DATETIME' is the longest name. */ + char name[FMT_TYPE_LEN_MAX + 1]; /* Name, in all caps. */ int n_args; /* 1=width; 2=width.decimals. */ int Imin_w, Imax_w; /* Bounds on input width. */ int Omin_w, Omax_w; /* Bounds on output width. */ @@ -101,13 +105,6 @@ union value; /* Maximum length of formatted value, in characters. */ #define MAX_FORMATTED_LEN 256 -/* Flags for parsing formats. */ -enum fmt_parse_flags - { - FMTP_ALLOW_XT = 001, /* 1=Allow X and T formats. */ - FMTP_SUPPRESS_ERRORS = 002 /* 1=Do not emit error messages. */ - }; - /* Common formats. */ extern const struct fmt_spec f8_2; /* F8.2. */ @@ -122,6 +119,7 @@ int parse_string_as_format (const char *s, int len, const struct fmt_spec *fp, int fc, union value *v); int translate_fmt (int spss); bool data_out (char *s, const struct fmt_spec *fp, const union value *v); +bool fmt_type_from_string (const char *name, int *type); char *fmt_to_string (const struct fmt_spec *); void num_to_string (double v, char *s, int w, int d); struct fmt_spec make_input_format (int type, int w, int d); diff --git a/src/language/data-io/ChangeLog b/src/language/data-io/ChangeLog index d78105b8..d0eff570 100644 --- a/src/language/data-io/ChangeLog +++ b/src/language/data-io/ChangeLog @@ -1,3 +1,20 @@ +Sun Jul 16 19:57:10 2006 Ben Pfaff + + * automake.mk: (src_language_data_io_libdata_io_a_SOURCE) Add + print-space.c, placement-parser.c, placement-parser.h. + + * data-list.c: Basically rewrote the whole thing. Broke out a lot + of code into placement-parser.c. Code is much cleaner now. + + * placement-parser.c: New file. + + * placement-parser.h: New file. + + * print.c: Basically rewrote the whole thing. Broke out PRINT + SPACE into print-space.c. Code is much cleaner now. + + * print-space.c: New file. + Sat Jul 1 17:39:40 2006 Ben Pfaff Fix bug #11612, "q2c documentation does not agree with code". diff --git a/src/language/data-io/automake.mk b/src/language/data-io/automake.mk index 467a4c02..018c9558 100644 --- a/src/language/data-io/automake.mk +++ b/src/language/data-io/automake.mk @@ -8,12 +8,15 @@ src_language_data_io_libdata_io_a_SOURCES = \ src/language/data-io/inpt-pgm.c \ src/language/data-io/inpt-pgm.h \ src/language/data-io/print.c \ + src/language/data-io/print-space.c \ src/language/data-io/matrix-data.c \ src/language/data-io/data-reader.c \ src/language/data-io/data-reader.h \ src/language/data-io/data-writer.c \ src/language/data-io/data-writer.h \ - src/language/data-io/file-handle.h + src/language/data-io/file-handle.h \ + src/language/data-io/placement-parser.c \ + src/language/data-io/placement-parser.h src_language_data_io_built_sources = \ src/language/data-io/file-handle.c \ diff --git a/src/language/data-io/data-list.c b/src/language/data-io/data-list.c index 95b80368..e86c91b3 100644 --- a/src/language/data-io/data-list.c +++ b/src/language/data-io/data-list.c @@ -38,18 +38,22 @@ #include #include #include +#include +#include #include #include #include #include #include -#include +#include #include #include +#include #include #include #include "size_max.h" +#include "xsize.h" #include "gettext.h" #define _(msgid) gettext (msgid) @@ -59,25 +63,26 @@ /* Describes how to parse one variable. */ struct dls_var_spec { - struct dls_var_spec *next; /* Next specification in list. */ + struct ll ll; /* List element. */ - /* Both free and fixed formats. */ + /* All parsers. */ struct fmt_spec input; /* Input format of this field. */ - struct variable *v; /* Associated variable. Used only in - parsing. Not safe later. */ int fv; /* First value in case. */ + char name[LONG_NAME_LEN + 1]; /* Var name for error messages and tables. */ /* Fixed format only. */ - int rec; /* Record number (1-based). */ - int fc, lc; /* Column numbers in record. */ - - /* Free format only. */ - char name[LONG_NAME_LEN + 1]; /* Name of variable. */ + int record; /* Record number (1-based). */ + int first_column; /* Column numbers in record. */ }; +static struct dls_var_spec * +ll_to_dls_var_spec (struct ll *ll) +{ + return ll_data (ll, struct dls_var_spec, ll); +} + /* Constants for DATA LIST type. */ -/* Must match table in cmd_data_list(). */ -enum +enum dls_type { DLS_FIXED, DLS_FREE, @@ -87,25 +92,23 @@ enum /* DATA LIST private data structure. */ struct data_list_pgm { - struct dls_var_spec *first, *last; /* Variable parsing specifications. */ + struct pool *pool; /* Used for all DATA LIST storage. */ + struct ll_list specs; /* List of dls_var_specs. */ struct dfm_reader *reader; /* Data file reader. */ - - int type; /* A DLS_* constant. */ + enum dls_type type; /* Type of DATA LIST construct. */ struct variable *end; /* Variable specified on END subcommand. */ - int rec_cnt; /* Number of records. */ - size_t case_size; /* Case size in bytes. */ + int record_cnt; /* Number of records. */ struct string delims; /* Field delimiters. */ }; static const struct case_source_class data_list_source_class; -static int parse_fixed (struct data_list_pgm *); -static int parse_free (struct dls_var_spec **, struct dls_var_spec **); -static void dump_fixed_table (const struct dls_var_spec *, - const struct file_handle *, int rec_cnt); +static bool parse_fixed (struct pool *tmp_pool, struct data_list_pgm *); +static bool parse_free (struct pool *tmp_pool, struct data_list_pgm *); +static void dump_fixed_table (const struct ll_list *, + const struct file_handle *, int record_cnt); static void dump_free_table (const struct data_list_pgm *, const struct file_handle *); -static void destroy_dls_var_spec (struct dls_var_spec *); static trns_free_func data_list_trns_free; static trns_proc_func data_list_trns_proc; @@ -116,17 +119,22 @@ cmd_data_list (void) struct data_list_pgm *dls; int table = -1; /* Print table if nonzero, -1=undecided. */ struct file_handle *fh = fh_inline_file (); + struct pool *tmp_pool; + bool ok; if (!in_input_program ()) discard_variables (); - dls = xmalloc (sizeof *dls); + dls = pool_create_container (struct data_list_pgm, pool); + ll_init (&dls->specs); dls->reader = NULL; dls->type = -1; dls->end = NULL; - dls->rec_cnt = 0; + dls->record_cnt = 0; ds_init_empty (&dls->delims); - dls->first = dls->last = NULL; + ds_register_pool (&dls->delims, dls->pool); + + tmp_pool = pool_create_subpool (dls->pool); while (token != '/') { @@ -143,7 +151,7 @@ cmd_data_list (void) lex_match ('('); if (!lex_force_int ()) goto error; - dls->rec_cnt = lex_integer (); + dls->record_cnt = lex_integer (); lex_get (); lex_match (')'); } @@ -204,7 +212,7 @@ cmd_data_list (void) else if (token == T_STRING && ds_length (&tokstr) == 1) { delim = ds_first (&tokstr); - lex_get(); + lex_get (); } else { @@ -226,32 +234,26 @@ cmd_data_list (void) } } - dls->case_size = dict_get_case_size (default_dict); fh_set_default_handle (fh); if (dls->type == -1) dls->type = DLS_FIXED; if (table == -1) - { - if (dls->type == DLS_FREE) - table = 0; - else - table = 1; - } + table = dls->type != DLS_FREE; - if (dls->type == DLS_FIXED) - { - if (!parse_fixed (dls)) - goto error; - if (table) - dump_fixed_table (dls->first, fh, dls->rec_cnt); - } - else + ok = (dls->type == DLS_FIXED ? parse_fixed : parse_free) (tmp_pool, dls); + if (!ok) + goto error; + + if (lex_end_of_command () != CMD_SUCCESS) + goto error; + + if (table) { - if (!parse_free (&dls->first, &dls->last)) - goto error; - if (table) + if (dls->type == DLS_FIXED) + dump_fixed_table (&dls->specs, fh, dls->record_cnt); + else dump_free_table (dls, fh); } @@ -264,537 +266,172 @@ cmd_data_list (void) else proc_set_source (create_case_source (&data_list_source_class, dls)); + pool_destroy (tmp_pool); + return CMD_SUCCESS; error: data_list_trns_free (dls); return CMD_CASCADING_FAILURE; } - -/* Adds SPEC to the linked list with head at FIRST and tail at - LAST. */ -static void -append_var_spec (struct dls_var_spec **first, struct dls_var_spec **last, - struct dls_var_spec *spec) -{ - spec->next = NULL; - - if (*first == NULL) - *first = spec; - else - (*last)->next = spec; - *last = spec; -} /* Fixed-format parsing. */ -/* Used for chaining together fortran-like format specifiers. */ -struct fmt_list - { - struct fmt_list *next; - int count; - struct fmt_spec f; - struct fmt_list *down; - }; - -/* State of parsing DATA LIST. */ -struct fixed_parsing_state - { - char **name; /* Variable names. */ - size_t name_cnt; /* Number of names. */ - - int recno; /* Index of current record. */ - int sc; /* 1-based column number of starting column for - next field to output. */ - }; - -static int fixed_parse_compatible (struct fixed_parsing_state *, - struct dls_var_spec **, - struct dls_var_spec **); -static int fixed_parse_fortran (struct fixed_parsing_state *, - struct dls_var_spec **, - struct dls_var_spec **); - /* Parses all the variable specifications for DATA LIST FIXED, - storing them into DLS. Returns nonzero if successful. */ -static int -parse_fixed (struct data_list_pgm *dls) + storing them into DLS. Uses TMP_POOL for data that is not + needed once parsing is complete. Returns true only if + successful. */ +static bool +parse_fixed (struct pool *tmp_pool, struct data_list_pgm *dls) { - struct fixed_parsing_state fx; - size_t i; - - fx.recno = 0; - fx.sc = 1; + int last_nonempty_record; + int record = 0; + int column = 1; while (token != '.') { - while (lex_match ('/')) - { - fx.recno++; - if (lex_is_integer ()) - { - if (lex_integer () < fx.recno) - { - msg (SE, _("The record number specified, %ld, is " - "before the previous record, %d. Data " - "fields must be listed in order of " - "increasing record number."), - lex_integer (), fx.recno - 1); - return 0; - } - - fx.recno = lex_integer (); - lex_get (); - } - fx.sc = 1; - } - - if (!parse_DATA_LIST_vars (&fx.name, &fx.name_cnt, PV_NONE)) - return 0; - - if (lex_is_number ()) - { - if (!fixed_parse_compatible (&fx, &dls->first, &dls->last)) - goto fail; - } - else if (token == '(') - { - if (!fixed_parse_fortran (&fx, &dls->first, &dls->last)) - goto fail; - } - else - { - msg (SE, _("SPSS-like or FORTRAN-like format " - "specification expected after variable names.")); - goto fail; - } + char **names; + size_t name_cnt, name_idx; + struct fmt_spec *formats, *f; + size_t format_cnt; + + /* Parse everything. */ + if (!parse_record_placement (&record, &column) + || !parse_DATA_LIST_vars_pool (tmp_pool, &names, &name_cnt, PV_NONE) + || !parse_var_placements (tmp_pool, name_cnt, &formats, &format_cnt)) + return false; - for (i = 0; i < fx.name_cnt; i++) - free (fx.name[i]); - free (fx.name); + /* Create variables and var specs. */ + name_idx = 0; + for (f = formats; f < &formats[format_cnt]; f++) + if (!execute_placement_format (f, &record, &column)) + { + char *name; + int width; + struct variable *v; + struct dls_var_spec *spec; + + name = names[name_idx++]; + + /* Create variable. */ + width = get_format_var_width (f); + v = dict_create_var (default_dict, name, width); + if (v != NULL) + { + /* Success. */ + struct fmt_spec output; + convert_fmt_ItoO (f, &output); + v->print = output; + v->write = output; + } + else + { + /* Failure. + This can be acceptable if we're in INPUT + PROGRAM, but only if the existing variable has + the same width as the one we would have + created. */ + if (!in_input_program ()) + { + msg (SE, _("%s is a duplicate variable name."), name); + return false; + } + + v = dict_lookup_var_assert (default_dict, name); + if ((width != 0) != (v->width != 0)) + { + msg (SE, _("There is already a variable %s of a " + "different type."), + name); + return false; + } + if (width != 0 && width != v->width) + { + msg (SE, _("There is already a string variable %s of a " + "different width."), name); + return false; + } + } + + /* Create specifier for parsing the variable. */ + spec = pool_alloc (dls->pool, sizeof *spec); + spec->input = *f; + spec->fv = v->fv; + spec->record = record; + spec->first_column = column; + strcpy (spec->name, v->name); + ll_push_tail (&dls->specs, &spec->ll); + + column += f->w; + } + assert (name_idx == name_cnt); } - if (dls->first == NULL) + if (ll_is_empty (&dls->specs)) { msg (SE, _("At least one variable must be specified.")); - return 0; + return false; } - if (dls->rec_cnt && dls->last->rec > dls->rec_cnt) + + last_nonempty_record = ll_to_dls_var_spec (ll_tail (&dls->specs))->record; + if (dls->record_cnt && last_nonempty_record > dls->record_cnt) { msg (SE, _("Variables are specified on records that " "should not exist according to RECORDS subcommand.")); - return 0; - } - else if (!dls->rec_cnt) - dls->rec_cnt = dls->last->rec; - return lex_end_of_command () == CMD_SUCCESS; - -fail: - for (i = 0; i < fx.name_cnt; i++) - free (fx.name[i]); - free (fx.name); - return 0; -} - -/* Parses a variable specification in the form 1-10 (A) based on - FX and adds specifications to the linked list with head at - FIRST and tail at LAST. */ -static int -fixed_parse_compatible (struct fixed_parsing_state *fx, - struct dls_var_spec **first, struct dls_var_spec **last) -{ - struct fmt_spec input; - int fc, lc; - int width; - int i; - - /* First column. */ - if (!lex_force_int ()) - return 0; - fc = lex_integer (); - if (fc < 1) - { - msg (SE, _("Column positions for fields must be positive.")); - return 0; - } - lex_get (); - - /* Last column. */ - lex_negative_to_dash (); - if (lex_match ('-')) - { - if (!lex_force_int ()) - return 0; - lc = lex_integer (); - if (lc < 1) - { - msg (SE, _("Column positions for fields must be positive.")); - return 0; - } - else if (lc < fc) - { - msg (SE, _("The ending column for a field must be " - "greater than the starting column.")); - return 0; - } - - lex_get (); - } - else - lc = fc; - - /* Divide columns evenly. */ - input.w = (lc - fc + 1) / fx->name_cnt; - if ((lc - fc + 1) % fx->name_cnt) - { - msg (SE, _("The %d columns %d-%d " - "can't be evenly divided into %d fields."), - lc - fc + 1, fc, lc, fx->name_cnt); - return 0; - } - - /* Format specifier. */ - if (lex_match ('(')) - { - struct fmt_desc *fdp; - - if (token == T_ID) - { - const char *cp; - - input.type = parse_format_specifier_name (&cp, 0); - if (input.type == -1) - return 0; - if (*cp) - { - msg (SE, _("A format specifier on this line " - "has extra characters on the end.")); - return 0; - } - - lex_get (); - lex_match (','); - } - else - input.type = FMT_F; - - if (lex_is_integer ()) - { - if (lex_integer () < 1) - { - msg (SE, _("The value for number of decimal places " - "must be at least 1.")); - return 0; - } - - input.d = lex_integer (); - lex_get (); - } - else - input.d = 0; - - fdp = &formats[input.type]; - if (fdp->n_args < 2 && input.d) - { - msg (SE, _("Input format %s doesn't accept decimal places."), - fdp->name); - return 0; - } - - if (input.d > 16) - input.d = 16; - - if (!lex_force_match (')')) - return 0; + return false; } - else - { - input.type = FMT_F; - input.d = 0; - } - if (!check_input_specifier (&input, 1)) - return 0; - - /* Start column for next specification. */ - fx->sc = lc + 1; - - /* Width of variables to create. */ - if (input.type == FMT_A || input.type == FMT_AHEX) - width = input.w; - else - width = 0; - - /* Create variables and var specs. */ - for (i = 0; i < fx->name_cnt; i++) - { - struct dls_var_spec *spec; - struct variable *v; - - v = dict_create_var (default_dict, fx->name[i], width); - if (v != NULL) - { - convert_fmt_ItoO (&input, &v->print); - v->write = v->print; - } - else - { - v = dict_lookup_var_assert (default_dict, fx->name[i]); - if (!in_input_program ()) - { - msg (SE, _("%s is a duplicate variable name."), fx->name[i]); - return 0; - } - if ((width != 0) != (v->width != 0)) - { - msg (SE, _("There is already a variable %s of a " - "different type."), - fx->name[i]); - return 0; - } - if (width != 0 && width != v->width) - { - msg (SE, _("There is already a string variable %s of a " - "different width."), fx->name[i]); - return 0; - } - } - - spec = xmalloc (sizeof *spec); - spec->input = input; - spec->v = v; - spec->fv = v->fv; - spec->rec = fx->recno; - spec->fc = fc + input.w * i; - spec->lc = spec->fc + input.w - 1; - append_var_spec (first, last, spec); - } - return 1; -} - -/* Destroy format list F and, if RECURSE is nonzero, all its - sublists. */ -static void -destroy_fmt_list (struct fmt_list *f, int recurse) -{ - struct fmt_list *next; - - for (; f; f = next) - { - next = f->next; - if (recurse && f->f.type == FMT_DESCEND) - destroy_fmt_list (f->down, 1); - free (f); - } -} - -/* Takes a hierarchically structured fmt_list F as constructed by - fixed_parse_fortran(), and flattens it, adding the variable - specifications to the linked list with head FIRST and tail - LAST. NAME_IDX is used to take values from the list of names - in FX; it should initially point to a value of 0. */ -static int -dump_fmt_list (struct fixed_parsing_state *fx, struct fmt_list *f, - struct dls_var_spec **first, struct dls_var_spec **last, - int *name_idx) -{ - int i; - - for (; f; f = f->next) - if (f->f.type == FMT_X) - fx->sc += f->count; - else if (f->f.type == FMT_T) - fx->sc = f->f.w; - else if (f->f.type == FMT_NEWREC) - { - fx->recno += f->count; - fx->sc = 1; - } - else - for (i = 0; i < f->count; i++) - if (f->f.type == FMT_DESCEND) - { - if (!dump_fmt_list (fx, f->down, first, last, name_idx)) - return 0; - } - else - { - struct dls_var_spec *spec; - int width; - struct variable *v; - - if (formats[f->f.type].cat & FCAT_STRING) - width = f->f.w; - else - width = 0; - if (*name_idx >= fx->name_cnt) - { - msg (SE, _("The number of format " - "specifications exceeds the given number of " - "variable names.")); - return 0; - } - - v = dict_create_var (default_dict, fx->name[(*name_idx)++], width); - if (!v) - { - msg (SE, _("%s is a duplicate variable name."), fx->name[i]); - return 0; - } - - spec = xmalloc (sizeof *spec); - spec->v = v; - spec->input = f->f; - spec->fv = v->fv; - spec->rec = fx->recno; - spec->fc = fx->sc; - spec->lc = fx->sc + f->f.w - 1; - append_var_spec (first, last, spec); - - convert_fmt_ItoO (&spec->input, &v->print); - v->write = v->print; - - fx->sc += f->f.w; - } - return 1; -} - -/* Recursively parses a FORTRAN-like format specification into - the linked list with head FIRST and tail TAIL. LEVEL is the - level of recursion, starting from 0. Returns the parsed - specification if successful, or a null pointer on failure. */ -static struct fmt_list * -fixed_parse_fortran_internal (struct fixed_parsing_state *fx, - struct dls_var_spec **first, - struct dls_var_spec **last) -{ - struct fmt_list *head = NULL; - struct fmt_list *tail = NULL; - - lex_force_match ('('); - while (token != ')') - { - /* New fmt_list. */ - struct fmt_list *new = xmalloc (sizeof *new); - new->next = NULL; - - /* Append new to list. */ - if (head != NULL) - tail->next = new; - else - head = new; - tail = new; - - /* Parse count. */ - if (lex_is_integer ()) - { - new->count = lex_integer (); - lex_get (); - } - else - new->count = 1; + else if (!dls->record_cnt) + dls->record_cnt = last_nonempty_record; - /* Parse format specifier. */ - if (token == '(') - { - new->f.type = FMT_DESCEND; - new->down = fixed_parse_fortran_internal (fx, first, last); - if (new->down == NULL) - goto fail; - } - else if (lex_match ('/')) - new->f.type = FMT_NEWREC; - else if (!parse_format_specifier (&new->f, FMTP_ALLOW_XT) - || !check_input_specifier (&new->f, 1)) - goto fail; - - lex_match (','); - } - lex_force_match (')'); - - return head; - -fail: - destroy_fmt_list (head, 0); - - return NULL; -} - -/* Parses a FORTRAN-like format specification into the linked - list with head FIRST and tail LAST. Returns nonzero if - successful. */ -static int -fixed_parse_fortran (struct fixed_parsing_state *fx, - struct dls_var_spec **first, struct dls_var_spec **last) -{ - struct fmt_list *list; - int name_idx; - - list = fixed_parse_fortran_internal (fx, first, last); - if (list == NULL) - return 0; - - name_idx = 0; - dump_fmt_list (fx, list, first, last, &name_idx); - destroy_fmt_list (list, 1); - if (name_idx < fx->name_cnt) - { - msg (SE, _("There aren't enough format specifications " - "to match the number of variable names given.")); - return 0; - } - - return 1; + return true; } /* Displays a table giving information on fixed-format variable parsing on DATA LIST. */ -/* FIXME: The `Columns' column should be divided into three columns, - one for the starting column, one for the dash, one for the ending - column; then right-justify the starting column and left-justify the - ending column. */ static void -dump_fixed_table (const struct dls_var_spec *specs, - const struct file_handle *fh, int rec_cnt) +dump_fixed_table (const struct ll_list *specs, + const struct file_handle *fh, int record_cnt) { - const struct dls_var_spec *spec; + size_t spec_cnt; struct tab_table *t; - int i; + struct dls_var_spec *spec; + int row; - for (i = 0, spec = specs; spec; spec = spec->next) - i++; - t = tab_create (4, i + 1, 0); + spec_cnt = ll_count (specs); + t = tab_create (4, spec_cnt + 1, 0); tab_columns (t, TAB_COL_DOWN, 1); tab_headers (t, 0, 0, 1, 0); tab_text (t, 0, 0, TAB_CENTER | TAT_TITLE, _("Variable")); tab_text (t, 1, 0, TAB_CENTER | TAT_TITLE, _("Record")); tab_text (t, 2, 0, TAB_CENTER | TAT_TITLE, _("Columns")); tab_text (t, 3, 0, TAB_CENTER | TAT_TITLE, _("Format")); - tab_box (t, TAL_1, TAL_1, TAL_0, TAL_1, 0, 0, 3, i); + tab_box (t, TAL_1, TAL_1, TAL_0, TAL_1, 0, 0, 3, spec_cnt); tab_hline (t, TAL_2, 0, 3, 1); tab_dim (t, tab_natural_dimensions); - for (i = 1, spec = specs; spec; spec = spec->next, i++) + row = 1; + ll_for_each (spec, struct dls_var_spec, ll, specs) { - tab_text (t, 0, i, TAB_LEFT, spec->v->name); - tab_text (t, 1, i, TAT_PRINTF, "%d", spec->rec); - tab_text (t, 2, i, TAT_PRINTF, "%3d-%3d", - spec->fc, spec->lc); - tab_text (t, 3, i, TAB_LEFT | TAB_FIX, - fmt_to_string (&spec->input)); + tab_text (t, 0, row, TAB_LEFT, spec->name); + tab_text (t, 1, row, TAT_PRINTF, "%d", spec->record); + tab_text (t, 2, row, TAT_PRINTF, "%3d-%3d", + spec->first_column, spec->first_column + spec->input.w - 1); + tab_text (t, 3, row, TAB_LEFT | TAB_FIX, fmt_to_string (&spec->input)); + row++; } tab_title (t, ngettext ("Reading %d record from %s.", - "Reading %d records from %s.", rec_cnt), - rec_cnt, fh_get_name (fh)); + "Reading %d records from %s.", record_cnt), + record_cnt, fh_get_name (fh)); tab_submit (t); } /* Free-format parsing. */ /* Parses variable specifications for DATA LIST FREE and adds - them to the linked list with head FIRST and tail LAST. - Returns nonzero only if successful. */ -static int -parse_free (struct dls_var_spec **first, struct dls_var_spec **last) + them to DLS. Uses TMP_POOL for data that is not needed once + parsing is complete. Returns true only if successful. */ +static bool +parse_free (struct pool *tmp_pool, struct data_list_pgm *dls) { lex_get (); while (token != '.') @@ -802,23 +439,17 @@ parse_free (struct dls_var_spec **first, struct dls_var_spec **last) struct fmt_spec input, output; char **name; size_t name_cnt; - int width; size_t i; - if (!parse_DATA_LIST_vars (&name, &name_cnt, PV_NONE)) + if (!parse_DATA_LIST_vars_pool (tmp_pool, &name, &name_cnt, PV_NONE)) return 0; if (lex_match ('(')) { - if (!parse_format_specifier (&input, 0) + if (!parse_format_specifier (&input) || !check_input_specifier (&input, 1) || !lex_force_match (')')) - { - for (i = 0; i < name_cnt; i++) - free (name[i]); - free (name); - return 0; - } + return NULL; convert_fmt_ItoO (&input, &output); } else @@ -828,37 +459,29 @@ parse_free (struct dls_var_spec **first, struct dls_var_spec **last) output = *get_format (); } - if (input.type == FMT_A || input.type == FMT_AHEX) - width = input.w; - else - width = 0; for (i = 0; i < name_cnt; i++) { struct dls_var_spec *spec; struct variable *v; - v = dict_create_var (default_dict, name[i], width); - - if (!v) + v = dict_create_var (default_dict, name[i], + get_format_var_width (&input)); + if (v == NULL) { msg (SE, _("%s is a duplicate variable name."), name[i]); return 0; } v->print = v->write = output; - spec = xmalloc (sizeof *spec); + spec = pool_alloc (dls->pool, sizeof *spec); spec->input = input; - spec->v = v; spec->fv = v->fv; - str_copy_trunc (spec->name, sizeof spec->name, v->name); - append_var_spec (first, last, spec); + strcpy (spec->name, v->name); + ll_push_tail (&dls->specs, &spec->ll); } - for (i = 0; i < name_cnt; i++) - free (name[i]); - free (name); } - return lex_end_of_command () == CMD_SUCCESS; + return true; } /* Displays a table giving information on free-format variable parsing @@ -868,32 +491,28 @@ dump_free_table (const struct data_list_pgm *dls, const struct file_handle *fh) { struct tab_table *t; - int i; - - { - struct dls_var_spec *spec; - for (i = 0, spec = dls->first; spec; spec = spec->next) - i++; - } + struct dls_var_spec *spec; + size_t spec_cnt; + int row; + + spec_cnt = ll_count (&dls->specs); - t = tab_create (2, i + 1, 0); + t = tab_create (2, spec_cnt + 1, 0); tab_columns (t, TAB_COL_DOWN, 1); tab_headers (t, 0, 0, 1, 0); tab_text (t, 0, 0, TAB_CENTER | TAT_TITLE, _("Variable")); tab_text (t, 1, 0, TAB_CENTER | TAT_TITLE, _("Format")); - tab_box (t, TAL_1, TAL_1, TAL_0, TAL_1, 0, 0, 1, i); + tab_box (t, TAL_1, TAL_1, TAL_0, TAL_1, 0, 0, 1, spec_cnt); tab_hline (t, TAL_2, 0, 1, 1); tab_dim (t, tab_natural_dimensions); - - { - struct dls_var_spec *spec; - - for (i = 1, spec = dls->first; spec; spec = spec->next, i++) - { - tab_text (t, 0, i, TAB_LEFT, spec->v->name); - tab_text (t, 1, i, TAB_LEFT | TAB_FIX, fmt_to_string (&spec->input)); - } - } + + row = 1; + ll_for_each (spec, struct dls_var_spec, ll, &dls->specs) + { + tab_text (t, 0, row, TAB_LEFT, spec->name); + tab_text (t, 1, row, TAB_LEFT | TAB_FIX, fmt_to_string (&spec->input)); + row++; + } tab_title (t, _("Reading free-form data from %s."), fh_get_name (fh)); @@ -1013,38 +632,40 @@ read_from_data_list (const struct data_list_pgm *dls, struct ccase *c) static bool read_from_data_list_fixed (const struct data_list_pgm *dls, struct ccase *c) { - struct dls_var_spec *var_spec = dls->first; - int i; + struct dls_var_spec *spec; + int row; - if (dfm_eof (dls->reader)) - return false; - for (i = 1; i <= dls->rec_cnt; i++) + if (dfm_eof (dls->reader)) + return false; + + spec = ll_to_dls_var_spec (ll_head (&dls->specs)); + for (row = 1; row <= dls->record_cnt; row++) { struct substring line; - + if (dfm_eof (dls->reader)) - { - /* Note that this can't occur on the first record. */ - msg (SW, _("Partial case of %d of %d records discarded."), - i - 1, dls->rec_cnt); - return false; - } + { + msg (SW, _("Partial case of %d of %d records discarded."), + row - 1, dls->record_cnt); + return false; + } dfm_expand_tabs (dls->reader); line = dfm_get_record (dls->reader); - for (; var_spec && i == var_spec->rec; var_spec = var_spec->next) - { - struct data_in di; + ll_for_each_continue (spec, struct dls_var_spec, ll, &dls->specs) + { + struct data_in di; - data_in_finite_line (&di, ss_data (line), ss_length (line), - var_spec->fc, var_spec->lc); - di.v = case_data_rw (c, var_spec->fv); - di.flags = DI_IMPLIED_DECIMALS; - di.f1 = var_spec->fc; - di.format = var_spec->input; + data_in_finite_line (&di, ss_data (line), ss_length (line), + spec->first_column, + spec->first_column + spec->input.w - 1); + di.v = case_data_rw (c, spec->fv); + di.flags = DI_IMPLIED_DECIMALS; + di.f1 = spec->first_column; + di.format = spec->input; - data_in (&di); - } + data_in (&di); + } dfm_forward_record (dls->reader); } @@ -1058,9 +679,9 @@ read_from_data_list_fixed (const struct data_list_pgm *dls, struct ccase *c) static bool read_from_data_list_free (const struct data_list_pgm *dls, struct ccase *c) { - struct dls_var_spec *var_spec; + struct dls_var_spec *spec; - for (var_spec = dls->first; var_spec; var_spec = var_spec->next) + ll_for_each (spec, struct dls_var_spec, ll, &dls->specs) { struct substring field; struct data_in di; @@ -1072,19 +693,19 @@ read_from_data_list_free (const struct data_list_pgm *dls, struct ccase *c) dfm_forward_record (dls->reader); if (dfm_eof (dls->reader)) { - if (var_spec != dls->first) + if (&spec->ll != ll_head (&dls->specs)) msg (SW, _("Partial case discarded. The first variable " - "missing was %s."), var_spec->name); + "missing was %s."), spec->name); return false; } } di.s = ss_data (field); di.e = ss_end (field); - di.v = case_data_rw (c, var_spec->fv); + di.v = case_data_rw (c, spec->fv); di.flags = 0; di.f1 = dfm_get_column (dls->reader, ss_data (field)); - di.format = var_spec->input; + di.format = spec->input; data_in (&di); } return true; @@ -1096,12 +717,12 @@ read_from_data_list_free (const struct data_list_pgm *dls, struct ccase *c) static bool read_from_data_list_list (const struct data_list_pgm *dls, struct ccase *c) { - struct dls_var_spec *var_spec; + struct dls_var_spec *spec; if (dfm_eof (dls->reader)) return false; - for (var_spec = dls->first; var_spec; var_spec = var_spec->next) + ll_for_each (spec, struct dls_var_spec, ll, &dls->specs) { struct substring field; struct data_in di; @@ -1112,24 +733,24 @@ read_from_data_list_list (const struct data_list_pgm *dls, struct ccase *c) msg (SW, _("Missing value(s) for all variables from %s onward. " "These will be filled with the system-missing value " "or blanks, as appropriate."), - var_spec->name); - for (; var_spec; var_spec = var_spec->next) + spec->name); + ll_for_each_continue (spec, struct dls_var_spec, ll, &dls->specs) { - int width = get_format_var_width (&var_spec->input); + int width = get_format_var_width (&spec->input); if (width == 0) - case_data_rw (c, var_spec->fv)->f = SYSMIS; + case_data_rw (c, spec->fv)->f = SYSMIS; else - memset (case_data_rw (c, var_spec->fv)->s, ' ', width); + memset (case_data_rw (c, spec->fv)->s, ' ', width); } break; } di.s = ss_data (field); di.e = ss_end (field); - di.v = case_data_rw (c, var_spec->fv); + di.v = case_data_rw (c, spec->fv); di.flags = 0; di.f1 = dfm_get_column (dls->reader, ss_data (field)); - di.format = var_spec->input; + di.format = spec->input; data_in (&di); } @@ -1137,30 +758,14 @@ read_from_data_list_list (const struct data_list_pgm *dls, struct ccase *c) return true; } -/* Destroys SPEC. */ -static void -destroy_dls_var_spec (struct dls_var_spec *spec) -{ - struct dls_var_spec *next; - - while (spec != NULL) - { - next = spec->next; - free (spec); - spec = next; - } -} - /* Destroys DATA LIST transformation DLS. Returns true if successful, false if an I/O error occurred. */ static bool data_list_trns_free (void *dls_) { struct data_list_pgm *dls = dls_; - ds_destroy (&dls->delims); - destroy_dls_var_spec (dls->first); dfm_close_reader (dls->reader); - free (dls); + pool_destroy (dls->pool); return true; } diff --git a/src/language/data-io/placement-parser.c b/src/language/data-io/placement-parser.c new file mode 100644 index 00000000..b0c32936 --- /dev/null +++ b/src/language/data-io/placement-parser.c @@ -0,0 +1,381 @@ +/* PSPP - computes sample statistics. + Copyright (C) 2006 Free Software Foundation, Inc. + Written by Ben Pfaff . + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. */ + +#include + +#include + +#include + +#include +#include +#include +#include +#include + +#include "xalloc.h" +#include "xsize.h" + +#include "gettext.h" +#define _(msgid) gettext (msgid) + +/* Extensions to the format specifiers used only for + placement. */ +enum + { + PRS_TYPE_T = -1, /* Tab to absolute column. */ + PRS_TYPE_X = -2, /* Skip columns. */ + PRS_TYPE_NEW_REC = -3 /* Next record. */ + }; + +static bool fixed_parse_columns (struct pool *, size_t var_cnt, + struct fmt_spec **, size_t *); +static bool fixed_parse_fortran (struct pool *, + struct fmt_spec **, size_t *); + +/* Parses Fortran-like or column-based specifications for placing + variable data in fixed positions in columns and rows, that is, + formats like those parsed by DATA LIST or PRINT. Returns true + only if successful. + + If successful, formats for VAR_CNT variables are stored in + *FORMATS, and the number of formats required is stored in + *FORMAT_CNT. *FORMAT_CNT may be greater than VAR_CNT because + of T, X, and / "formats", but success guarantees that exactly + VAR_CNT variables will be placed by the output formats. The + caller should call execute_placement_format to process those + "formats" in interpreting the output. + + Uses POOL for allocation. When the caller is finished + interpreting *FORMATS, POOL may be destroyed. */ +bool +parse_var_placements (struct pool *pool, size_t var_cnt, + struct fmt_spec **formats, size_t *format_cnt) +{ + assert (var_cnt > 0); + if (lex_is_number ()) + return fixed_parse_columns (pool, var_cnt, formats, format_cnt); + else if (lex_match ('(')) + { + size_t assignment_cnt; + size_t i; + + if (!fixed_parse_fortran (pool, formats, format_cnt)) + return false; + + assignment_cnt = 0; + for (i = 0; i < *format_cnt; i++) + assignment_cnt += (*formats)[i].type >= 0; + + if (assignment_cnt != var_cnt) + { + msg (SE, _("Number of variables specified (%d) " + "differs from number of variable formats (%d)."), + (int) var_cnt, (int) assignment_cnt); + return false; + } + + return true; + } + else + { + msg (SE, _("SPSS-like or Fortran-like format " + "specification expected after variable names.")); + return false; + } +} + +/* Implements parse_var_placements for column-based formats. */ +static bool +fixed_parse_columns (struct pool *pool, size_t var_cnt, + struct fmt_spec **formats, size_t *format_cnt) +{ + struct fmt_spec format; + int fc, lc; + size_t i; + + if (!parse_column_range (&fc, &lc, NULL)) + return false; + + /* Divide columns evenly. */ + format.w = (lc - fc + 1) / var_cnt; + if ((lc - fc + 1) % var_cnt) + { + msg (SE, _("The %d columns %d-%d " + "can't be evenly divided into %d fields."), + lc - fc + 1, fc, lc, var_cnt); + return false; + } + + /* Format specifier. */ + if (lex_match ('(')) + { + /* Get format type. */ + if (token == T_ID) + { + if (!parse_format_specifier_name (&format.type)) + return false; + lex_match (','); + } + else + format.type = FMT_F; + + /* Get decimal places. */ + if (lex_is_integer ()) + { + format.d = lex_integer (); + lex_get (); + } + else + format.d = 0; + + if (!lex_force_match (')')) + return false; + } + else + { + format.type = FMT_F; + format.d = 0; + } + if (!check_input_specifier (&format, 1)) + return false; + + *formats = pool_nalloc (pool, var_cnt + 1, sizeof **formats); + *format_cnt = var_cnt + 1; + (*formats)[0].type = PRS_TYPE_T; + (*formats)[0].w = fc; + for (i = 1; i <= var_cnt; i++) + (*formats)[i] = format; + return true; +} + +/* Implements parse_var_placements for Fortran-like formats. */ +static bool +fixed_parse_fortran (struct pool *pool, + struct fmt_spec **formats, size_t *format_cnt) +{ + size_t formats_allocated = 0; + size_t formats_used = 0; + + *formats = NULL; + while (!lex_match (')')) + { + struct fmt_spec f; + struct fmt_spec *new_formats; + size_t new_format_cnt; + size_t count; + size_t formats_needed; + + /* Parse count. */ + if (lex_is_integer ()) + { + count = lex_integer (); + lex_get (); + } + else + count = 1; + + /* Parse format specifier. */ + if (lex_match ('(')) + { + /* Call ourselves recursively to handle parentheses. */ + if (!fixed_parse_fortran (pool, &new_formats, &new_format_cnt)) + return false; + } + else + { + new_formats = &f; + new_format_cnt = 1; + if (lex_match ('/')) + f.type = PRS_TYPE_NEW_REC; + else + { + char type[FMT_TYPE_LEN_MAX + 1]; + + if (!parse_abstract_format_specifier (type, &f.w, &f.d)) + return false; + + if (!strcasecmp (type, "T")) + f.type = PRS_TYPE_T; + else if (!strcasecmp (type, "X")) + { + f.type = PRS_TYPE_X; + f.w = count; + count = 1; + } + else + { + if (!fmt_type_from_string (type, &f.type)) + { + msg (SE, _("Unknown format type \"%s\"."), type); + return false; + } + if (!check_input_specifier (&f, 1)) + return false; + } + } + } + + /* Add COUNT copies of the NEW_FORMAT_CNT formats in + NEW_FORMATS to FORMATS. */ + if (new_format_cnt != 0 + && size_overflow_p (xtimes (xsum (formats_used, + xtimes (count, new_format_cnt)), + sizeof *formats))) + xalloc_die (); + formats_needed = count * new_format_cnt; + if (formats_used + formats_needed > formats_allocated) + { + formats_allocated = formats_used + formats_needed; + *formats = pool_2nrealloc (pool, *formats, &formats_allocated, + sizeof **formats); + } + for (; count > 0; count--) + { + memcpy (&(*formats)[formats_used], new_formats, + sizeof **formats * new_format_cnt); + formats_used += new_format_cnt; + } + + lex_match (','); + } + + *format_cnt = formats_used; + return true; +} + +/* Checks whether FORMAT represents one of the special "formats" + for T, X, or /. If so, updates *RECORD or *COLUMN (or both) + as appropriate, and returns true. Otherwise, returns false + without any side effects. */ +bool +execute_placement_format (const struct fmt_spec *format, + int *record, int *column) +{ + switch (format->type) + { + case PRS_TYPE_X: + *column += format->w; + return true; + + case PRS_TYPE_T: + *column = format->w; + return true; + + case PRS_TYPE_NEW_REC: + (*record)++; + *column = 1; + return true; + + default: + assert (format->type >= 0 && format->type < FMT_NUMBER_OF_FORMATS); + return false; + } +} + +/* Parse a column or a range of columns, specified as a single + integer or two integer delimited by a dash. Stores the range + in *FIRST_COLUMN and *LAST_COLUMN. (If only a single integer + is given, it is stored in both.) If RANGE_SPECIFIED is + non-null, then *RANGE_SPECIFIED is set to true if the syntax + contained a dash, false otherwise. Returns true if + successful, false if the syntax was invalid or the values + specified did not make sense. */ +bool +parse_column_range (int *first_column, int *last_column, + bool *range_specified) +{ + /* First column. */ + if (!lex_force_int ()) + return false; + *first_column = lex_integer (); + if (*first_column < 1) + { + msg (SE, _("Column positions for fields must be positive.")); + return false; + } + lex_get (); + + /* Last column. */ + lex_negative_to_dash (); + if (lex_match ('-')) + { + if (!lex_force_int ()) + return false; + *last_column = lex_integer (); + if (*last_column < 1) + { + msg (SE, _("Column positions for fields must be positive.")); + return false; + } + else if (*last_column < *first_column) + { + msg (SE, _("The ending column for a field must be " + "greater than the starting column.")); + return false; + } + + if (range_specified) + *range_specified = true; + lex_get (); + } + else + { + *last_column = *first_column; + if (range_specified) + *range_specified = false; + } + + return true; +} + +/* Parses a (possibly empty) sequence of slashes, each of which + may be followed by an integer. A slash on its own increases + *RECORD by 1 and sets *COLUMN to 1. A slash followed by an + integer sets *RECORD to the integer, as long as that increases + *RECORD, and sets *COLUMN to 1. + + Returns true if successful, false on syntax error. */ +bool +parse_record_placement (int *record, int *column) +{ + while (lex_match ('/')) + { + if (lex_is_integer ()) + { + if (lex_integer () <= *record) + { + msg (SE, _("The record number specified, %ld, is at or " + "before the previous record, %d. Data " + "fields must be listed in order of " + "increasing record number."), + lex_integer (), *record); + return false; + } + *record = lex_integer (); + lex_get (); + } + else + (*record)++; + *column = 1; + } + assert (*record >= 1); + + return true; +} diff --git a/src/language/data-io/placement-parser.h b/src/language/data-io/placement-parser.h new file mode 100644 index 00000000..bb59a9ce --- /dev/null +++ b/src/language/data-io/placement-parser.h @@ -0,0 +1,37 @@ +/* PSPP - computes sample statistics. + Copyright (C) 2006 Free Software Foundation, Inc. + Written by Ben Pfaff . + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. */ + +#ifndef LANGUAGE_DATA_IO_PLACEMENT_PARSER_H +#define LANGUAGE_DATA_IO_PLACEMENT_PARSER_H 1 + +#include +#include + +struct fmt_spec; +struct pool; + +bool parse_record_placement (int *record, int *column); +bool parse_var_placements (struct pool *, size_t var_cnt, + struct fmt_spec **, size_t *format_cnt); +bool execute_placement_format (const struct fmt_spec *, + int *record, int *column); +bool parse_column_range (int *first_column, int *last_column, + bool *range_specified); + +#endif /* language/data-io/placement-parser.h */ diff --git a/src/language/data-io/print-space.c b/src/language/data-io/print-space.c new file mode 100644 index 00000000..1388f297 --- /dev/null +++ b/src/language/data-io/print-space.c @@ -0,0 +1,145 @@ +/* PSPP - computes sample statistics. + Copyright (C) 2006 Free Software Foundation, Inc. + Written by Ben Pfaff . + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. */ + +#include + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "xalloc.h" + +#include "gettext.h" +#define _(msgid) gettext (msgid) + +/* PRINT SPACE transformation. */ +struct print_space_trns + { + struct dfm_writer *writer; /* Output data file. */ + struct expression *expr; /* Number of lines; NULL means 1. */ + } +print_space_trns; + +static trns_proc_func print_space_trns_proc; +static trns_free_func print_space_trns_free; + +int +cmd_print_space (void) +{ + struct print_space_trns *trns; + struct file_handle *handle; + struct expression *expr; + struct dfm_writer *writer; + + if (lex_match_id ("OUTFILE")) + { + lex_match ('='); + + handle = fh_parse (FH_REF_FILE); + if (handle == NULL) + return CMD_FAILURE; + lex_get (); + } + else + handle = NULL; + + if (token != '.') + { + expr = expr_parse (default_dict, EXPR_NUMBER); + if (token != '.') + { + expr_free (expr); + lex_error (_("expecting end of command")); + return CMD_FAILURE; + } + } + else + expr = NULL; + + if (handle != NULL) + { + writer = dfm_open_writer (handle); + if (writer == NULL) + { + expr_free (expr); + return CMD_FAILURE; + } + } + else + writer = NULL; + + trns = xmalloc (sizeof *trns); + trns->writer = writer; + trns->expr = expr; + + add_transformation (print_space_trns_proc, print_space_trns_free, trns); + return CMD_SUCCESS; +} + +/* Executes a PRINT SPACE transformation. */ +static int +print_space_trns_proc (void *t_, struct ccase *c, + int case_num UNUSED) +{ + struct print_space_trns *trns = t_; + int n; + + n = 1; + if (trns->expr) + { + double f = expr_evaluate_num (trns->expr, c, case_num); + if (f == SYSMIS) + msg (SW, _("The expression on PRINT SPACE evaluated to the " + "system-missing value.")); + else if (f < 0 || f > INT_MAX) + msg (SW, _("The expression on PRINT SPACE evaluated to %g."), f); + else + n = f; + } + + while (n--) + if (trns->writer == NULL) + som_blank_line (); + else + dfm_put_record (trns->writer, "\n", 1); + + if (trns->writer != NULL && dfm_write_error (trns->writer)) + return TRNS_ERROR; + return TRNS_CONTINUE; +} + +/* Frees a PRINT SPACE transformation. + Returns true if successful, false if an I/O error occurred. */ +static bool +print_space_trns_free (void *trns_) +{ + struct print_space_trns *trns = trns_; + bool ok = dfm_close_writer (trns->writer); + expr_free (trns->expr); + free (trns); + return ok; +} diff --git a/src/language/data-io/print.c b/src/language/data-io/print.c index 3c8db1cb..f6ba6e90 100644 --- a/src/language/data-io/print.c +++ b/src/language/data-io/print.c @@ -17,8 +17,6 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ -/* FIXME: seems like a lot of code duplication with data-list.c. */ - #include #include @@ -30,15 +28,17 @@ #include #include #include -#include +#include +#include #include #include #include #include #include -#include +#include #include #include +#include #include #include @@ -46,73 +46,60 @@ #define _(msgid) gettext (msgid) /* Describes what to do when an output field is encountered. */ -enum +enum field_type { - PRT_ERROR, /* Invalid value. */ - PRT_NEWLINE, /* Newline. */ - PRT_CONST, /* Constant string. */ - PRT_VAR, /* Variable. */ - PRT_SPACE /* A single space. */ + PRT_LITERAL, /* Literal string. */ + PRT_VAR /* Variable. */ }; /* Describes how to output one field. */ struct prt_out_spec { - struct prt_out_spec *next; - int type; /* PRT_* constant. */ - int fc; /* 0-based first column. */ - union - { - char *c; /* PRT_CONST: Associated string. */ - struct - { - struct variable *v; /* PRT_VAR: Associated variable. */ - struct fmt_spec f; /* PRT_VAR: Output spec. */ - } - v; - } - u; + /* All fields. */ + struct ll ll; /* In struct print_trns `specs' list. */ + enum field_type type; /* What type of field this is. */ + int record; /* 1-based record number. */ + int first_column; /* 0-based first column. */ + + /* PRT_VAR only. */ + struct variable *var; /* Associated variable. */ + struct fmt_spec format; /* Output spec. */ + bool add_space; /* Add trailing space? */ + + /* PRT_LITERAL only. */ + struct string string; /* String to output. */ }; -/* Enums for use with print_trns's `options' field. */ -enum - { - PRT_CMD_MASK = 1, /* Command type mask. */ - PRT_PRINT = 0, /* PRINT transformation identifier. */ - PRT_WRITE = 1, /* WRITE transformation identifier. */ - PRT_EJECT = 002, /* Can be combined with CMD_PRINT only. */ - PRT_BINARY = 004 /* File is binary, omit newlines. */ - }; +static inline struct prt_out_spec * +ll_to_prt_out_spec (struct ll *ll) +{ + return ll_data (ll, struct prt_out_spec, ll); +} /* PRINT, PRINT EJECT, WRITE private data structure. */ struct print_trns { + struct pool *pool; /* Stores related data. */ + bool eject; /* Eject page before printing? */ + bool omit_new_lines; /* Omit new-line characters? */ struct dfm_writer *writer; /* Output file, NULL=listing file. */ - int options; /* PRT_* bitmapped field. */ - struct prt_out_spec *spec; /* Output specifications. */ - int max_width; /* Maximum line width including null. */ - char *line; /* Buffer for sticking lines in. */ + struct ll_list specs; /* List of struct prt_out_specs. */ + size_t record_cnt; /* Number of records to write. */ + struct string line; /* Output buffer. */ }; -/* PRT_PRINT or PRT_WRITE. */ -int which_cmd; - -/* Holds information on parsing the data file. */ -static struct print_trns prt; - -/* Last prt_out_spec in the chain. Used for building the linked-list. */ -static struct prt_out_spec *next; - -/* Number of records. */ -static int nrec; +enum which_formats + { + PRINT, + WRITE + }; -static int internal_cmd_print (int flags); +static int internal_cmd_print (enum which_formats, bool eject); static trns_proc_func print_trns_proc; static trns_free_func print_trns_free; -static int parse_specs (void); -static void dump_table (const struct file_handle *); -static void append_var_spec (struct prt_out_spec *); -static void alloc_line (void); +static bool parse_specs (struct pool *tmp_pool, struct print_trns *, + enum which_formats); +static void dump_table (struct print_trns *, const struct file_handle *); /* Basic parsing. */ @@ -120,44 +107,45 @@ static void alloc_line (void); int cmd_print (void) { - return internal_cmd_print (PRT_PRINT); + return internal_cmd_print (PRINT, false); } /* Parses PRINT EJECT command. */ int cmd_print_eject (void) { - return internal_cmd_print (PRT_PRINT | PRT_EJECT); + return internal_cmd_print (PRINT, true); } /* Parses WRITE command. */ int cmd_write (void) { - return internal_cmd_print (PRT_WRITE); + return internal_cmd_print (WRITE, false); } -/* Parses the output commands. F is PRT_PRINT, PRT_WRITE, or - PRT_PRINT|PRT_EJECT. */ +/* Parses the output commands. */ static int -internal_cmd_print (int f) +internal_cmd_print (enum which_formats which_formats, bool eject) { - int table = 0; /* Print table? */ - struct print_trns *trns = NULL; /* malloc()'d transformation. */ + bool print_table = 0; + struct print_trns *trns; struct file_handle *fh = NULL; + struct pool *tmp_pool; /* Fill in prt to facilitate error-handling. */ - prt.writer = NULL; - prt.options = f; - prt.spec = NULL; - prt.line = NULL; - next = NULL; - nrec = 0; + trns = pool_create_container (struct print_trns, pool); + trns->eject = eject; + trns->writer = NULL; + trns->record_cnt = 0; + ll_init (&trns->specs); + ds_init_empty (&trns->line); + ds_register_pool (&trns->line, trns->pool); - which_cmd = f & PRT_CMD_MASK; + tmp_pool = pool_create_subpool (trns->pool); /* Parse the command options. */ - while (!lex_match ('/')) + while (token != '/') { if (lex_match_id ("OUTFILE")) { @@ -173,14 +161,14 @@ internal_cmd_print (int f) lex_match ('('); if (!lex_force_int ()) goto error; - nrec = lex_integer (); + trns->record_cnt = lex_integer (); lex_get (); lex_match (')'); } else if (lex_match_id ("TABLE")) - table = 1; + print_table = true; else if (lex_match_id ("NOTABLE")) - table = 0; + print_table = false; else { lex_error (_("expecting a valid subcommand")); @@ -189,616 +177,192 @@ internal_cmd_print (int f) } /* Parse variables and strings. */ - if (!parse_specs ()) + if (!parse_specs (tmp_pool, trns, which_formats)) + goto error; + + if (lex_end_of_command () != CMD_SUCCESS) goto error; if (fh != NULL) { - prt.writer = dfm_open_writer (fh); - if (prt.writer == NULL) + trns->writer = dfm_open_writer (fh); + if (trns->writer == NULL) goto error; - if (fh_get_mode (fh) == FH_MODE_BINARY) - prt.options |= PRT_BINARY; + trns->omit_new_lines = (which_formats == WRITE + && fh_get_mode (fh) == FH_MODE_BINARY); } /* Output the variable table if requested. */ - if (table) - dump_table (fh); - - /* Count the maximum line width. Allocate linebuffer if - applicable. */ - alloc_line (); + if (print_table) + dump_table (trns, fh); /* Put the transformation in the queue. */ - trns = xmalloc (sizeof *trns); - memcpy (trns, &prt, sizeof *trns); add_transformation (print_trns_proc, print_trns_free, trns); + pool_destroy (tmp_pool); + return CMD_SUCCESS; error: - print_trns_free (&prt); + print_trns_free (trns); return CMD_FAILURE; } - -/* Appends the field output specification SPEC to the list maintained - in prt. */ -static void -append_var_spec (struct prt_out_spec *spec) -{ - if (next == 0) - prt.spec = next = xmalloc (sizeof *spec); - else - next = next->next = xmalloc (sizeof *spec); - - memcpy (next, spec, sizeof *spec); - next->next = NULL; -} -/* Field parsing. Mostly stolen from data-list.c. */ - -/* Used for chaining together fortran-like format specifiers. */ -struct fmt_list -{ - struct fmt_list *next; - int count; - struct fmt_spec f; - struct fmt_list *down; -}; - -/* Used as "local" variables among the fixed-format parsing funcs. If - it were guaranteed that PSPP were going to be compiled by gcc, - I'd make all these functions a single set of nested functions. */ -static struct - { - struct variable **v; /* variable list */ - size_t nv; /* number of variables in list */ - size_t cv; /* number of variables from list used up so far - by the FORTRAN-like format specifiers */ - - int recno; /* current 1-based record number */ - int sc; /* 1-based starting column for next variable */ - - struct prt_out_spec spec; /* next format spec to append to list */ - int fc, lc; /* first, last 1-based column number of current - var */ - - int level; /* recursion level for FORTRAN-like format - specifiers */ - } -fx; - -static int fixed_parse_compatible (void); -static struct fmt_list *fixed_parse_fortran (void); - -static int parse_string_argument (void); -static int parse_variable_argument (void); +static bool parse_string_argument (struct print_trns *, + int record, int *column); +static bool parse_variable_argument (struct print_trns *, + struct pool *tmp_pool, + int *record, int *column, + enum which_formats); /* Parses all the variable and string specifications on a single PRINT, PRINT EJECT, or WRITE command into the prt structure. Returns success. */ -static int -parse_specs (void) +static bool +parse_specs (struct pool *tmp_pool, struct print_trns *trns, + enum which_formats which_formats) { - /* Return code from called function. */ - int code; - - fx.recno = 1; - fx.sc = 1; + int record = 0; + int column = 1; while (token != '.') { - while (lex_match ('/')) - { - int prev_recno = fx.recno; - - fx.recno++; - if (lex_is_number ()) - { - if (!lex_force_int ()) - return 0; - if (lex_integer () < fx.recno) - { - msg (SE, _("The record number specified, %ld, is " - "before the previous record, %d. Data " - "fields must be listed in order of " - "increasing record number."), - lex_integer (), fx.recno - 1); - return 0; - } - fx.recno = lex_integer (); - lex_get (); - } - - fx.spec.type = PRT_NEWLINE; - while (prev_recno++ < fx.recno) - append_var_spec (&fx.spec); - - fx.sc = 1; - } + bool ok; + + if (!parse_record_placement (&record, &column)) + return false; if (token == T_STRING) - code = parse_string_argument (); + ok = parse_string_argument (trns, record, &column); else - code = parse_variable_argument (); - if (!code) + ok = parse_variable_argument (trns, tmp_pool, &record, &column, + which_formats); + if (!ok) return 0; } - fx.spec.type = PRT_NEWLINE; - append_var_spec (&fx.spec); - if (!nrec) - nrec = fx.recno; - else if (fx.recno > nrec) - { - msg (SE, _("Variables are specified on records that " - "should not exist according to RECORDS subcommand.")); - return 0; - } - - if (token != '.') - { - lex_error (_("expecting end of command")); - return 0; - } - - return 1; + if (trns->record_cnt != 0 && trns->record_cnt != record) + msg (SW, _("Output calls for %d records but %d specified on RECORDS " + "subcommand."), + record, trns->record_cnt); + trns->record_cnt = record; + + return true; } /* Parses a string argument to the PRINT commands. Returns success. */ -static int -parse_string_argument (void) +static bool +parse_string_argument (struct print_trns *trns, int record, int *column) { - fx.spec.type = PRT_CONST; - fx.spec.fc = fx.sc - 1; - fx.spec.u.c = ds_xstrdup (&tokstr); + struct prt_out_spec *spec = pool_alloc (trns->pool, sizeof *spec); + spec->type = PRT_LITERAL; + spec->record = record; + spec->first_column = *column; + ds_init_string (&spec->string, &tokstr); + ds_register_pool (&spec->string, trns->pool); lex_get (); /* Parse the included column range. */ if (lex_is_number ()) { - /* Width of column range in characters. */ - int c_len; + int first_column, last_column; + bool range_specified; - /* Width of constant string in characters. */ - int s_len; + if (!parse_column_range (&first_column, &last_column, &range_specified)) + return false; - /* 1-based index of last column in range. */ - int lc; - - if (!lex_is_integer () || lex_integer () <= 0) - { - msg (SE, _("%g is not a valid column location."), tokval); - goto fail; - } - fx.spec.fc = lex_integer () - 1; - - lex_get (); - lex_negative_to_dash (); - if (lex_match ('-')) - { - if (!lex_is_integer ()) - { - msg (SE, _("Column location expected following `%d-'."), - fx.spec.fc + 1); - goto fail; - } - if (lex_integer () <= 0) - { - msg (SE, _("%g is not a valid column location."), tokval); - goto fail; - } - if (lex_integer () < fx.spec.fc + 1) - { - msg (SE, _("%d-%ld is not a valid column range. The second " - "column must be greater than or equal to the first."), - fx.spec.fc + 1, lex_integer ()); - goto fail; - } - lc = lex_integer () - 1; - - lex_get (); - } - else - /* If only a starting location is specified then the field is - the width of the provided string. */ - lc = fx.spec.fc + strlen (fx.spec.u.c) - 1; - - /* Apply the range. */ - c_len = lc - fx.spec.fc + 1; - s_len = strlen (fx.spec.u.c); - if (s_len > c_len) - fx.spec.u.c[c_len] = 0; - else if (s_len < c_len) - { - fx.spec.u.c = xrealloc (fx.spec.u.c, c_len + 1); - memset (&fx.spec.u.c[s_len], ' ', c_len - s_len); - fx.spec.u.c[c_len] = 0; - } - - fx.sc = lc + 1; + spec->first_column = first_column; + if (range_specified) + ds_set_length (&spec->string, last_column - first_column + 1, ' '); } - else - /* If nothing is provided then the field is the width of the - provided string. */ - fx.sc += strlen (fx.spec.u.c); - - append_var_spec (&fx.spec); - return 1; + *column = spec->first_column + ds_length (&spec->string); -fail: - free (fx.spec.u.c); - return 0; + ll_push_tail (&trns->specs, &spec->ll); + return true; } /* Parses a variable argument to the PRINT commands by passing it off to fixed_parse_compatible() or fixed_parse_fortran() as appropriate. Returns success. */ -static int -parse_variable_argument (void) +static bool +parse_variable_argument (struct print_trns *trns, struct pool *tmp_pool, + int *record, int *column, + enum which_formats which_formats) { - if (!parse_variables (default_dict, &fx.v, &fx.nv, PV_DUPLICATE)) - return 0; + struct variable **vars; + size_t var_cnt, var_idx; + struct fmt_spec *formats, *f; + size_t format_cnt; + bool add_space; + + if (!parse_variables_pool (tmp_pool, + default_dict, &vars, &var_cnt, PV_DUPLICATE)) + return false; - if (lex_is_number ()) + if (lex_is_number () || token == '(') { - if (!fixed_parse_compatible ()) - goto fail; - } - else if (token == '(') - { - fx.level = 0; - fx.cv = 0; - if (!fixed_parse_fortran ()) - goto fail; + if (!parse_var_placements (tmp_pool, var_cnt, &formats, &format_cnt)) + return false; + add_space = false; } else { - /* User wants dictionary format specifiers. */ size_t i; lex_match ('*'); - for (i = 0; i < fx.nv; i++) - { - /* Variable. */ - fx.spec.type = PRT_VAR; - fx.spec.fc = fx.sc - 1; - fx.spec.u.v.v = fx.v[i]; - fx.spec.u.v.f = fx.v[i]->print; - append_var_spec (&fx.spec); - fx.sc += fx.v[i]->print.w; - - /* Space. */ - fx.spec.type = PRT_SPACE; - fx.spec.fc = fx.sc - 1; - append_var_spec (&fx.spec); - fx.sc++; - } - } - - free (fx.v); - return 1; - -fail: - free (fx.v); - return 0; -} - -/* Verifies that FORMAT doesn't need a variable wider than WIDTH. - Returns true iff that is the case. */ -static bool -check_string_width (const struct fmt_spec *format, const struct variable *v) -{ - if (get_format_var_width (format) > v->width) - { - msg (SE, _("Variable %s has width %d so it cannot be output " - "as format %s."), - v->name, v->width, fmt_to_string (format)); - return false; + + formats = pool_nmalloc (tmp_pool, var_cnt, sizeof *formats); + format_cnt = var_cnt; + for (i = 0; i < var_cnt; i++) + { + struct variable *v = vars[i]; + formats[i] = which_formats == PRINT ? v->print : v->write; + } + add_space = true; } - return true; -} -/* Parses a column specification for parse_specs(). */ -static int -fixed_parse_compatible (void) -{ - int individual_var_width; - int type; - size_t i; - - type = fx.v[0]->type; - for (i = 1; i < fx.nv; i++) - if (type != fx.v[i]->type) + var_idx = 0; + for (f = formats; f < &formats[format_cnt]; f++) + if (!execute_placement_format (f, record, column)) { - msg (SE, _("%s is not of the same type as %s. To specify " - "variables of different types in the same variable " - "list, use a FORTRAN-like format specifier."), - fx.v[i]->name, fx.v[0]->name); - return 0; - } - - if (!lex_force_int ()) - return 0; - fx.fc = lex_integer () - 1; - if (fx.fc < 0) - { - msg (SE, _("Column positions for fields must be positive.")); - return 0; - } - lex_get (); - - lex_negative_to_dash (); - if (lex_match ('-')) - { - if (!lex_force_int ()) - return 0; - fx.lc = lex_integer () - 1; - if (fx.lc < 0) - { - msg (SE, _("Column positions for fields must be positive.")); - return 0; - } - else if (fx.lc < fx.fc) - { - msg (SE, _("The ending column for a field must not " - "be less than the starting column.")); - return 0; - } - lex_get (); - } - else - fx.lc = fx.fc; - - fx.spec.u.v.f.w = fx.lc - fx.fc + 1; - if (lex_match ('(')) - { - struct fmt_desc *fdp; - - if (token == T_ID) - { - const char *cp; - - fx.spec.u.v.f.type = parse_format_specifier_name (&cp, 0); - if (fx.spec.u.v.f.type == -1) - return 0; - if (*cp) - { - msg (SE, _("A format specifier on this line " - "has extra characters on the end.")); - return 0; - } - lex_get (); - lex_match (','); - } - else - fx.spec.u.v.f.type = FMT_F; + struct variable *var; + struct prt_out_spec *spec; - if (lex_is_number ()) - { - if (!lex_force_int ()) - return 0; - if (lex_integer () < 1) - { - msg (SE, _("The value for number of decimal places " - "must be at least 1.")); - return 0; - } - fx.spec.u.v.f.d = lex_integer (); - lex_get (); - } - else - fx.spec.u.v.f.d = 0; - - fdp = &formats[fx.spec.u.v.f.type]; - if (fdp->n_args < 2 && fx.spec.u.v.f.d) - { - msg (SE, _("Input format %s doesn't accept decimal places."), - fdp->name); - return 0; - } - if (fx.spec.u.v.f.d > 16) - fx.spec.u.v.f.d = 16; - - if (!lex_force_match (')')) - return 0; - } - else - { - fx.spec.u.v.f.type = FMT_F; - fx.spec.u.v.f.d = 0; - } - - fx.sc = fx.lc + 1; - - if ((fx.lc - fx.fc + 1) % fx.nv) - { - msg (SE, _("The %d columns %d-%d can't be evenly divided into %u " - "fields."), - fx.lc - fx.fc + 1, fx.fc + 1, fx.lc + 1, (unsigned) fx.nv); - return 0; - } - - individual_var_width = (fx.lc - fx.fc + 1) / fx.nv; - fx.spec.u.v.f.w = individual_var_width; - if (!check_output_specifier (&fx.spec.u.v.f, true) - || !check_specifier_type (&fx.spec.u.v.f, type, true)) - return 0; - if (type == ALPHA) - { - for (i = 0; i < fx.nv; i++) - if (!check_string_width (&fx.spec.u.v.f, fx.v[i])) + var = vars[var_idx++]; + if (!check_specifier_width (f, var->width, true)) return false; - } - - fx.spec.type = PRT_VAR; - for (i = 0; i < fx.nv; i++) - { - fx.spec.fc = fx.fc + individual_var_width * i; - fx.spec.u.v.v = fx.v[i]; - append_var_spec (&fx.spec); - } - return 1; -} -/* Destroy a format list and, optionally, all its sublists. */ -static void -destroy_fmt_list (struct fmt_list *f, int recurse) -{ - struct fmt_list *next; - - for (; f; f = next) - { - next = f->next; - if (recurse && f->f.type == FMT_DESCEND) - destroy_fmt_list (f->down, 1); - free (f); - } -} + spec = pool_alloc (trns->pool, sizeof *spec); + spec->type = PRT_VAR; + spec->record = *record; + spec->first_column = *column; + spec->var = var; + spec->format = *f; + spec->add_space = add_space; + ll_push_tail (&trns->specs, &spec->ll); -/* Recursively puts the format list F (which represents a set of - FORTRAN-like format specifications, like 4(F10,2X)) into the - structure prt. */ -static int -dump_fmt_list (struct fmt_list *f) -{ - int i; - - for (; f; f = f->next) - if (f->f.type == FMT_X) - fx.sc += f->count; - else if (f->f.type == FMT_T) - fx.sc = f->f.w; - else if (f->f.type == FMT_NEWREC) - { - fx.recno += f->count; - fx.sc = 1; - fx.spec.type = PRT_NEWLINE; - for (i = 0; i < f->count; i++) - append_var_spec (&fx.spec); + *column += f->w + add_space; } - else - for (i = 0; i < f->count; i++) - if (f->f.type == FMT_DESCEND) - { - if (!dump_fmt_list (f->down)) - return 0; - } - else - { - struct variable *v; - - if (fx.cv >= fx.nv) - { - msg (SE, _("The number of format " - "specifications exceeds the number of variable " - "names given.")); - return 0; - } - - v = fx.v[fx.cv++]; - if (!check_output_specifier (&f->f, true) - || !check_specifier_type (&f->f, v->type, true) - || !check_string_width (&f->f, v)) - return false; - - fx.spec.type = PRT_VAR; - fx.spec.u.v.v = v; - fx.spec.u.v.f = f->f; - fx.spec.fc = fx.sc - 1; - append_var_spec (&fx.spec); - - fx.sc += f->f.w; - } - return 1; -} - -/* Recursively parses a list of FORTRAN-like format specifiers. Calls - itself to parse nested levels of parentheses. Returns to its - original caller NULL, to indicate error, non-NULL, but nothing - useful, to indicate success (it returns a free()'d block). */ -static struct fmt_list * -fixed_parse_fortran (void) -{ - struct fmt_list *head = NULL; - struct fmt_list *fl = NULL; - - lex_get (); /* skip opening parenthesis */ - while (token != ')') - { - if (fl) - fl = fl->next = xmalloc (sizeof *fl); - else - head = fl = xmalloc (sizeof *fl); - - if (lex_is_number ()) - { - if (!lex_is_integer ()) - goto fail; - fl->count = lex_integer (); - lex_get (); - } - else - fl->count = 1; - - if (token == '(') - { - fl->f.type = FMT_DESCEND; - fx.level++; - fl->down = fixed_parse_fortran (); - fx.level--; - if (!fl->down) - goto fail; - } - else if (lex_match ('/')) - fl->f.type = FMT_NEWREC; - else if (!parse_format_specifier (&fl->f, FMTP_ALLOW_XT) - || !check_output_specifier (&fl->f, 1)) - goto fail; + assert (var_idx == var_cnt); - lex_match (','); - } - fl->next = NULL; - lex_get (); - - if (fx.level) - return head; - - fl->next = NULL; - dump_fmt_list (head); - destroy_fmt_list (head, 1); - if (fx.cv < fx.nv) - { - msg (SE, _("There aren't enough format specifications " - "to match the number of variable names given.")); - goto fail; - } - return head; - -fail: - fl->next = NULL; - destroy_fmt_list (head, 0); - - return NULL; + return true; } /* Prints the table produced by the TABLE subcommand to the listing file. */ static void -dump_table (const struct file_handle *fh) +dump_table (struct print_trns *trns, const struct file_handle *fh) { struct prt_out_spec *spec; struct tab_table *t; - int recno; - int nspec; + int spec_cnt; + int row; - for (nspec = 0, spec = prt.spec; spec; spec = spec->next) - if (spec->type == PRT_CONST || spec->type == PRT_VAR) - nspec++; - t = tab_create (4, nspec + 1, 0); + spec_cnt = ll_count (&trns->specs); + t = tab_create (4, spec_cnt + 1, 0); tab_columns (t, TAB_COL_DOWN, 1); - tab_box (t, TAL_1, TAL_1, TAL_0, TAL_1, 0, 0, 3, nspec); + tab_box (t, TAL_1, TAL_1, TAL_0, TAL_1, 0, 0, 3, spec_cnt); tab_hline (t, TAL_2, 0, 3, 1); tab_headers (t, 0, 0, 1, 0); tab_text (t, 0, 0, TAB_CENTER | TAT_TITLE, _("Variable")); @@ -806,304 +370,115 @@ dump_table (const struct file_handle *fh) tab_text (t, 2, 0, TAB_CENTER | TAT_TITLE, _("Columns")); tab_text (t, 3, 0, TAB_CENTER | TAT_TITLE, _("Format")); tab_dim (t, tab_natural_dimensions); - for (nspec = recno = 0, spec = prt.spec; spec; spec = spec->next) - switch (spec->type) - { - case PRT_NEWLINE: - recno++; - break; - case PRT_CONST: - { - int len = strlen (spec->u.c); - nspec++; - tab_text (t, 0, nspec, TAB_LEFT | TAB_FIX | TAT_PRINTF, - "\"%s\"", spec->u.c); - tab_text (t, 1, nspec, TAT_PRINTF, "%d", recno + 1); - tab_text (t, 2, nspec, TAT_PRINTF, "%3d-%3d", - spec->fc + 1, spec->fc + len); - tab_text (t, 3, nspec, TAB_LEFT | TAB_FIX | TAT_PRINTF, - "A%d", len); - break; - } - case PRT_VAR: - { - nspec++; - tab_text (t, 0, nspec, TAB_LEFT, spec->u.v.v->name); - tab_text (t, 1, nspec, TAT_PRINTF, "%d", recno + 1); - tab_text (t, 2, nspec, TAT_PRINTF, "%3d-%3d", - spec->fc + 1, spec->fc + spec->u.v.f.w); - tab_text (t, 3, nspec, TAB_LEFT | TAB_FIX, - fmt_to_string (&spec->u.v.f)); - break; + row = 1; + ll_for_each (spec, struct prt_out_spec, ll, &trns->specs) + { + int width; + switch (spec->type) + { + case PRT_LITERAL: + tab_text (t, 0, row, TAB_LEFT | TAB_FIX | TAT_PRINTF, "\"%.*s\"", + (int) ds_length (&spec->string), ds_data (&spec->string)); + width = ds_length (&spec->string); + break; + case PRT_VAR: + tab_text (t, 0, row, TAB_LEFT, spec->var->name); + tab_text (t, 3, row, TAB_LEFT | TAB_FIX, + fmt_to_string (&spec->format)); + width = spec->format.w; + break; + default: + NOT_REACHED (); } - case PRT_SPACE: - break; - case PRT_ERROR: - NOT_REACHED (); - } + tab_text (t, 1, row, TAT_PRINTF, "%d", spec->record); + tab_text (t, 2, row, TAT_PRINTF, "%3d-%3d", + spec->first_column, spec->first_column + width - 1); + row++; + } if (fh != NULL) tab_title (t, ngettext ("Writing %d record to %s.", - "Writing %d records to %s.", recno), - recno, fh_get_name (fh)); + "Writing %d records to %s.", trns->record_cnt), + trns->record_cnt, fh_get_name (fh)); else tab_title (t, ngettext ("Writing %d record.", - "Writing %d records.", recno), recno); + "Writing %d records.", trns->record_cnt), + trns->record_cnt); tab_submit (t); } - -/* Calculates the maximum possible line width and allocates a buffer - big enough to contain it */ -static void -alloc_line (void) -{ - /* Cumulative maximum line width (excluding null terminator) so far. */ - int w = 0; - - /* Width required by current this prt_out_spec. */ - int pot_w; /* Potential w. */ - - /* Iterator. */ - struct prt_out_spec *i; - - for (i = prt.spec; i; i = i->next) - { - switch (i->type) - { - case PRT_NEWLINE: - pot_w = 0; - break; - case PRT_CONST: - pot_w = i->fc + strlen (i->u.c); - break; - case PRT_VAR: - pot_w = i->fc + i->u.v.f.w; - break; - case PRT_SPACE: - pot_w = i->fc + 1; - break; - case PRT_ERROR: - default: - NOT_REACHED (); - } - if (pot_w > w) - w = pot_w; - } - prt.max_width = w + 2; - prt.line = xmalloc (prt.max_width); -} /* Transformation. */ +static void flush_records (struct print_trns *, + int target_record, int *record); + /* Performs the transformation inside print_trns T on case C. */ static int print_trns_proc (void *trns_, struct ccase *c, int case_num UNUSED) { - /* Transformation. */ - struct print_trns *t = trns_; - - /* Iterator. */ - struct prt_out_spec *i; - - /* Line buffer. */ - char *buf = t->line; - - /* Length of the line in buf. */ - int len = 0; - memset (buf, ' ', t->max_width); + struct print_trns *trns = trns_; + struct prt_out_spec *spec; + int record; - if (t->options & PRT_EJECT) + if (trns->eject) som_eject_page (); - /* Note that a field written to a place where a field has - already been written truncates the record. `PRINT /A B - (T10,F8,T1,F8).' only outputs B. */ - for (i = t->spec; i; i = i->next) - switch (i->type) - { - case PRT_NEWLINE: - if (t->writer == NULL) - { - buf[len] = 0; - tab_output_text (TAB_FIX | TAT_NOWRAP, buf); - } - else - { - if ((t->options & PRT_CMD_MASK) == PRT_PRINT - || !(t->options & PRT_BINARY)) - buf[len++] = '\n'; - - dfm_put_record (t->writer, buf, len); - } - - memset (buf, ' ', t->max_width); - len = 0; - break; - - case PRT_CONST: - /* FIXME: Should be revised to keep track of the string's - length outside the loop, probably in i->u.c[0]. */ - memcpy (&buf[i->fc], i->u.c, strlen (i->u.c)); - len = i->fc + strlen (i->u.c); - break; - - case PRT_VAR: - data_out (&buf[i->fc], &i->u.v.f, case_data (c, i->u.v.v->fv)); - len = i->fc + i->u.v.f.w; - break; - - case PRT_SPACE: - /* PRT_SPACE always immediately follows PRT_VAR. */ - buf[len++] = ' '; - break; - - case PRT_ERROR: - NOT_REACHED (); - } - - if (t->writer != NULL && dfm_write_error (t->writer)) - return TRNS_ERROR; - return TRNS_CONTINUE; -} - -/* Frees all the data inside print_trns PRT. Does not free PRT. */ -static bool -print_trns_free (void *prt_) -{ - struct print_trns *prt = prt_; - struct prt_out_spec *i, *n; - bool ok = true; - - for (i = prt->spec; i; i = n) + record = 1; + ds_clear (&trns->line); + ll_for_each (spec, struct prt_out_spec, ll, &trns->specs) { - switch (i->type) - { - case PRT_CONST: - free (i->u.c); - /* fall through */ - case PRT_NEWLINE: - case PRT_VAR: - case PRT_SPACE: - /* nothing to do */ - break; - case PRT_ERROR: - NOT_REACHED (); - } - n = i->next; - free (i); - } - if (prt->writer != NULL) - ok = dfm_close_writer (prt->writer); - free (prt->line); - return ok; -} - -/* PRINT SPACE. */ - -/* PRINT SPACE transformation. */ -struct print_space_trns -{ - struct dfm_writer *writer; /* Output data file. */ - struct expression *e; /* Number of lines; NULL=1. */ -} -print_space_trns; - -static trns_proc_func print_space_trns_proc; -static trns_free_func print_space_trns_free; - -int -cmd_print_space (void) -{ - struct print_space_trns *t; - struct file_handle *fh; - struct expression *e; - struct dfm_writer *writer; - - if (lex_match_id ("OUTFILE")) - { - lex_match ('='); - - fh = fh_parse (FH_REF_FILE); - if (fh == NULL) - return CMD_FAILURE; - lex_get (); - } - else - fh = NULL; - - if (token != '.') - { - e = expr_parse (default_dict, EXPR_NUMBER); - if (token != '.') - { - expr_free (e); - lex_error (_("expecting end of command")); - return CMD_FAILURE; - } - } - else - e = NULL; - - if (fh != NULL) - { - writer = dfm_open_writer (fh); - if (writer == NULL) + flush_records (trns, spec->record, &record); + + ds_set_length (&trns->line, spec->first_column - 1, ' '); + if (spec->type == PRT_VAR) { - expr_free (e); - return CMD_FAILURE; - } + data_out (ds_put_uninit (&trns->line, spec->format.w), + &spec->format, case_data (c, spec->var->fv)); + if (spec->add_space) + ds_put_char (&trns->line, ' '); + } + else + ds_put_substring (&trns->line, ds_ss (&spec->string)); } - else - writer = NULL; + flush_records (trns, trns->record_cnt + 1, &record); - t = xmalloc (sizeof *t); - t->writer = writer; - t->e = e; - - add_transformation (print_space_trns_proc, print_space_trns_free, t); - return CMD_SUCCESS; + if (trns->writer != NULL && dfm_write_error (trns->writer)) + return TRNS_ERROR; + return TRNS_CONTINUE; } -/* Executes a PRINT SPACE transformation. */ -static int -print_space_trns_proc (void *t_, struct ccase *c, - int case_num UNUSED) +static void +flush_records (struct print_trns *trns, int target_record, int *record) { - struct print_space_trns *t = t_; - int n; - - n = 1; - if (t->e) + while (target_record > *record) { - double f = expr_evaluate_num (t->e, c, case_num); - if (f == SYSMIS) - msg (SW, _("The expression on PRINT SPACE evaluated to the " - "system-missing value.")); - else if (f < 0 || f > INT_MAX) - msg (SW, _("The expression on PRINT SPACE evaluated to %g."), f); + if (trns->writer == NULL) + tab_output_text (TAB_FIX | TAT_NOWRAP, ds_cstr (&trns->line)); else - n = f; - } + { + if (!trns->omit_new_lines) + ds_put_char (&trns->line, '\n'); - while (n--) - if (t->writer == NULL) - som_blank_line (); - else - dfm_put_record (t->writer, "\n", 1); + dfm_put_record (trns->writer, + ds_data (&trns->line), ds_length (&trns->line)); + } + ds_clear (&trns->line); - if (t->writer != NULL && dfm_write_error (t->writer)) - return TRNS_ERROR; - return TRNS_CONTINUE; + (*record)++; + } } -/* Frees a PRINT SPACE transformation. - Returns true if successful, false if an I/O error occurred. */ +/* Frees TRNS. */ static bool -print_space_trns_free (void *trns_) +print_trns_free (void *trns_) { - struct print_space_trns *trns = trns_; - bool ok = dfm_close_writer (trns->writer); - expr_free (trns->e); - free (trns); + struct print_trns *trns = trns_; + bool ok = true; + + if (trns->writer != NULL) + ok = dfm_close_writer (trns->writer); + pool_destroy (trns->pool); + return ok; } + diff --git a/src/language/dictionary/formats.c b/src/language/dictionary/formats.c index 7b01b2b8..c5676234 100644 --- a/src/language/dictionary/formats.c +++ b/src/language/dictionary/formats.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include @@ -91,7 +92,7 @@ internal_cmd_formats (int which) msg (SE, _("`(' expected after variable list")); goto fail; } - if (!parse_format_specifier (&f, 0) + if (!parse_format_specifier (&f) || !check_output_specifier (&f, true) || !check_specifier_type (&f, NUMERIC, true)) goto fail; diff --git a/src/language/dictionary/numeric.c b/src/language/dictionary/numeric.c index 82b58c02..71de3d87 100644 --- a/src/language/dictionary/numeric.c +++ b/src/language/dictionary/numeric.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include @@ -56,7 +57,7 @@ cmd_numeric (void) /* Get the optional format specification. */ if (lex_match ('(')) { - if (!parse_format_specifier (&f, 0)) + if (!parse_format_specifier (&f)) goto fail; if (formats[f.type].cat & FCAT_STRING) { @@ -126,8 +127,7 @@ cmd_string (void) if (!parse_DATA_LIST_vars (&v, &nv, PV_NONE)) return CMD_FAILURE; - if (!lex_force_match ('(') - || !parse_format_specifier (&f, 0)) + if (!lex_force_match ('(') || !parse_format_specifier (&f)) goto fail; if (!(formats[f.type].cat & FCAT_STRING)) { diff --git a/src/language/expressions/parse.c b/src/language/expressions/parse.c index bb567d2c..66d31d59 100644 --- a/src/language/expressions/parse.c +++ b/src/language/expressions/parse.c @@ -29,6 +29,7 @@ #include #include #include "helpers.h" +#include #include #include #include @@ -812,7 +813,13 @@ parse_primary (struct expression *e) { /* Try to parse it as a format specifier. */ struct fmt_spec fmt; - if (parse_format_specifier (&fmt, FMTP_SUPPRESS_ERRORS)) + bool ok; + + msg_disable (); + ok = parse_format_specifier (&fmt); + msg_enable (); + + if (ok) return expr_allocate_format (e, &fmt); /* All attempts failed. */ diff --git a/src/language/lexer/ChangeLog b/src/language/lexer/ChangeLog index 2bc5b87e..90eb0e21 100644 --- a/src/language/lexer/ChangeLog +++ b/src/language/lexer/ChangeLog @@ -1,3 +1,23 @@ +Sun Jul 16 21:03:34 2006 Ben Pfaff + + * format-parser.h: New file. Moved prototypes of format-parser.c + functions here, from lexer.h. + + * format-parser.c: (parse_format_specifier_name) Rewrote and + changed semantics. + (parse_abstract_format_specifier) New function. + (parse_format_specifier) Rewrote in terms of + parse_abstract_format_specifier. Removed "options" parameter, so + callers had to be updated. Callers that didn't want messages + emitted were changed to use the new msg_disable/msg_enable + functions. + + * variables-parser.c: (parse_variables_pool) New function. + (register_vars_pool) New function. + (parse_DATA_LIST_vars_pool) New function. + (parse_mixed_vars_pool) Use register_vars_pool. Assert that + PV_APPEND is not in the options. + Sat Jul 1 17:40:38 2006 Ben Pfaff Fix bug #11612, "q2c documentation does not agree with code". diff --git a/src/language/lexer/automake.mk b/src/language/lexer/automake.mk index c2a32d77..780eae59 100644 --- a/src/language/lexer/automake.mk +++ b/src/language/lexer/automake.mk @@ -8,6 +8,7 @@ src_language_lexer_liblexer_a_SOURCES = \ src/language/lexer/subcommand-list.c \ src/language/lexer/subcommand-list.h \ src/language/lexer/format-parser.c \ + src/language/lexer/format-parser.h \ src/language/lexer/range-parser.c \ src/language/lexer/range-parser.h \ src/language/lexer/variable-parser.c \ diff --git a/src/language/lexer/format-parser.c b/src/language/lexer/format-parser.c index aa4729b0..2541e5b5 100644 --- a/src/language/lexer/format-parser.c +++ b/src/language/lexer/format-parser.c @@ -18,145 +18,117 @@ 02110-1301, USA. */ #include -#include + +#include + #include -#include #include -#include + #include "lexer.h" +#include +#include +#include #include #include -#include + +#include "size_max.h" #include "gettext.h" #define _(msgid) gettext (msgid) - -/* Parses the alphabetic prefix of the current token as a format - specifier name. Returns the corresponding format specifier - type if successful, or -1 on failure. If ALLOW_XT is zero, - then X and T format specifiers are not allowed. If CP is - nonzero, then *CP is set to the first non-alphabetic character - in the current token on success or to a null pointer on - failure. */ -int -parse_format_specifier_name (const char **cp, enum fmt_parse_flags flags) +/* Parses a token taking the form of a format specifier and + returns true only if successful. Emits an error message on + failure. Stores a null-terminated string representing the + format type in TYPE, and the width and number of decimal + places in *WIDTH and *DECIMALS. + + TYPE is not checked as to whether it is really the name of a + format. Both width and decimals are considered optional. If + missing, *WIDTH or *DECIMALS or both will be set to 0. */ +bool +parse_abstract_format_specifier (char type[FMT_TYPE_LEN_MAX + 1], + int *width, int *decimals) { - char *sp, *ep; - int idx; + struct substring s; + struct substring type_ss, width_ss, decimals_ss; + bool has_decimals; - sp = ep = ds_cstr (&tokstr); - while (isalpha ((unsigned char) *ep)) - ep++; + if (token != T_ID) + goto error; - if (sp != ep) - { - /* Find format. */ - for (idx = 0; idx < FMT_NUMBER_OF_FORMATS; idx++) - if (strlen (formats[idx].name) == ep - sp - && !buf_compare_case (formats[idx].name, sp, ep - sp)) - break; - - /* Check format. */ - if (idx < FMT_NUMBER_OF_FORMATS) - { - if (!(flags & FMTP_ALLOW_XT) && (idx == FMT_T || idx == FMT_X)) - { - if (!(flags & FMTP_SUPPRESS_ERRORS)) - msg (SE, _("X and T format specifiers not allowed here.")); - idx = -1; - } - } - else - { - /* No match. */ - if (!(flags & FMTP_SUPPRESS_ERRORS)) - msg (SE, _("%.*s is not a valid data format."), - (int) (ep - sp), ds_cstr (&tokstr)); - idx = -1; - } - } - else - { - lex_error ("expecting data format"); - idx = -1; - } - - if (cp != NULL) + /* Extract pieces. */ + s = ds_ss (&tokstr); + ss_get_chars (&s, ss_span (s, ss_cstr (CC_LETTERS)), &type_ss); + ss_get_chars (&s, ss_span (s, ss_cstr (CC_DIGITS)), &width_ss); + if (ss_match_char (&s, '.')) { - if (idx != -1) - *cp = ep; - else - *cp = NULL; + has_decimals = true; + ss_get_chars (&s, ss_span (s, ss_cstr (CC_DIGITS)), &decimals_ss); } + else + has_decimals = false; + + /* Check pieces. */ + if (ss_is_empty (type_ss) || ss_length (type_ss) > FMT_TYPE_LEN_MAX) + goto error; + if (has_decimals && ss_is_empty (decimals_ss)) + goto error; + if (!ss_is_empty (s)) + goto error; + + /* Return pieces. + These uses of strtol are valid only because we know that + their substrings are followed by non-digit characters. */ + str_copy_buf_trunc (type, FMT_TYPE_LEN_MAX + 1, + ss_data (type_ss), ss_length (type_ss)); + *width = strtol (ss_data (width_ss), NULL, 10); + *decimals = has_decimals ? strtol (ss_data (decimals_ss), NULL, 10) : 0; - return idx; -} + lex_get (); + return true; + error: + lex_error (_("expecting valid format specifier")); + return false; +} /* Parses a format specifier from the token stream and returns - nonzero only if successful. Emits an error message on - failure. Allows X and T format specifiers only if ALLOW_XT is - nonzero. The caller should call check_input_specifier() or + true only if successful. Emits an error message on + failure. The caller should call check_input_specifier() or check_output_specifier() on the parsed format as necessary. */ -int -parse_format_specifier (struct fmt_spec *input, enum fmt_parse_flags flags) +bool +parse_format_specifier (struct fmt_spec *format) { - struct fmt_spec spec; - struct fmt_desc *f; - const char *cp; - char *cp2; - int type, w, d; + char type[FMT_TYPE_LEN_MAX + 1]; - if (token != T_ID) - { - if (!(flags & FMTP_SUPPRESS_ERRORS)) - msg (SE, _("Format specifier expected.")); - return 0; - } - type = parse_format_specifier_name (&cp, flags); - if (type == -1) - return 0; - f = &formats[type]; + if (!parse_abstract_format_specifier (type, &format->w, &format->d)) + return false; - w = strtol (cp, &cp2, 10); - if (cp2 == cp && type != FMT_X) - { - if (!(flags & FMTP_SUPPRESS_ERRORS)) - msg (SE, _("Data format %s does not specify a width."), - ds_cstr (&tokstr)); - return 0; - } - if ( w > MAX_STRING ) + if (!fmt_type_from_string (type, &format->type)) { - msg (SE, _("String variable width may not exceed %d"), MAX_STRING); - return 0; + msg (SE, _("Unknown format type \"%s\"."), type); + return false; } + + return true; +} - cp = cp2; - if (f->n_args > 1 && *cp == '.') +/* Parses a token containing just the name of a format type and + returns true if successful. */ +bool +parse_format_specifier_name (int *type) +{ + if (token != T_ID) { - cp++; - d = strtol (cp, &cp2, 10); - cp = cp2; + lex_error (_("expecting format type")); + return false; } - else - d = 0; - - if (*cp) + if (!fmt_type_from_string (ds_cstr (&tokstr), type)) { - if (!(flags & FMTP_SUPPRESS_ERRORS)) - msg (SE, _("Data format %s is not valid."), ds_cstr (&tokstr)); - return 0; + msg (SE, _("Unknown format type \"%s\"."), ds_cstr (&tokstr)); + return false; } lex_get (); - - spec.type = type; - spec.w = w; - spec.d = d; - *input = spec; - - return 1; + return true; } - diff --git a/src/language/lexer/format-parser.h b/src/language/lexer/format-parser.h new file mode 100644 index 00000000..981a5485 --- /dev/null +++ b/src/language/lexer/format-parser.h @@ -0,0 +1,34 @@ +/* PSPP - computes sample statistics. + Copyright (C) 1997-9, 2000 Free Software Foundation, Inc. + Written by Ben Pfaff . + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. */ + +#ifndef LANGUAGE_LEXER_FORMAT_PARSER_H +#define LANGUAGE_LEXER_FORMAT_PARSER_H 1 + +#include + +#include + +struct fmt_spec; + +bool parse_abstract_format_specifier (char type[FMT_TYPE_LEN_MAX + 1], + int *width, int *decimals); +bool parse_format_specifier (struct fmt_spec *); +bool parse_format_specifier_name (int *type); + +#endif /* language/lexer/format-parser.h. */ diff --git a/src/language/lexer/lexer.h b/src/language/lexer/lexer.h index 007c1362..36d79d3a 100644 --- a/src/language/lexer/lexer.h +++ b/src/language/lexer/lexer.h @@ -88,7 +88,4 @@ void lex_negative_to_dash (void); void lex_reset_eof (void); void lex_skip_comment (void); -int parse_format_specifier (struct fmt_spec *input, enum fmt_parse_flags); -int parse_format_specifier_name (const char **cp, enum fmt_parse_flags); - #endif /* !lexer_h */ diff --git a/src/language/lexer/variable-parser.c b/src/language/lexer/variable-parser.c index 904ebe75..98b70ea2 100644 --- a/src/language/lexer/variable-parser.c +++ b/src/language/lexer/variable-parser.c @@ -120,6 +120,29 @@ parse_variables (const struct dictionary *d, struct variable ***var, return success; } +/* Parses a set of variables from dictionary D given options + OPTS. Resulting list of variables stored in *VARS and the + number of variables into *VAR_CNT. Returns nonzero only if + successful. Same behavior as parse_variables, except that all + allocations are taken from the given POOL. */ +int +parse_variables_pool (struct pool *pool, const struct dictionary *dict, + struct variable ***vars, size_t *var_cnt, int opts) +{ + int retval; + + /* PV_APPEND is unsafe because parse_variables would free the + existing names on failure, but those names are presumably + already in the pool, which would attempt to re-free it + later. */ + assert (!(opts & PV_APPEND)); + + retval = parse_variables (dict, vars, var_cnt, opts); + if (retval) + pool_register (pool, free, *vars); + return retval; +} + /* Parses a variable name from VS. If successful, sets *IDX to the variable's index in VS, *CLASS to the variable's dictionary class, and returns nonzero. Returns zero on @@ -472,6 +495,40 @@ fail: return success; } +/* Registers each of the NAMES[0...NNAMES - 1] in POOL, as well + as NAMES itself. */ +static void +register_vars_pool (struct pool *pool, char **names, size_t nnames) +{ + size_t i; + + for (i = 0; i < nnames; i++) + pool_register (pool, free, names[i]); + pool_register (pool, free, names); +} + +/* Parses a list of variable names according to the DATA LIST + version of the TO convention. Same args as + parse_DATA_LIST_vars(), except that all allocations are taken + from the given POOL. */ +int +parse_DATA_LIST_vars_pool (struct pool *pool, + char ***names, size_t *nnames, int pv_opts) +{ + int retval; + + /* PV_APPEND is unsafe because parse_DATA_LIST_vars would free + the existing names on failure, but those names are + presumably already in the pool, which would attempt to + re-free it later. */ + assert (!(pv_opts & PV_APPEND)); + + retval = parse_DATA_LIST_vars (names, nnames, pv_opts); + if (retval) + register_vars_pool (pool, *names, *nnames); + return retval; +} + /* Parses a list of variables where some of the variables may be existing and the rest are to be created. Same args as parse_DATA_LIST_vars(). */ @@ -520,24 +577,25 @@ fail: /* Parses a list of variables where some of the variables may be existing and the rest are to be created. Same args as - parse_DATA_LIST_vars(), except that all allocations are taken + parse_mixed_vars(), except that all allocations are taken from the given POOL. */ int parse_mixed_vars_pool (struct pool *pool, char ***names, size_t *nnames, int pv_opts) { - int retval = parse_mixed_vars (names, nnames, pv_opts); - if (retval) - { - size_t i; + int retval; - for (i = 0; i < *nnames; i++) - pool_register (pool, free, (*names)[i]); - pool_register (pool, free, *names); - } + /* PV_APPEND is unsafe because parse_mixed_vars_pool would free + the existing names on failure, but those names are + presumably already in the pool, which would attempt to + re-free it later. */ + assert (!(pv_opts & PV_APPEND)); + + retval = parse_mixed_vars (names, nnames, pv_opts); + if (retval) + register_vars_pool (pool, *names, *nnames); return retval; } - /* A set of variables. */ struct var_set diff --git a/src/language/lexer/variable-parser.h b/src/language/lexer/variable-parser.h index f7520285..cacb524e 100644 --- a/src/language/lexer/variable-parser.h +++ b/src/language/lexer/variable-parser.h @@ -59,9 +59,13 @@ struct variable *parse_variable (void); struct variable *parse_dict_variable (const struct dictionary *); int parse_variables (const struct dictionary *, struct variable ***, size_t *, int opts); +int parse_variables_pool (struct pool *, const struct dictionary *, + struct variable ***, size_t *, int opts); int parse_var_set_vars (const struct var_set *, struct variable ***, size_t *, int opts); int parse_DATA_LIST_vars (char ***names, size_t *cnt, int opts); +int parse_DATA_LIST_vars_pool (struct pool *, + char ***names, size_t *cnt, int opts); int parse_mixed_vars (char ***names, size_t *cnt, int opts); int parse_mixed_vars_pool (struct pool *, char ***names, size_t *cnt, int opts); diff --git a/src/language/utilities/set.q b/src/language/utilities/set.q index 4a603de7..f44a7a93 100644 --- a/src/language/utilities/set.q +++ b/src/language/utilities/set.q @@ -30,6 +30,7 @@ #include #include #include +#include #include #include #include @@ -410,7 +411,7 @@ stc_custom_format (struct cmd_set *cmd UNUSED, void *aux UNUSED) struct fmt_spec fmt; lex_match ('='); - if (!parse_format_specifier (&fmt, 0)) + if (!parse_format_specifier (&fmt)) return 0; if ((formats[fmt.type].cat & FCAT_STRING) != 0) { diff --git a/src/libpspp/ChangeLog b/src/libpspp/ChangeLog index 5888895b..1cb917d1 100644 --- a/src/libpspp/ChangeLog +++ b/src/libpspp/ChangeLog @@ -1,3 +1,15 @@ +Sun Jul 16 21:07:35 2006 Ben Pfaff + + * message.c: (static int messages_disabled) New variable. + (msg_emit) Don't emit the message if messages are disabled. + (msg_disable) New function. + (msg_enable) New function. + + * str.c: (free_string) New function. + (ds_register_pool) New function. + (ds_unregister_pool) New function. + (ds_set_length) New function. + Mon Jul 10 17:26:58 WST 2006 John Darrington * llx.c: #included compiler.h and removed explicit preprocessor cruft. diff --git a/src/libpspp/message.c b/src/libpspp/message.c index eaad1a69..1a4601fc 100644 --- a/src/libpspp/message.c +++ b/src/libpspp/message.c @@ -21,6 +21,7 @@ #include +#include #include #include #include @@ -41,6 +42,9 @@ static void (*msg_handler) (const struct msg *); static void (*msg_location) (struct msg_locator *); +/* Disables emitting messages if positive. */ +static int messages_disabled; + /* Public functions. */ /* Writes error message in CLASS, with text FORMAT, formatted with @@ -93,16 +97,34 @@ msg_destroy(struct msg *m) free(m); } - /* Emits M as an error message. Frees allocated data in M. */ void msg_emit (struct msg *m) { msg_location (&m->where); - msg_handler (m); + if (!messages_disabled) + msg_handler (m); free (m->text); } + +/* Disables message output until the next call to msg_enable. If + this function is called multiple times, msg_enable must be + called an equal number of times before messages are actually + re-enabled. */ +void +msg_disable (void) +{ + messages_disabled++; +} + +/* Enables message output that was disabled by msg_disable. */ +void +msg_enable (void) +{ + assert (messages_disabled > 0); + messages_disabled--; +} /* Private functions. */ diff --git a/src/libpspp/message.h b/src/libpspp/message.h index 604f185d..e345ea65 100644 --- a/src/libpspp/message.h +++ b/src/libpspp/message.h @@ -99,6 +99,10 @@ void msg (enum msg_class, const char *format, ...) PRINTF_FORMAT (2, 3); void msg_emit (struct msg *); +/* Enable and disable messages. */ +void msg_enable (void); +void msg_disable (void); + /* Error context. */ void msg_set_command_name (const char *); const char *msg_get_command_name (void); diff --git a/src/libpspp/str.c b/src/libpspp/str.c index 6d25c339..cf0069dd 100644 --- a/src/libpspp/str.c +++ b/src/libpspp/str.c @@ -27,6 +27,7 @@ #include #include +#include #include "minmax.h" #include "size_max.h" @@ -676,6 +677,30 @@ ds_swap (struct string *a, struct string *b) *b = tmp; } +/* Helper function for ds_register_pool. */ +static void +free_string (void *st_) +{ + struct string *st = st_; + ds_destroy (st); +} + +/* Arranges for ST to be destroyed automatically as part of + POOL. */ +void +ds_register_pool (struct string *st, struct pool *pool) +{ + pool_register (pool, free_string, st); +} + +/* Cancels the arrangement for ST to be destroyed automatically + as part of POOL. */ +void +ds_unregister_pool (struct string *st, struct pool *pool) +{ + pool_unregister (pool, st); +} + /* Copies SRC into DST. DST and SRC may be the same string. */ void @@ -860,6 +885,18 @@ ds_rpad (struct string *st, size_t length, char pad) ds_put_char_multiple (st, pad, length - st->ss.length); } +/* Sets the length of ST to exactly NEW_LENGTH, + either by truncating characters from the end, + or by padding on the right with PAD. */ +void +ds_set_length (struct string *st, size_t new_length, char pad) +{ + if (st->ss.length < new_length) + ds_rpad (st, new_length, pad); + else + st->ss.length = new_length; +} + /* Returns true if ST is empty, false otherwise. */ bool ds_is_empty (const struct string *st) diff --git a/src/libpspp/str.h b/src/libpspp/str.h index c3c008a6..fd6e739b 100644 --- a/src/libpspp/str.h +++ b/src/libpspp/str.h @@ -151,6 +151,11 @@ void ds_init_cstr (struct string *, const char *); void ds_destroy (struct string *); void ds_swap (struct string *, struct string *); +/* Pools. */ +struct pool; +void ds_register_pool (struct string *, struct pool *); +void ds_unregister_pool (struct string *, struct pool *); + /* Replacement. */ void ds_assign_string (struct string *, const struct string *); void ds_assign_substring (struct string *, struct substring); @@ -172,6 +177,7 @@ bool ds_separate (const struct string *src, struct substring delimiters, bool ds_tokenize (const struct string *src, struct substring delimiters, size_t *save_idx, struct substring *token); void ds_rpad (struct string *, size_t length, char pad); +void ds_set_length (struct string *, size_t new_length, char pad); /* Extracting substrings. */ struct substring ds_ss (const struct string *); diff --git a/tests/ChangeLog b/tests/ChangeLog index 7bc31257..24bde940 100644 --- a/tests/ChangeLog +++ b/tests/ChangeLog @@ -1,3 +1,7 @@ +Sun Jul 16 21:08:51 2006 Ben Pfaff + + * command/print.sh: Update output to match PRINT revisions. + Wed Jul 12 10:07:52 2006 Ben Pfaff * automake.mk: Put ll-test, llx-test in check_PROGRAMS instead of diff --git a/tests/command/print.sh b/tests/command/print.sh index a7e5b655..9caea4e3 100755 --- a/tests/command/print.sh +++ b/tests/command/print.sh @@ -120,11 +120,11 @@ diff -b $TEMPDIR/pspp.list - << EOF |Variable|Record|Columns|Format| #========#======#=======#======# |A | 1| 1- 8|F8.2 | -|"/" | 1| 9- 9|A1 | +|"/" | 1| 9- 9| | |B | 1| 10- 17|E8.2 | -|"/" | 1| 18- 18|A1 | +|"/" | 1| 18- 18| | |C | 1| 19- 28|N10.0 | -|"/" | 1| 29- 29|A1 | +|"/" | 1| 29- 29| | +--------+------+-------+------+ A B C D -------- -------- -------- -------- -- 2.30.2