X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Flanguage%2Fdata-io%2Fdata-list.c;h=92ad0793b04659ff965278e474e9a3894b41a7b4;hb=b74d09af5e07f954c18e7cdb8aca3af47fa10208;hp=6609e32b91b120badaac966fae443cd76b5fcbe0;hpb=77e551d23575da6b89f866612ab39c2b0497c9be;p=pspp-builds.git diff --git a/src/language/data-io/data-list.c b/src/language/data-io/data-list.c index 6609e32b..92ad0793 100644 --- a/src/language/data-io/data-list.c +++ b/src/language/data-io/data-list.c @@ -18,42 +18,42 @@ 02110-1301, USA. */ #include -#include -#include + #include #include #include #include -#include + +#include #include -#include -#include +#include #include -#include #include -#include -#include #include +#include +#include +#include +#include +#include +#include +#include +#include #include +#include +#include +#include +#include #include -#include #include #include -#include -#include -#include "data-list.h" +#include "size_max.h" #include "gettext.h" #define _(msgid) gettext (msgid) /* Utility function. */ -/* FIXME: Either REPEATING DATA must be the last transformation, or we - must multiplex the transformations that follow (i.e., perform them - for every case that we produce from a repetition instance). - Currently we do neither. We should do one or the other. */ - /* Describes how to parse one variable. */ struct dls_var_spec { @@ -92,8 +92,7 @@ struct data_list_pgm struct variable *end; /* Variable specified on END subcommand. */ int rec_cnt; /* Number of records. */ size_t case_size; /* Case size in bytes. */ - char *delims; /* Delimiters if any; not null-terminated. */ - size_t delim_cnt; /* Number of delimiter, or 0 for spaces. */ + struct string delims; /* Field delimiters. */ }; static const struct case_source_class data_list_source_class; @@ -105,12 +104,10 @@ static void dump_fixed_table (const struct dls_var_spec *, static void dump_free_table (const struct data_list_pgm *, const struct file_handle *); static void destroy_dls_var_spec (struct dls_var_spec *); + static trns_free_func data_list_trns_free; static trns_proc_func data_list_trns_proc; -/* Message title for REPEATING DATA. */ -#define RPD_ERR "REPEATING DATA: " - int cmd_data_list (void) { @@ -118,7 +115,7 @@ cmd_data_list (void) int table = -1; /* Print table if nonzero, -1=undecided. */ struct file_handle *fh = fh_inline_file (); - if (!case_source_is_complex (vfm_source)) + if (!in_input_program ()) discard_variables (); dls = xmalloc (sizeof *dls); @@ -126,8 +123,7 @@ cmd_data_list (void) dls->type = -1; dls->end = NULL; dls->rec_cnt = 0; - dls->delims = NULL; - dls->delim_cnt = 0; + ds_init_empty (&dls->delims); dls->first = dls->last = NULL; while (token != '/') @@ -138,13 +134,6 @@ cmd_data_list (void) fh = fh_parse (FH_REF_FILE | FH_REF_INLINE); if (fh == NULL) goto error; - if (case_source_is_class (vfm_source, &file_type_source_class) - && fh != fh_get_default_handle ()) - { - msg (SE, _("DATA LIST must use the same file " - "as the enclosing FILE TYPE.")); - goto error; - } } else if (lex_match_id ("RECORDS")) { @@ -210,9 +199,9 @@ cmd_data_list (void) if (lex_match_id ("TAB")) delim = '\t'; - else if (token == T_STRING && tokstr.length == 1) + else if (token == T_STRING && ds_length (&tokstr) == 1) { - delim = tokstr.string[0]; + delim = ds_first (&tokstr); lex_get(); } else @@ -221,8 +210,7 @@ cmd_data_list (void) goto error; } - dls->delims = xrealloc (dls->delims, dls->delim_cnt + 1); - dls->delims[dls->delim_cnt++] = delim; + ds_put_char (&dls->delims, delim); lex_match (','); } @@ -269,10 +257,10 @@ cmd_data_list (void) if (dls->reader == NULL) goto error; - if (vfm_source != NULL) + if (in_input_program ()) add_transformation (data_list_trns_proc, data_list_trns_free, dls); else - vfm_source = create_case_source (&data_list_source_class, dls); + proc_set_source (create_case_source (&data_list_source_class, dls)); return CMD_SUCCESS; @@ -544,13 +532,11 @@ fixed_parse_compatible (struct fixed_parsing_state *fx, { convert_fmt_ItoO (&input, &v->print); v->write = v->print; - if (!case_source_is_complex (vfm_source)) - v->init = 0; } else { v = dict_lookup_var_assert (default_dict, fx->name[i]); - if (vfm_source == NULL) + if (!in_input_program ()) { msg (SE, _("%s is a duplicate variable name."), fx->name[i]); return 0; @@ -652,9 +638,6 @@ dump_fmt_list (struct fixed_parsing_state *fx, struct fmt_list *f, return 0; } - if (!case_source_is_complex (vfm_source)) - v->init = 0; - spec = xmalloc (sizeof *spec); spec->v = v; spec->input = f->f; @@ -861,9 +844,6 @@ parse_free (struct dls_var_spec **first, struct dls_var_spec **last) } v->print = v->write = output; - if (!case_source_is_complex (vfm_source)) - v->init = 0; - spec = xmalloc (sizeof *spec); spec->input = input; spec->v = v; @@ -922,109 +902,73 @@ dump_free_table (const struct data_list_pgm *dls, /* Extracts a field from the current position in the current record. Fields can be unquoted or quoted with single- or - double-quote characters. *FIELD is set to the field content. + double-quote characters. + + *FIELD is set to the field content. The caller must not + or destroy this constant string. + After parsing the field, sets the current position in the record to just past the field and any trailing delimiter. - END_BLANK is used internally; it should be initialized by the - caller to 0 and left alone afterward. Returns 0 on failure or - a 1-based column number indicating the beginning of the field - on success. */ -static int -cut_field (const struct data_list_pgm *dls, struct fixed_string *field, - int *end_blank) + Returns 0 on failure or a 1-based column number indicating the + beginning of the field on success. */ +static bool +cut_field (const struct data_list_pgm *dls, struct substring *field) { - struct fixed_string line; - char *cp; - size_t column_start; + struct substring line, p; if (dfm_eof (dls->reader)) - return 0; - if (dls->delim_cnt == 0) + return false; + if (ds_is_empty (&dls->delims)) dfm_expand_tabs (dls->reader); - dfm_get_record (dls->reader, &line); + line = p = dfm_get_record (dls->reader); - cp = ls_c_str (&line); - if (dls->delim_cnt == 0) + if (ds_is_empty (&dls->delims)) { + bool missing_quote = false; + /* Skip leading whitespace. */ - while (cp < ls_end (&line) && isspace ((unsigned char) *cp)) - cp++; - if (cp >= ls_end (&line)) - return 0; + ss_ltrim (&p, ss_cstr (CC_SPACES)); + if (ss_is_empty (p)) + return false; /* Handle actual data, whether quoted or unquoted. */ - if (*cp == '\'' || *cp == '"') - { - int quote = *cp; - - field->string = ++cp; - while (cp < ls_end (&line) && *cp != quote) - cp++; - field->length = cp - field->string; - if (cp < ls_end (&line)) - cp++; - else - msg (SW, _("Quoted string missing terminating `%c'."), quote); - } + if (ss_match_char (&p, '\'')) + missing_quote = !ss_get_until (&p, '\'', field); + else if (ss_match_char (&p, '"')) + missing_quote = !ss_get_until (&p, '"', field); else - { - field->string = cp; - while (cp < ls_end (&line) - && !isspace ((unsigned char) *cp) && *cp != ',') - cp++; - field->length = cp - field->string; - } + ss_get_chars (&p, ss_cspan (p, ss_cstr ("," CC_SPACES)), field); + if (missing_quote) + msg (SW, _("Quoted string extends beyond end of line.")); /* Skip trailing whitespace and a single comma if present. */ - while (cp < ls_end (&line) && isspace ((unsigned char) *cp)) - cp++; - if (cp < ls_end (&line) && *cp == ',') - cp++; + ss_ltrim (&p, ss_cstr (CC_SPACES)); + ss_match_char (&p, ','); + + dfm_forward_columns (dls->reader, ss_length (line) - ss_length (p)); } else { - if (cp >= ls_end (&line)) + if (!ss_is_empty (p)) + ss_get_chars (&p, ss_cspan (p, ds_ss (&dls->delims)), field); + else if (dfm_columns_past_end (dls->reader) == 0) { - int column = dfm_column_start (dls->reader); - /* A blank line or a line that ends in \t has a + /* A blank line or a line that ends in a delimiter has a trailing blank field. */ - if (column == 1 || (column > 1 && cp[-1] == '\t')) - { - if (*end_blank == 0) - { - *end_blank = 1; - field->string = ls_end (&line); - field->length = 0; - dfm_forward_record (dls->reader); - return column; - } - else - { - *end_blank = 0; - return 0; - } - } - else - return 0; + *field = p; } else - { - field->string = cp; - while (cp < ls_end (&line) - && memchr (dls->delims, *cp, dls->delim_cnt) == NULL) - cp++; - field->length = cp - field->string; - if (cp < ls_end (&line)) - cp++; - } + return false; + + /* Advance past the field. + + Also advance past a trailing delimiter, regardless of + whether one actually existed. If we "skip" a delimiter + that was not actually there, then we will return + end-of-line on our next call, which is what we want. */ + dfm_forward_columns (dls->reader, ss_length (line) - ss_length (p) + 1); } - - dfm_forward_columns (dls->reader, field->string - line.string); - column_start = dfm_column_start (dls->reader); - - dfm_forward_columns (dls->reader, cp - field->string); - - return column_start; + return true; } static bool read_from_data_list_fixed (const struct data_list_pgm *, @@ -1074,7 +1018,7 @@ read_from_data_list_fixed (const struct data_list_pgm *dls, struct ccase *c) return false; for (i = 1; i <= dls->rec_cnt; i++) { - struct fixed_string line; + struct substring line; if (dfm_eof (dls->reader)) { @@ -1084,13 +1028,13 @@ read_from_data_list_fixed (const struct data_list_pgm *dls, struct ccase *c) return false; } dfm_expand_tabs (dls->reader); - dfm_get_record (dls->reader, &line); + line = dfm_get_record (dls->reader); for (; var_spec && i == var_spec->rec; var_spec = var_spec->next) { struct data_in di; - data_in_finite_line (&di, ls_c_str (&line), ls_length (&line), + data_in_finite_line (&di, ss_data (line), ss_length (line), var_spec->fc, var_spec->lc); di.v = case_data_rw (c, var_spec->fv); di.flags = DI_IMPLIED_DECIMALS; @@ -1113,20 +1057,15 @@ static bool read_from_data_list_free (const struct data_list_pgm *dls, struct ccase *c) { struct dls_var_spec *var_spec; - int end_blank = 0; for (var_spec = dls->first; var_spec; var_spec = var_spec->next) { - struct fixed_string field; - int column; + struct substring field; + struct data_in di; /* Cut out a field and read in a new record if necessary. */ - for (;;) + while (!cut_field (dls, &field)) { - column = cut_field (dls, &field, &end_blank); - if (column != 0) - break; - if (!dfm_eof (dls->reader)) dfm_forward_record (dls->reader); if (dfm_eof (dls->reader)) @@ -1138,17 +1077,13 @@ read_from_data_list_free (const struct data_list_pgm *dls, struct ccase *c) } } - { - struct data_in di; - - di.s = ls_c_str (&field); - di.e = ls_end (&field); - di.v = case_data_rw (c, var_spec->fv); - di.flags = 0; - di.f1 = column; - di.format = var_spec->input; - data_in (&di); - } + di.s = ss_data (field); + di.e = ss_end (field); + di.v = case_data_rw (c, var_spec->fv); + di.flags = 0; + di.f1 = dfm_get_column (dls->reader, ss_data (field)); + di.format = var_spec->input; + data_in (&di); } return true; } @@ -1160,19 +1095,16 @@ static bool read_from_data_list_list (const struct data_list_pgm *dls, struct ccase *c) { struct dls_var_spec *var_spec; - int end_blank = 0; if (dfm_eof (dls->reader)) return false; for (var_spec = dls->first; var_spec; var_spec = var_spec->next) { - struct fixed_string field; - int column; + struct substring field; + struct data_in di; - /* Cut out a field and check for end-of-line. */ - column = cut_field (dls, &field, &end_blank); - if (column == 0) + if (!cut_field (dls, &field)) { if (get_undefined ()) msg (SW, _("Missing value(s) for all variables from %s onward. " @@ -1190,17 +1122,13 @@ read_from_data_list_list (const struct data_list_pgm *dls, struct ccase *c) break; } - { - struct data_in di; - - di.s = ls_c_str (&field); - di.e = ls_end (&field); - di.v = case_data_rw (c, var_spec->fv); - di.flags = 0; - di.f1 = column; - di.format = var_spec->input; - data_in (&di); - } + di.s = ss_data (field); + di.e = ss_end (field); + di.v = case_data_rw (c, var_spec->fv); + di.flags = 0; + di.f1 = dfm_get_column (dls->reader, ss_data (field)); + di.format = var_spec->input; + data_in (&di); } dfm_forward_record (dls->reader); @@ -1227,7 +1155,7 @@ static bool data_list_trns_free (void *dls_) { struct data_list_pgm *dls = dls_; - free (dls->delims); + ds_destroy (&dls->delims); destroy_dls_var_spec (dls->first); dfm_close_reader (dls->reader); free (dls); @@ -1250,7 +1178,7 @@ data_list_trns_proc (void *dls_, struct ccase *c, int case_num UNUSED) retval = TRNS_ERROR; } else - retval = TRNS_DROP_CASE; + retval = TRNS_END_FILE; /* If there was an END subcommand handle it. */ if (dls->end != NULL) @@ -1259,7 +1187,7 @@ data_list_trns_proc (void *dls_, struct ccase *c, int case_num UNUSED) if (retval == TRNS_DROP_CASE) { *end = 1.0; - retval = TRNS_CONTINUE; + retval = TRNS_END_FILE; } else *end = 0.0; @@ -1307,756 +1235,3 @@ static const struct case_source_class data_list_source_class = data_list_source_read, data_list_source_destroy, }; - -/* REPEATING DATA. */ - -/* Represents a number or a variable. */ -struct rpd_num_or_var - { - int num; /* Value, or 0. */ - struct variable *var; /* Variable, if number==0. */ - }; - -/* REPEATING DATA private data structure. */ -struct repeating_data_trns - { - struct dls_var_spec *first, *last; /* Variable parsing specifications. */ - struct dfm_reader *reader; /* Input file, never NULL. */ - - struct rpd_num_or_var starts_beg; /* STARTS=, before the dash. */ - struct rpd_num_or_var starts_end; /* STARTS=, after the dash. */ - struct rpd_num_or_var occurs; /* OCCURS= subcommand. */ - struct rpd_num_or_var length; /* LENGTH= subcommand. */ - struct rpd_num_or_var cont_beg; /* CONTINUED=, before the dash. */ - struct rpd_num_or_var cont_end; /* CONTINUED=, after the dash. */ - - /* ID subcommand. */ - int id_beg, id_end; /* Beginning & end columns. */ - struct variable *id_var; /* DATA LIST variable. */ - struct fmt_spec id_spec; /* Input format spec. */ - union value *id_value; /* ID value. */ - - write_case_func *write_case; - write_case_data wc_data; - }; - -static trns_free_func repeating_data_trns_free; -static int parse_num_or_var (struct rpd_num_or_var *, const char *); -static int parse_repeating_data (struct dls_var_spec **, - struct dls_var_spec **); -static void find_variable_input_spec (struct variable *v, - struct fmt_spec *spec); - -int cmd_repeating_data (void); - -/* Parses the REPEATING DATA command. */ -int -cmd_repeating_data (void) -{ - struct repeating_data_trns *rpd; - int table = 1; /* Print table? */ - bool saw_starts = false; /* Saw STARTS subcommand? */ - bool saw_occurs = false; /* Saw OCCURS subcommand? */ - bool saw_length = false; /* Saw LENGTH subcommand? */ - bool saw_continued = false; /* Saw CONTINUED subcommand? */ - bool saw_id = false; /* Saw ID subcommand? */ - struct file_handle *const fh = fh_get_default_handle (); - - assert (case_source_is_complex (vfm_source)); - - rpd = xmalloc (sizeof *rpd); - rpd->reader = dfm_open_reader (fh); - rpd->first = rpd->last = NULL; - rpd->starts_beg.num = 0; - rpd->starts_beg.var = NULL; - rpd->starts_end = rpd->occurs = rpd->length = rpd->cont_beg - = rpd->cont_end = rpd->starts_beg; - rpd->id_beg = rpd->id_end = 0; - rpd->id_var = NULL; - rpd->id_value = NULL; - - lex_match ('/'); - - for (;;) - { - if (lex_match_id ("FILE")) - { - struct file_handle *file; - lex_match ('='); - file = fh_parse (FH_REF_FILE | FH_REF_INLINE); - if (file == NULL) - goto error; - if (file != fh) - { - msg (SE, _("REPEATING DATA must use the same file as its " - "corresponding DATA LIST or FILE TYPE.")); - goto error; - } - } - else if (lex_match_id ("STARTS")) - { - lex_match ('='); - if (saw_starts) - { - msg (SE, _("%s subcommand given multiple times."),"STARTS"); - goto error; - } - saw_starts = true; - - if (!parse_num_or_var (&rpd->starts_beg, "STARTS beginning column")) - goto error; - - lex_negative_to_dash (); - if (lex_match ('-')) - { - if (!parse_num_or_var (&rpd->starts_end, "STARTS ending column")) - goto error; - } else { - /* Otherwise, rpd->starts_end is uninitialized. We - will initialize it later from the record length - of the file. We can't do so now because the - file handle may not be specified yet. */ - } - - if (rpd->starts_beg.num != 0 && rpd->starts_end.num != 0 - && rpd->starts_beg.num > rpd->starts_end.num) - { - msg (SE, _("STARTS beginning column (%d) exceeds " - "STARTS ending column (%d)."), - rpd->starts_beg.num, rpd->starts_end.num); - goto error; - } - } - else if (lex_match_id ("OCCURS")) - { - lex_match ('='); - if (saw_occurs) - { - msg (SE, _("%s subcommand given multiple times."),"OCCURS"); - goto error; - } - saw_occurs = true; - - if (!parse_num_or_var (&rpd->occurs, "OCCURS")) - goto error; - } - else if (lex_match_id ("LENGTH")) - { - lex_match ('='); - if (saw_length) - { - msg (SE, _("%s subcommand given multiple times."),"LENGTH"); - goto error; - } - saw_length = true; - - if (!parse_num_or_var (&rpd->length, "LENGTH")) - goto error; - } - else if (lex_match_id ("CONTINUED")) - { - lex_match ('='); - if (saw_continued) - { - msg (SE, _("%s subcommand given multiple times."),"CONTINUED"); - goto error; - } - saw_continued = true; - - if (!lex_match ('/')) - { - if (!parse_num_or_var (&rpd->cont_beg, - "CONTINUED beginning column")) - goto error; - - lex_negative_to_dash (); - if (lex_match ('-') - && !parse_num_or_var (&rpd->cont_end, - "CONTINUED ending column")) - goto error; - - if (rpd->cont_beg.num != 0 && rpd->cont_end.num != 0 - && rpd->cont_beg.num > rpd->cont_end.num) - { - msg (SE, _("CONTINUED beginning column (%d) exceeds " - "CONTINUED ending column (%d)."), - rpd->cont_beg.num, rpd->cont_end.num); - goto error; - } - } - else - rpd->cont_beg.num = 1; - } - else if (lex_match_id ("ID")) - { - lex_match ('='); - if (saw_id) - { - msg (SE, _("%s subcommand given multiple times."),"ID"); - goto error; - } - saw_id = true; - - if (!lex_force_int ()) - goto error; - if (lex_integer () < 1) - { - msg (SE, _("ID beginning column (%ld) must be positive."), - lex_integer ()); - goto error; - } - rpd->id_beg = lex_integer (); - - lex_get (); - lex_negative_to_dash (); - - if (lex_match ('-')) - { - if (!lex_force_int ()) - goto error; - if (lex_integer () < 1) - { - msg (SE, _("ID ending column (%ld) must be positive."), - lex_integer ()); - goto error; - } - if (lex_integer () < rpd->id_end) - { - msg (SE, _("ID ending column (%ld) cannot be less than " - "ID beginning column (%d)."), - lex_integer (), rpd->id_beg); - goto error; - } - - rpd->id_end = lex_integer (); - lex_get (); - } - else rpd->id_end = rpd->id_beg; - - if (!lex_force_match ('=')) - goto error; - rpd->id_var = parse_variable (); - if (rpd->id_var == NULL) - goto error; - - find_variable_input_spec (rpd->id_var, &rpd->id_spec); - rpd->id_value = xnmalloc (rpd->id_var->nv, sizeof *rpd->id_value); - } - else if (lex_match_id ("TABLE")) - table = 1; - else if (lex_match_id ("NOTABLE")) - table = 0; - else if (lex_match_id ("DATA")) - break; - else - { - lex_error (NULL); - goto error; - } - - if (!lex_force_match ('/')) - goto error; - } - - /* Comes here when DATA specification encountered. */ - if (!saw_starts || !saw_occurs) - { - if (!saw_starts) - msg (SE, _("Missing required specification STARTS.")); - if (!saw_occurs) - msg (SE, _("Missing required specification OCCURS.")); - goto error; - } - - /* Enforce ID restriction. */ - if (saw_id && !saw_continued) - { - msg (SE, _("ID specified without CONTINUED.")); - goto error; - } - - /* Calculate and check starts_end, cont_end if necessary. */ - if (rpd->starts_end.num == 0 && rpd->starts_end.var == NULL) - { - rpd->starts_end.num = fh_get_record_width (fh); - if (rpd->starts_beg.num != 0 - && rpd->starts_beg.num > rpd->starts_end.num) - { - msg (SE, _("STARTS beginning column (%d) exceeds " - "default STARTS ending column taken from file's " - "record width (%d)."), - rpd->starts_beg.num, rpd->starts_end.num); - goto error; - } - } - if (rpd->cont_end.num == 0 && rpd->cont_end.var == NULL) - { - rpd->cont_end.num = fh_get_record_width (fh); - if (rpd->cont_beg.num != 0 - && rpd->cont_beg.num > rpd->cont_end.num) - { - msg (SE, _("CONTINUED beginning column (%d) exceeds " - "default CONTINUED ending column taken from file's " - "record width (%d)."), - rpd->cont_beg.num, rpd->cont_end.num); - goto error; - } - } - - lex_match ('='); - if (!parse_repeating_data (&rpd->first, &rpd->last)) - goto error; - - /* Calculate length if necessary. */ - if (!saw_length) - { - struct dls_var_spec *iter; - - for (iter = rpd->first; iter; iter = iter->next) - if (iter->lc > rpd->length.num) - rpd->length.num = iter->lc; - assert (rpd->length.num != 0); - } - - if (table) - dump_fixed_table (rpd->first, fh, rpd->last->rec); - - add_transformation (repeating_data_trns_proc, repeating_data_trns_free, rpd); - - return lex_end_of_command (); - - error: - repeating_data_trns_free (rpd); - return CMD_CASCADING_FAILURE; -} - -/* Finds the input format specification for variable V and puts - it in SPEC. Because of the way that DATA LIST is structured, - this is nontrivial. */ -static void -find_variable_input_spec (struct variable *v, struct fmt_spec *spec) -{ - size_t i; - - for (i = 0; i < n_trns; i++) - { - struct transformation *trns = &t_trns[i]; - - if (trns->proc == data_list_trns_proc) - { - struct data_list_pgm *pgm = trns->private; - struct dls_var_spec *iter; - - for (iter = pgm->first; iter; iter = iter->next) - if (iter->v == v) - { - *spec = iter->input; - return; - } - } - } - - assert (0); -} - -/* Parses a number or a variable name from the syntax file and puts - the results in VALUE. Ensures that the number is at least 1; else - emits an error based on MESSAGE. Returns nonzero only if - successful. */ -static int -parse_num_or_var (struct rpd_num_or_var *value, const char *message) -{ - if (token == T_ID) - { - value->num = 0; - value->var = parse_variable (); - if (value->var == NULL) - return 0; - if (value->var->type == ALPHA) - { - msg (SE, _("String variable not allowed here.")); - return 0; - } - } - else if (lex_is_integer ()) - { - value->num = lex_integer (); - - if (value->num < 1) - { - msg (SE, _("%s (%d) must be at least 1."), message, value->num); - return 0; - } - - lex_get (); - } else { - msg (SE, _("Variable or integer expected for %s."), message); - return 0; - } - return 1; -} - -/* Parses data specifications for repeating data groups, adding - them to the linked list with head FIRST and tail LAST. - Returns nonzero only if successful. */ -static int -parse_repeating_data (struct dls_var_spec **first, struct dls_var_spec **last) -{ - struct fixed_parsing_state fx; - size_t i; - - fx.recno = 0; - fx.sc = 1; - - while (token != '.') - { - if (!parse_DATA_LIST_vars (&fx.name, &fx.name_cnt, PV_NONE)) - return 0; - - if (lex_is_number ()) - { - if (!fixed_parse_compatible (&fx, first, last)) - goto fail; - } - else if (token == '(') - { - if (!fixed_parse_fortran (&fx, first, last)) - goto fail; - } - else - { - msg (SE, _("SPSS-like or FORTRAN-like format " - "specification expected after variable names.")); - goto fail; - } - - for (i = 0; i < fx.name_cnt; i++) - free (fx.name[i]); - free (fx.name); - } - - return 1; - - fail: - for (i = 0; i < fx.name_cnt; i++) - free (fx.name[i]); - free (fx.name); - return 0; -} - -/* Obtains the real value for rpd_num_or_var N in case C and returns - it. The valid range is nonnegative numbers, but numbers outside - this range can be returned and should be handled by the caller as - invalid. */ -static int -realize_value (struct rpd_num_or_var *n, struct ccase *c) -{ - if (n->var != NULL) - { - double v = case_num (c, n->var->fv); - return v != SYSMIS && v >= INT_MIN && v <= INT_MAX ? v : -1; - } - else - return n->num; -} - -/* Parameter record passed to rpd_parse_record(). */ -struct rpd_parse_info - { - struct repeating_data_trns *trns; /* REPEATING DATA transformation. */ - const char *line; /* Line being parsed. */ - size_t len; /* Line length. */ - int beg, end; /* First and last column of first occurrence. */ - int ofs; /* Column offset between repeated occurrences. */ - struct ccase *c; /* Case to fill in. */ - int verify_id; /* Zero to initialize ID, nonzero to verify it. */ - int max_occurs; /* Max number of occurrences to parse. */ - }; - -/* Parses one record of repeated data and outputs corresponding - cases. Returns number of occurrences parsed up to the - maximum specified in INFO. */ -static int -rpd_parse_record (const struct rpd_parse_info *info) -{ - struct repeating_data_trns *t = info->trns; - int cur = info->beg; - int occurrences; - - /* Handle record ID values. */ - if (t->id_beg != 0) - { - union value id_temp[MAX_ELEMS_PER_VALUE]; - - /* Parse record ID into V. */ - { - struct data_in di; - - data_in_finite_line (&di, info->line, info->len, t->id_beg, t->id_end); - di.v = info->verify_id ? id_temp : t->id_value; - di.flags = 0; - di.f1 = t->id_beg; - di.format = t->id_spec; - - if (!data_in (&di)) - return 0; - } - - if (info->verify_id - && compare_values (id_temp, t->id_value, t->id_var->width) != 0) - { - char expected_str [MAX_FORMATTED_LEN + 1]; - char actual_str [MAX_FORMATTED_LEN + 1]; - - data_out (expected_str, &t->id_var->print, t->id_value); - expected_str[t->id_var->print.w] = '\0'; - - data_out (actual_str, &t->id_var->print, id_temp); - actual_str[t->id_var->print.w] = '\0'; - - tmsg (SE, RPD_ERR, - _("Encountered mismatched record ID \"%s\" expecting \"%s\"."), - actual_str, expected_str); - - return 0; - } - } - - /* Iterate over the set of expected occurrences and record each of - them as a separate case. FIXME: We need to execute any - transformations that follow the current one. */ - { - int warned = 0; - - for (occurrences = 0; occurrences < info->max_occurs; ) - { - if (cur + info->ofs > info->end + 1) - break; - occurrences++; - - { - struct dls_var_spec *var_spec = t->first; - - for (; var_spec; var_spec = var_spec->next) - { - int fc = var_spec->fc - 1 + cur; - int lc = var_spec->lc - 1 + cur; - - if (fc > info->len && !warned && var_spec->input.type != FMT_A) - { - warned = 1; - - tmsg (SW, RPD_ERR, - _("Variable %s starting in column %d extends " - "beyond physical record length of %d."), - var_spec->v->name, fc, info->len); - } - - { - struct data_in di; - - data_in_finite_line (&di, info->line, info->len, fc, lc); - di.v = case_data_rw (info->c, var_spec->fv); - di.flags = 0; - di.f1 = fc + 1; - di.format = var_spec->input; - - if (!data_in (&di)) - return 0; - } - } - } - - cur += info->ofs; - - if (!t->write_case (t->wc_data)) - return 0; - } - } - - return occurrences; -} - -/* Reads one set of repetitions of the elements in the REPEATING - DATA structure. Returns TRNS_CONTINUE on success, - TRNS_DROP_CASE on end of file or on failure, or TRNS_NEXT_CASE. */ -int -repeating_data_trns_proc (void *trns_, struct ccase *c, int case_num UNUSED) -{ - struct repeating_data_trns *t = trns_; - - struct fixed_string line; /* Current record. */ - - int starts_beg; /* Starting column. */ - int starts_end; /* Ending column. */ - int occurs; /* Number of repetitions. */ - int length; /* Length of each occurrence. */ - int cont_beg; /* Starting column for continuation lines. */ - int cont_end; /* Ending column for continuation lines. */ - - int occurs_left; /* Number of occurrences remaining. */ - - int code; /* Return value from rpd_parse_record(). */ - - int skip_first_record = 0; - - dfm_push (t->reader); - - /* Read the current record. */ - dfm_reread_record (t->reader, 1); - dfm_expand_tabs (t->reader); - if (dfm_eof (t->reader)) - return TRNS_DROP_CASE; - dfm_get_record (t->reader, &line); - dfm_forward_record (t->reader); - - /* Calculate occurs, length. */ - occurs_left = occurs = realize_value (&t->occurs, c); - if (occurs <= 0) - { - tmsg (SE, RPD_ERR, _("Invalid value %d for OCCURS."), occurs); - return TRNS_NEXT_CASE; - } - starts_beg = realize_value (&t->starts_beg, c); - if (starts_beg <= 0) - { - tmsg (SE, RPD_ERR, _("Beginning column for STARTS (%d) must be " - "at least 1."), - starts_beg); - return TRNS_NEXT_CASE; - } - starts_end = realize_value (&t->starts_end, c); - if (starts_end < starts_beg) - { - tmsg (SE, RPD_ERR, _("Ending column for STARTS (%d) is less than " - "beginning column (%d)."), - starts_end, starts_beg); - skip_first_record = 1; - } - length = realize_value (&t->length, c); - if (length < 0) - { - tmsg (SE, RPD_ERR, _("Invalid value %d for LENGTH."), length); - length = 1; - occurs = occurs_left = 1; - } - cont_beg = realize_value (&t->cont_beg, c); - if (cont_beg < 0) - { - tmsg (SE, RPD_ERR, _("Beginning column for CONTINUED (%d) must be " - "at least 1."), - cont_beg); - return TRNS_DROP_CASE; - } - cont_end = realize_value (&t->cont_end, c); - if (cont_end < cont_beg) - { - tmsg (SE, RPD_ERR, _("Ending column for CONTINUED (%d) is less than " - "beginning column (%d)."), - cont_end, cont_beg); - return TRNS_DROP_CASE; - } - - /* Parse the first record. */ - if (!skip_first_record) - { - struct rpd_parse_info info; - info.trns = t; - info.line = ls_c_str (&line); - info.len = ls_length (&line); - info.beg = starts_beg; - info.end = starts_end; - info.ofs = length; - info.c = c; - info.verify_id = 0; - info.max_occurs = occurs_left; - code = rpd_parse_record (&info); - if (!code) - return TRNS_DROP_CASE; - occurs_left -= code; - } - else if (cont_beg == 0) - return TRNS_NEXT_CASE; - - /* Make sure, if some occurrences are left, that we have - continuation records. */ - if (occurs_left > 0 && cont_beg == 0) - { - tmsg (SE, RPD_ERR, - _("Number of repetitions specified on OCCURS (%d) " - "exceed number of repetitions available in " - "space on STARTS (%d), and CONTINUED not specified."), - occurs, (starts_end - starts_beg + 1) / length); - return TRNS_DROP_CASE; - } - - /* Go on to additional records. */ - while (occurs_left != 0) - { - struct rpd_parse_info info; - - assert (occurs_left >= 0); - - /* Read in another record. */ - if (dfm_eof (t->reader)) - { - tmsg (SE, RPD_ERR, - _("Unexpected end of file with %d repetitions " - "remaining out of %d."), - occurs_left, occurs); - return TRNS_DROP_CASE; - } - dfm_expand_tabs (t->reader); - dfm_get_record (t->reader, &line); - dfm_forward_record (t->reader); - - /* Parse this record. */ - info.trns = t; - info.line = ls_c_str (&line); - info.len = ls_length (&line); - info.beg = cont_beg; - info.end = cont_end; - info.ofs = length; - info.c = c; - info.verify_id = 1; - info.max_occurs = occurs_left; - code = rpd_parse_record (&info);; - if (!code) - return TRNS_DROP_CASE; - occurs_left -= code; - } - - dfm_pop (t->reader); - - /* FIXME: This is a kluge until we've implemented multiplexing of - transformations. */ - return TRNS_NEXT_CASE; -} - -/* Frees a REPEATING DATA transformation. - Returns true if successful, false if an I/O error occurred. */ -bool -repeating_data_trns_free (void *rpd_) -{ - struct repeating_data_trns *rpd = rpd_; - - destroy_dls_var_spec (rpd->first); - dfm_close_reader (rpd->reader); - free (rpd->id_value); - free (rpd); - return true; -} - -/* Lets repeating_data_trns_proc() know how to write the cases - that it composes. Not elegant. */ -void -repeating_data_set_write_case (struct transformation *trns_, - write_case_func *write_case, - write_case_data wc_data) -{ - struct repeating_data_trns *t = trns_->private; - - assert (trns_->proc == repeating_data_trns_proc); - t->write_case = write_case; - t->wc_data = wc_data; -}