X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Fdata-list.c;h=a3e6c67a2ad251251b326b63df68ccc21d0f7be8;hb=1143173e5e7e57d9020a0b3303c980e8166b3642;hp=de50b437990fdf16a88ca6b7a98646a4e9182a1d;hpb=37597beca4a11edba50b847932fdfeca3a648fa2;p=pspp-builds.git diff --git a/src/data-list.c b/src/data-list.c index de50b437..a3e6c67a 100644 --- a/src/data-list.c +++ b/src/data-list.c @@ -19,12 +19,13 @@ #include #include "data-list.h" -#include +#include "error.h" #include #include #include #include #include "alloc.h" +#include "case.h" #include "command.h" #include "data-in.h" #include "debug-print.h" @@ -88,6 +89,8 @@ struct data_list_pgm int eof; /* End of file encountered. */ int nrec; /* Number of records. */ size_t case_size; /* Case size in bytes. */ + char *delims; /* Delimiters if any; not null-terminated. */ + size_t delim_cnt; /* Number of delimiter, or 0 for spaces. */ }; static int parse_fixed (struct data_list_pgm *); @@ -111,9 +114,6 @@ cmd_data_list (void) /* 0=print no table, 1=print table. (TABLE subcommand.) */ int table = -1; - lex_match_id ("DATA"); - lex_match_id ("LIST"); - if (!case_source_is_complex (vfm_source)) discard_variables (); @@ -123,6 +123,8 @@ cmd_data_list (void) dls->end = NULL; dls->eof = 0; dls->nrec = 0; + dls->delims = NULL; + dls->delim_cnt = 0; dls->first = dls->last = NULL; while (token != '/') @@ -169,38 +171,58 @@ cmd_data_list (void) } else if (token == T_ID) { - /* Must match DLS_* constants. */ - static const char *id[] = {"FIXED", "FREE", "LIST", "NOTABLE", - "TABLE", NULL}; - const char **p; - int index; - - for (p = id; *p; p++) - if (lex_id_match (*p, tokid)) - break; - if (*p == NULL) - { - lex_error (NULL); - goto error; - } - - lex_get (); + if (lex_match_id ("NOTABLE")) + table = 0; + else if (lex_match_id ("TABLE")) + table = 1; + else + { + int type; + if (lex_match_id ("FIXED")) + type = DLS_FIXED; + else if (lex_match_id ("FREE")) + type = DLS_FREE; + else if (lex_match_id ("LIST")) + type = DLS_LIST; + else + { + lex_error (NULL); + goto error; + } - index = p - id; - if (index < 3) - { if (dls->type != -1) { msg (SE, _("Only one of FIXED, FREE, or LIST may " - "be specified.")); + "be specified.")); goto error; } - - dls->type = index; - } - else - table = index - 3; - } + dls->type = type; + + if ((dls->type == DLS_FREE || dls->type == DLS_LIST) + && lex_match ('(')) + { + while (!lex_match (')')) + { + int delim; + + if (lex_match_id ("TAB")) + delim = '\t'; + else if (token == T_STRING && tokstr.length == 1) + delim = tokstr.string[0]; + else + { + lex_error (NULL); + goto error; + } + + dls->delims = xrealloc (dls->delims, dls->delim_cnt + 1); + dls->delims[dls->delim_cnt++] = delim; + + lex_match (','); + } + } + } + } else { lex_error (NULL); @@ -237,6 +259,9 @@ cmd_data_list (void) dump_free_table (dls); } + if (!dfm_open_for_reading (dls->handle)) + goto error; + if (vfm_source != NULL) { struct data_list_pgm *new_pgm; @@ -256,6 +281,7 @@ cmd_data_list (void) error: destroy_dls_var_spec (dls->first); + free (dls->delims); free (dls); return CMD_FAILURE; } @@ -783,7 +809,7 @@ dump_fixed_table (const struct dls_var_spec *specs, fmt_to_string (&spec->input)); } - filename = fh_handle_name (handle); + filename = handle_get_filename (handle); if (filename == NULL) filename = ""; buf = local_alloc (strlen (filename) + INT_DIGITS + 80); @@ -797,7 +823,6 @@ dump_fixed_table (const struct dls_var_spec *specs, tab_title (t, 0, buf); tab_submit (t); - fh_handle_name (NULL); local_free (buf); } @@ -839,7 +864,7 @@ parse_free (struct dls_var_spec **first, struct dls_var_spec **last) input.type = FMT_F; input.w = 8; input.d = 0; - output = set_format; + output = get_format(); } if (input.type == FMT_A || input.type == FMT_AHEX) @@ -915,7 +940,7 @@ dump_free_table (const struct data_list_pgm *dls) { const char *filename; - filename = fh_handle_name (dls->handle); + filename = handle_get_filename (dls->handle); if (filename == NULL) filename = ""; tab_title (t, 1, @@ -926,68 +951,115 @@ dump_free_table (const struct data_list_pgm *dls) } tab_submit (t); - fh_handle_name (NULL); } /* Input procedure. */ -/* Extracts a field from the current position in the current record. - Fields can be unquoted or quoted with single- or double-quote - characters. *RET_LEN is set to the field length, *RET_CP is set to - the field itself. After parsing the field, sets the current - position in the record to just past the field. Returns 0 on - failure or a 1-based column number indicating the beginning of the - field on success. */ +/* Extracts a field from the current position in the current + record. Fields can be unquoted or quoted with single- or + double-quote characters. *FIELD is set to the field content. + After parsing the field, sets the current position in the + record to just past the field and any trailing delimiter. + END_BLANK is used internally; it should be initialized by the + caller to 0 and left alone afterward. Returns 0 on failure or + a 1-based column number indicating the beginning of the field + on success. */ static int -cut_field (const struct data_list_pgm *dls, char **ret_cp, int *ret_len) +cut_field (const struct data_list_pgm *dls, struct len_string *field, + int *end_blank) { - char *cp, *ep; - int len; + struct len_string line; + char *cp; + size_t column_start; - cp = dfm_get_record (dls->handle, &len); - if (!cp) + if (dfm_eof (dls->handle)) return 0; + if (dls->delim_cnt == 0) + dfm_expand_tabs (dls->handle); + dfm_get_record (dls->handle, &line); - ep = cp + len; - - /* Skip leading whitespace and commas. */ - while ((isspace ((unsigned char) *cp) || *cp == ',') && cp < ep) - cp++; - if (cp >= ep) - return 0; - - /* Three types of fields: quoted with ', quoted with ", unquoted. */ - if (*cp == '\'' || *cp == '"') + cp = ls_c_str (&line); + if (dls->delim_cnt == 0) { - int quote = *cp; - - *ret_cp = ++cp; - while (cp < ep && *cp != quote) - cp++; - *ret_len = cp - *ret_cp; - if (cp < ep) - cp++; + /* Skip leading whitespace. */ + while (cp < ls_end (&line) && isspace ((unsigned char) *cp)) + cp++; + if (cp >= ls_end (&line)) + return 0; + + /* Handle actual data, whether quoted or unquoted. */ + if (*cp == '\'' || *cp == '"') + { + int quote = *cp; + + field->string = ++cp; + while (cp < ls_end (&line) && *cp != quote) + cp++; + field->length = cp - field->string; + if (cp < ls_end (&line)) + cp++; + else + msg (SW, _("Quoted string missing terminating `%c'."), quote); + } else - msg (SW, _("Scope of string exceeds line.")); + { + field->string = cp; + while (cp < ls_end (&line) + && !isspace ((unsigned char) *cp) && *cp != ',') + cp++; + field->length = cp - field->string; + } + + /* Skip trailing whitespace and a single comma if present. */ + while (cp < ls_end (&line) && isspace ((unsigned char) *cp)) + cp++; + if (cp < ls_end (&line) && *cp == ',') + cp++; } - else + else { - *ret_cp = cp; - while (cp < ep && !isspace ((unsigned char) *cp) && *cp != ',') - cp++; - *ret_len = cp - *ret_cp; + if (cp >= ls_end (&line)) + { + int column = dfm_column_start (dls->handle); + /* A blank line or a line that ends in \t has a + trailing blank field. */ + if (column == 1 || (column > 1 && cp[-1] == '\t')) + { + if (*end_blank == 0) + { + *end_blank = 1; + field->string = ls_end (&line); + field->length = 0; + dfm_forward_record (dls->handle); + return column; + } + else + { + *end_blank = 0; + return 0; + } + } + else + return 0; + } + else + { + field->string = cp; + while (cp < ls_end (&line) + && memchr (dls->delims, *cp, dls->delim_cnt) == NULL) + cp++; + field->length = cp - field->string; + if (cp < ls_end (&line)) + cp++; + } } - - { - int beginning_column; - - dfm_set_record (dls->handle, *ret_cp); - beginning_column = dfm_get_cur_col (dls->handle) + 1; + + dfm_forward_columns (dls->handle, field->string - line.string); + column_start = dfm_column_start (dls->handle); - dfm_set_record (dls->handle, cp); + dfm_forward_columns (dls->handle, cp - field->string); - return beginning_column; - } + return column_start; } typedef int data_list_read_func (const struct data_list_pgm *, struct ccase *); @@ -1013,6 +1085,7 @@ get_data_list_read_func (const struct data_list_pgm *dls) default: assert (0); + abort (); } } @@ -1026,27 +1099,29 @@ read_from_data_list_fixed (const struct data_list_pgm *dls, struct dls_var_spec *var_spec = dls->first; int i; - if (!dfm_get_record (dls->handle, NULL)) + if (dfm_eof (dls->handle)) return -2; for (i = 1; i <= dls->nrec; i++) { - int len; - char *line = dfm_get_record (dls->handle, &len); + struct len_string line; - if (!line) + if (dfm_eof (dls->handle)) { /* Note that this can't occur on the first record. */ msg (SW, _("Partial case of %d of %d records discarded."), i - 1, dls->nrec); return -2; } + dfm_expand_tabs (dls->handle); + dfm_get_record (dls->handle, &line); for (; var_spec && i == var_spec->rec; var_spec = var_spec->next) { struct data_in di; - data_in_finite_line (&di, line, len, var_spec->fc, var_spec->lc); - di.v = &c->data[var_spec->fv]; + data_in_finite_line (&di, ls_c_str (&line), ls_length (&line), + var_spec->fc, var_spec->lc); + di.v = case_data_rw (c, var_spec->fv); di.flags = 0; di.f1 = var_spec->fc; di.format = var_spec->input; @@ -1054,7 +1129,7 @@ read_from_data_list_fixed (const struct data_list_pgm *dls, data_in (&di); } - dfm_fwd_record (dls->handle); + dfm_forward_record (dls->handle); } return -1; @@ -1068,27 +1143,27 @@ read_from_data_list_free (const struct data_list_pgm *dls, struct ccase *c) { struct dls_var_spec *var_spec; - char *field; - int len; + int end_blank = 0; for (var_spec = dls->first; var_spec; var_spec = var_spec->next) { + struct len_string field; int column; /* Cut out a field and read in a new record if necessary. */ for (;;) { - column = cut_field (dls, &field, &len); + column = cut_field (dls, &field, &end_blank); if (column != 0) break; - if (dfm_get_record (dls->handle, NULL)) - dfm_fwd_record (dls->handle); - if (!dfm_get_record (dls->handle, NULL)) + if (!dfm_eof (dls->handle)) + dfm_forward_record (dls->handle); + if (dfm_eof (dls->handle)) { if (var_spec != dls->first) msg (SW, _("Partial case discarded. The first variable " - "missing was %s."), var_spec->name); + "missing was %s."), var_spec->name); return -2; } } @@ -1096,9 +1171,9 @@ read_from_data_list_free (const struct data_list_pgm *dls, { struct data_in di; - di.s = field; - di.e = field + len; - di.v = &c->data[var_spec->fv]; + di.s = ls_c_str (&field); + di.e = ls_end (&field); + di.v = case_data_rw (c, var_spec->fv); di.flags = 0; di.f1 = column; di.format = var_spec->input; @@ -1116,31 +1191,32 @@ read_from_data_list_list (const struct data_list_pgm *dls, struct ccase *c) { struct dls_var_spec *var_spec; - char *field; - int len; + int end_blank = 0; - if (!dfm_get_record (dls->handle, NULL)) + if (dfm_eof (dls->handle)) return -2; for (var_spec = dls->first; var_spec; var_spec = var_spec->next) { + struct len_string field; + int column; + /* Cut out a field and check for end-of-line. */ - int column = cut_field (dls, &field, &len); - + column = cut_field (dls, &field, &end_blank); if (column == 0) { - if (set_undefined) + if (get_undefined ()) msg (SW, _("Missing value(s) for all variables from %s onward. " - "These will be filled with the system-missing value " - "or blanks, as appropriate."), + "These will be filled with the system-missing value " + "or blanks, as appropriate."), var_spec->name); - for (; var_spec; var_spec = var_spec->next) + for (; var_spec; var_spec = var_spec->next) { int width = get_format_var_width (&var_spec->input); if (width == 0) - c->data[var_spec->fv].f = SYSMIS; + case_data_rw (c, var_spec->fv)->f = SYSMIS; else - memset (c->data[var_spec->fv].s, ' ', width); + memset (case_data_rw (c, var_spec->fv)->s, ' ', width); } break; } @@ -1148,9 +1224,9 @@ read_from_data_list_list (const struct data_list_pgm *dls, { struct data_in di; - di.s = field; - di.e = field + len; - di.v = &c->data[var_spec->fv]; + di.s = ls_c_str (&field); + di.e = ls_end (&field); + di.v = case_data_rw (c, var_spec->fv); di.flags = 0; di.f1 = column; di.format = var_spec->input; @@ -1158,7 +1234,7 @@ read_from_data_list_list (const struct data_list_pgm *dls, } } - dfm_fwd_record (dls->handle); + dfm_forward_record (dls->handle); return -1; } @@ -1181,6 +1257,7 @@ static void data_list_trns_free (struct trns_header *pgm) { struct data_list_pgm *dls = (struct data_list_pgm *) pgm; + free (dls->delims); destroy_dls_var_spec (dls->first); fh_close_handle (dls->handle); free (pgm); @@ -1224,11 +1301,11 @@ data_list_trns_proc (struct trns_header *t, struct ccase *c, { if (retval == -2) { - c->data[dls->end->fv].f = 1.0; + case_data_rw (c, dls->end->fv)->f = 1.0; retval = -1; } else - c->data[dls->end->fv].f = 0.0; + case_data_rw (c, dls->end->fv)->f = 0.0; } dfm_pop (dls->handle); @@ -1322,9 +1399,6 @@ cmd_repeating_data (void) /* Bits are set when a particular subcommand has been seen. */ unsigned seen = 0; - lex_match_id ("REPEATING"); - lex_match_id ("DATA"); - assert (case_source_is_complex (vfm_source)); rpd = xmalloc (sizeof *rpd); @@ -1539,9 +1613,9 @@ cmd_repeating_data (void) /* Calculate starts_end, cont_end if necessary. */ if (rpd->starts_end.num == 0 && rpd->starts_end.var == NULL) - rpd->starts_end.num = fh_record_width (rpd->handle); + rpd->starts_end.num = handle_get_record_width (rpd->handle); if (rpd->cont_end.num == 0 && rpd->starts_end.var == NULL) - rpd->cont_end.num = fh_record_width (rpd->handle); + rpd->cont_end.num = handle_get_record_width (rpd->handle); /* Calculate length if possible. */ if ((seen & 4) == 0) @@ -1713,7 +1787,7 @@ realize_value (struct rpd_num_or_var *n, struct ccase *c) assert (n->num == 0); if (n->var != NULL) { - double v = c->data[n->var->fv].f; + double v = case_num (c, n->var->fv); if (v == SYSMIS || v <= INT_MIN || v >= INT_MAX) return -1; @@ -1820,7 +1894,7 @@ rpd_parse_record (const struct rpd_parse_info *info) struct data_in di; data_in_finite_line (&di, info->line, info->len, fc, lc); - di.v = &info->c->data[var_spec->fv]; + di.v = case_data_rw (info->c, var_spec->fv); di.flags = 0; di.f1 = fc + 1; di.format = var_spec->input; @@ -1850,15 +1924,14 @@ repeating_data_trns_proc (struct trns_header *trns, struct ccase *c, { struct repeating_data_trns *t = (struct repeating_data_trns *) trns; - char *line; /* Current record. */ - int len; /* Length of current record. */ + struct len_string line; /* Current record. */ int starts_beg; /* Starting column. */ int starts_end; /* Ending column. */ int occurs; /* Number of repetitions. */ int length; /* Length of each occurrence. */ - int cont_beg; /* Starting column for continuation lines. */ - int cont_end; /* Ending column for continuation lines. */ + int cont_beg; /* Starting column for continuation lines. */ + int cont_end; /* Ending column for continuation lines. */ int occurs_left; /* Number of occurrences remaining. */ @@ -1869,11 +1942,12 @@ repeating_data_trns_proc (struct trns_header *trns, struct ccase *c, dfm_push (t->handle); /* Read the current record. */ - dfm_bkwd_record (t->handle, 1); - line = dfm_get_record (t->handle, &len); - if (line == NULL) + dfm_reread_record (t->handle, 1); + dfm_expand_tabs (t->handle); + if (dfm_eof (t->handle)) return -2; - dfm_fwd_record (t->handle); + dfm_get_record (t->handle, &line); + dfm_forward_record (t->handle); /* Calculate occurs, length. */ occurs_left = occurs = realize_value (&t->occurs, c); @@ -1927,8 +2001,8 @@ repeating_data_trns_proc (struct trns_header *trns, struct ccase *c, { struct rpd_parse_info info; info.trns = t; - info.line = line; - info.len = len; + info.line = ls_c_str (&line); + info.len = ls_length (&line); info.beg = starts_beg; info.end = starts_end; info.ofs = length; @@ -1963,8 +2037,7 @@ repeating_data_trns_proc (struct trns_header *trns, struct ccase *c, assert (occurs_left >= 0); /* Read in another record. */ - line = dfm_get_record (t->handle, &len); - if (line == NULL) + if (dfm_eof (t->handle)) { tmsg (SE, RPD_ERR, _("Unexpected end of file with %d repetitions " @@ -1972,12 +2045,14 @@ repeating_data_trns_proc (struct trns_header *trns, struct ccase *c, occurs_left, occurs); return -2; } - dfm_fwd_record (t->handle); + dfm_expand_tabs (t->handle); + dfm_get_record (t->handle, &line); + dfm_forward_record (t->handle); /* Parse this record. */ info.trns = t; - info.line = line; - info.len = len; + info.line = ls_c_str (&line); + info.len = ls_length (&line); info.beg = cont_beg; info.end = cont_end; info.ofs = length;